function tokenize()

in loader/lib/Scanner.js [55:258]


/**
 * Split `source` into an array of tokens.
 *
 * Recognized input:
 *   - names:      a letter followed by letters, digits or '_'
 *   - variables:  '@' followed by letters, digits or '_' (the '@' is dropped)
 *   - numbers:    digits and '.', optionally preceded by '-'
 *   - strings:    delimited by ', " or `; backslash escapes \b \f \n \r \t
 *                 and \uXXXX are decoded, any other escaped character is
 *                 taken literally
 *   - operators:  one of ( ) , ; : .
 *   - comments:   "--" to end of line, and C-style block comments; both are
 *                 skipped
 *   - whitespace: any character <= ' ' is skipped
 *
 * Scanning stops as soon as the name BEGINDATA has been delivered; that token
 * gets type 'begindata' and the rest of the input is left unscanned.
 *
 * @param {string} source  Text to scan.
 * @returns {Array} Tokens in input order. Each token has `type` ('name',
 *          'variable', 'number', 'string', 'operator' or 'begindata'),
 *          `value` (a Number for 'number' tokens, otherwise a string),
 *          and the `line` and `column` where the token starts.
 * @throws {Error} On an unterminated string or comment, a control character
 *          or bad \u escape in a string, a malformed number, or a character
 *          that cannot start any token. Errors raised through a token carry
 *          that token in `err.token`.
 */
function tokenize (source) {
  var operators = "(),;:.";   // Legal one-character operators
  var result = [];            // An array to hold the results

  var c;                      // The current character ('' at end of input)
  var i;                      // The index of the current character
  var v;                      // Intermediate value
  var tok;                    // Current token
  var q;                      // Quote character
  var line = 1, col = 1;      // Current line and column of input

  function peek() {           // Look ahead one character
    return source.charAt(i+1);
  }

  function advance(n) {       // Advance n characters (default 1)
    var amt = n || 1;
    if(i + amt >= source.length) {
      i = source.length;
      c = '';
    }
    else {
      i += amt;
      c = source.charAt(i);
    }
    /* A newline character itself sits at column 0 of the line it starts,
       so the character after it is at column 1. */
    if(c === '\n') {  line += 1;  col = 0; }
    else           {  col += amt; }
  }

  function begin() {          // Begin tokenizing
    i = 0;
    c = source.charAt(i);
    /* advance() only counts a newline when moving onto it, so a newline in
       the very first position has to be counted here. */
    if(c === '\n') { line = 2; col = 0; }
  }

  function isAlpha() {
    return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));
  }

  function isAlpha_() {
    return (c === '_'  || isAlpha());
  }

  function isNumeric() {
    return (c >= '0' && c <= '9');
  }

  function isInitialNumeric() {   // A digit, or '-' immediately before a digit
    var p = peek();
    return (isNumeric() || (c === '-' && p >= '0' && p <= '9'));
  }

  function isNonInitialNumeric() {
    return (isNumeric() || (c === '.'));
  }

  function isAlphanumeric() {
    return (isAlpha_() || isNumeric());
  }

  /* Tokens */

  /* Start a token at the current position and step past the current
     character; initialValue is the text accumulated so far. */
  function Token(type, initialValue) {
    this.type = type;
    this.line = line;
    this.column = col;
    this.str = initialValue;
    advance();
  }

  /* Append the current character to the token text and move on. */
  Token.prototype.consume = function() {
    this.str += c;
    advance();
  };

  /* Finish the token and append it to the result.  The token's value is
     `value` if one is given, otherwise the accumulated text.  The test is
     against undefined rather than truthiness so that the number 0 is
     delivered as a number and not replaced by its text. */
  Token.prototype.deliver = function(value) {
    this.value = (value === undefined) ? this.str : value;
    udebug.log("Token deliver", this.type, this.value);
    delete this.str;
    result.push(this);
  };

  /* Throw an Error that carries this token in its `token` property. */
  Token.prototype.error = function(message) {
    var err = new Error(message);
    err.token = this;
    throw err;
  };

  /* Examine the text one character at a time. */
  begin();
  while (c) {
    tok = null;

    if (c <= ' ') {                                      /* IGNORE WHITESPACE */
      advance();
    }

    else if(isAlpha()) {                                              /* NAME */
      tok = new Token('name', c);
      while (isAlphanumeric()) {
        tok.consume();
      }
      tok.deliver();
      if(tok.value === "BEGINDATA") {
        /* Everything after BEGINDATA is left unscanned. */
        tok.type = 'begindata';
        return result;
      }
    }

    else if (c === '@') {                                        /* @VARIABLE */
      tok = new Token('variable', '');
      while (isAlphanumeric()) {
        tok.consume();
      }
      tok.deliver();
    }

    else if (isInitialNumeric()) {                                  /* NUMBER */
      tok = new Token('number', c);
      while(isNonInitialNumeric()) {
        tok.consume();
      }
      v = + tok.str;  // numeric value; NaN for text such as "1.2.3"
      if(isFinite(v))  { tok.deliver(v); }
      else             { tok.error("bad number"); }
    }

    else if (c === '\'' || c === '"' || c === '`') {         /* QUOTED STRING */
      q = c;
      tok = new Token('string', '');
      while (c !== q) {                       /* until closing quote */

        /* Special cases: unterminated string, control character, escapes */
        if (c === '\n' || c === '\r' || c === '') {
          tok.error("Unterminated string.");
        }
        else if (c < ' ') {
          tok.error("Control character in string.");
        }
        else if (c === '\\') {  /* escape sequence */
          advance();
          /* Replace c with the character the escape stands for; the
             consume() below then appends it.  Characters with no case
             here (such as an escaped quote) are appended as they are. */
          switch (c) {
            case '':
              tok.error("Unterminated string");
              break;
            case 'b':
              c = '\b'; break;
            case 'f':
              c = '\f'; break;
            case 'n':
              c = '\n'; break;
            case 'r':
              c = '\r'; break;
            case 't':
              c = '\t'; break;
            case 'u':
              v = source.substring(i + 1, i + 5);
              if (! /^[0-9a-fA-F]{4}$/.test(v)) {
                tok.error("Bad Unicode character sequence");
              }
              /* Move onto the last hex digit first: advance() overwrites c,
                 so the decoded character must be stored afterwards. */
              advance(4);
              c = String.fromCharCode(parseInt(v, 16));
              break;
          }
        }

        tok.consume();
      }

      advance(); /* advance past closing quote */
      tok.deliver();
    }

    else if (c === '-' && peek() === '-') {        /* COMMENTS FROM -- TO EOL */
      advance(2);
      while(c !== '\n' && c !== '\r' && c !== '') {
        advance();
      }
    }

    else if (c === '/' && peek() === '*') {             // C-STYLE BLOCK COMMENTS
      advance(2);
      /* Skip until c and the next character together form the closing
         delimiter; a lone '*' or '/' inside the comment does not end it. */
      while(c && !(c === '*' && peek() === '/')) {
        advance();
      }
      /* Check before stepping over the delimiter, so that a comment which
         closes at the very end of the input is not reported as an error. */
      if(c === '') { throw new Error("Unterminated comment"); }
      advance(2);
    }

    else if(operators.indexOf(c) >= 0) {         /* SINGLE-CHARACTER OPERATOR */
      tok = new Token('operator', c);
      tok.deliver();
    }

    else {
      v = "scanner error";
      if(result.length) {  v += " after " + result[result.length - 1].value;  }
      v += " at position " + i;
      throw new Error(v);
    }

  }  /* end of while loop */

  return result;
}