final int getToken()

in rhino/src/main/java/org/mozilla/javascript/TokenStream.java [649:1398]
641 lines of code
211 McCabe index (conditional complexity)

    final int getToken() throws IOException {
        int c;

        for (; ; ) {
            // Eat whitespace, possibly sensitive to newlines.
            for (; ; ) {
                c = getChar();
                if (c == EOF_CHAR) {
                    tokenStartLastLineEnd = lastLineEnd;
                    tokenStartLineno = lineno;
                    tokenBeg = cursor - 1;
                    tokenEnd = cursor;
                    return Token.EOF;
                } else if (c == '\n') {
                    dirtyLine = false;
                    tokenStartLastLineEnd = lastLineEnd;
                    tokenStartLineno = lineno;
                    tokenBeg = cursor - 1;
                    tokenEnd = cursor;
                    return Token.EOL;
                } else if (!isJSSpace(c)) {
                    if (c != '-') {
                        dirtyLine = true;
                    }
                    break;
                }
            }

            // Assume the token will be 1 char - fixed up below.
            tokenStartLastLineEnd = lastLineEnd;
            tokenStartLineno = lineno;
            tokenBeg = cursor - 1;
            tokenEnd = cursor;

            if (c == '@') return Token.XMLATTR;

            // identifier/keyword/instanceof?
            // watch out for starting with a <backslash>
            boolean identifierStart;
            boolean isUnicodeEscapeStart = false;
            if (c == '\\') {
                c = getChar();
                if (c == 'u') {
                    identifierStart = true;
                    isUnicodeEscapeStart = true;
                    stringBufferTop = 0;
                } else {
                    identifierStart = false;
                    ungetChar(c);
                    c = '\\';
                }
            } else {
                identifierStart = Character.isUnicodeIdentifierStart(c) || c == '$' || c == '_';
                if (identifierStart) {
                    stringBufferTop = 0;
                    addToString(c);
                }
            }

            if (identifierStart) {
                boolean containsEscape = isUnicodeEscapeStart;
                for (; ; ) {
                    if (isUnicodeEscapeStart) {
                        // strictly speaking we should probably push-back
                        // all the bad characters if the <backslash>uXXXX
                        // sequence is malformed. But since there isn't a
                        // correct context(is there?) for a bad Unicode
                        // escape sequence in an identifier, we can report
                        // an error here.
                        int escapeVal = 0;
                        if (matchTemplateLiteralChar('{')) {
                            for (; ; ) {
                                c = getTemplateLiteralChar();

                                if (c == '}') {
                                    break;
                                }
                                escapeVal = Kit.xDigitToInt(c, escapeVal);
                                if (escapeVal < 0) {
                                    break;
                                }
                            }

                            if (escapeVal < 0 || escapeVal > 0x10FFFF) {
                                parser.reportError("msg.invalid.escape");
                                break;
                            }
                        } else {
                            for (int i = 0; i != 4; ++i) {
                                c = getChar();
                                escapeVal = Kit.xDigitToInt(c, escapeVal);
                                // Next check takes care about c < 0 and bad escape
                                if (escapeVal < 0) {
                                    parser.reportError("msg.invalid.escape");
                                    break;
                                }
                            }
                        }
                        if (escapeVal < 0) {
                            parser.addError("msg.invalid.escape");
                            return Token.ERROR;
                        }
                        addToString(escapeVal);
                        isUnicodeEscapeStart = false;
                    } else {
                        c = getChar();
                        if (c == '\\') {
                            c = getChar();
                            if (c == 'u') {
                                isUnicodeEscapeStart = true;
                                containsEscape = true;
                            } else {
                                parser.addError("msg.illegal.character", c);
                                return Token.ERROR;
                            }
                        } else {
                            if (c == EOF_CHAR
                                    || c == BYTE_ORDER_MARK
                                    || !(Character.isUnicodeIdentifierPart(c) || c == '$')) {
                                break;
                            }
                            addToString(c);
                        }
                    }
                }
                ungetChar(c);

                String str = getStringFromBuffer();
                if (!containsEscape
                        || parser.compilerEnv.getLanguageVersion() >= Context.VERSION_ES6) {
                    // OPT we shouldn't have to make a string (object!) to
                    // check if it's a keyword.

                    // Return the corresponding token if it's a keyword
                    int result =
                            stringToKeyword(
                                    str,
                                    parser.compilerEnv.getLanguageVersion(),
                                    parser.inUseStrictDirective());
                    if (result != Token.EOF) {
                        if ((result == Token.LET || result == Token.YIELD)
                                && parser.compilerEnv.getLanguageVersion() < Context.VERSION_1_7) {
                            // LET and YIELD are tokens only in 1.7 and later
                            string = result == Token.LET ? "let" : "yield";
                            result = Token.NAME;
                        }
                        // Save the string in case we need to use in
                        // object literal definitions.
                        this.string = internString(str);
                        if (result != Token.RESERVED) {
                            return result;
                        } else if (parser.compilerEnv.getLanguageVersion() >= Context.VERSION_ES6) {
                            return result;
                        } else if (!parser.compilerEnv.isReservedKeywordAsIdentifier()) {
                            return result;
                        }
                    }
                } else if (isKeyword(
                        str,
                        parser.compilerEnv.getLanguageVersion(),
                        parser.inUseStrictDirective())) {
                    // If a string contains unicodes, and converted to a keyword,
                    // we convert the last character back to unicode
                    str = convertLastCharToHex(str);
                }

                if (containsEscape
                        && parser.compilerEnv.getLanguageVersion() >= Context.VERSION_ES6
                        && !isValidIdentifierName(str)) {
                    parser.reportError("msg.invalid.escape");
                    return Token.ERROR;
                }

                this.string = internString(str);
                return Token.NAME;
            }

            // is it a number?
            if (isDigit(c) || (c == '.' && isDigit(peekChar()))) {
                stringBufferTop = 0;
                int base = 10;
                isHex = isOldOctal = isOctal = isBinary = false;
                boolean es6 = parser.compilerEnv.getLanguageVersion() >= Context.VERSION_ES6;

                if (c == '0') {
                    c = getChar();
                    if (c == 'x' || c == 'X') {
                        base = 16;
                        isHex = true;
                        c = getChar();
                    } else if (es6 && (c == 'o' || c == 'O')) {
                        base = 8;
                        isOctal = true;
                        c = getChar();
                    } else if (es6 && (c == 'b' || c == 'B')) {
                        base = 2;
                        isBinary = true;
                        c = getChar();
                    } else if (isDigit(c)) {
                        base = 8;
                        isOldOctal = true;
                    } else {
                        addToString('0');
                    }
                }

                int emptyDetector = stringBufferTop;
                if (base == 10 || base == 16 || (base == 8 && !isOldOctal) || base == 2) {
                    c = readDigits(base, c);
                    if (c == REPORT_NUMBER_FORMAT_ERROR) {
                        parser.addError("msg.caught.nfe");
                        return Token.ERROR;
                    }
                } else {
                    while (isDigit(c)) {
                        // finally the oldOctal case
                        if (c >= '8') {
                            /*
                             * We permit 08 and 09 as decimal numbers, which
                             * makes our behavior a superset of the ECMA
                             * numeric grammar.  We might not always be so
                             * permissive, so we warn about it.
                             */
                            parser.addWarning("msg.bad.octal.literal", c == '8' ? "8" : "9");
                            base = 10;

                            c = readDigits(base, c);
                            if (c == REPORT_NUMBER_FORMAT_ERROR) {
                                parser.addError("msg.caught.nfe");
                                return Token.ERROR;
                            }
                            break;
                        }
                        addToString(c);
                        c = getChar();
                    }
                }
                if (stringBufferTop == emptyDetector && (isBinary || isOctal || isHex)) {
                    parser.addError("msg.caught.nfe");
                    return Token.ERROR;
                }

                boolean isInteger = true;
                boolean isBigInt = false;

                if (es6 && c == 'n') {
                    isBigInt = true;
                    c = getChar();
                } else if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
                    isInteger = false;
                    if (c == '.') {
                        isInteger = false;
                        addToString(c);
                        c = getChar();
                        c = readDigits(base, c);
                        if (c == REPORT_NUMBER_FORMAT_ERROR) {
                            parser.addError("msg.caught.nfe");
                            return Token.ERROR;
                        }
                    }
                    if (c == 'e' || c == 'E') {
                        isInteger = false;
                        addToString(c);
                        c = getChar();
                        if (c == '+' || c == '-') {
                            addToString(c);
                            c = getChar();
                        }
                        if (!isDigit(c)) {
                            parser.addError("msg.missing.exponent");
                            return Token.ERROR;
                        }
                        c = readDigits(base, c);
                        if (c == REPORT_NUMBER_FORMAT_ERROR) {
                            parser.addError("msg.caught.nfe");
                            return Token.ERROR;
                        }
                    }
                }
                ungetChar(c);
                String numString = getStringFromBuffer();
                this.string = numString;

                // try to remove the separator in a fast way
                int pos = numString.indexOf(NUMERIC_SEPARATOR);
                if (pos != -1) {
                    final char[] chars = numString.toCharArray();
                    for (int i = pos + 1; i < chars.length; i++) {
                        if (chars[i] != NUMERIC_SEPARATOR) {
                            chars[pos++] = chars[i];
                        }
                    }
                    numString = new String(chars, 0, pos);
                }

                if (isBigInt) {
                    this.bigInt = new BigInteger(numString, base);
                    return Token.BIGINT;
                }

                double dval;
                if (base == 10 && !isInteger) {
                    try {
                        // Use Java conversion to number from string...
                        dval = Double.parseDouble(numString);
                    } catch (NumberFormatException ex) {
                        parser.addError("msg.caught.nfe");
                        return Token.ERROR;
                    }
                } else {
                    dval = ScriptRuntime.stringPrefixToNumber(numString, 0, base);
                }

                this.number = dval;
                return Token.NUMBER;
            }

            // is it a string?
            if (c == '"' || c == '\'') {
                // We attempt to accumulate a string the fast way, by
                // building it directly out of the reader.  But if there
                // are any escaped characters in the string, we revert to
                // building it out of a StringBuffer.

                quoteChar = c;
                stringBufferTop = 0;

                c = getCharIgnoreLineEnd(false);
                strLoop:
                while (c != quoteChar) {
                    boolean unterminated = false;
                    if (c == EOF_CHAR) {
                        unterminated = true;
                    } else if (c == '\n') {
                        switch (lineEndChar) {
                            case '\n':
                            case '\r':
                                unterminated = true;
                                break;
                            case 0x2028: // <LS>
                            case 0x2029: // <PS>
                                // Line/Paragraph separators need to be included as is
                                c = lineEndChar;
                                break;
                            default:
                                break;
                        }
                    }

                    if (unterminated) {
                        ungetCharIgnoreLineEnd(c);
                        tokenEnd = cursor;
                        parser.addError("msg.unterminated.string.lit");
                        return Token.ERROR;
                    }

                    if (c == '\\') {
                        // We've hit an escaped character
                        int escapeVal;

                        c = getChar();
                        switch (c) {
                            case 'b':
                                c = '\b';
                                break;
                            case 'f':
                                c = '\f';
                                break;
                            case 'n':
                                c = '\n';
                                break;
                            case 'r':
                                c = '\r';
                                break;
                            case 't':
                                c = '\t';
                                break;

                            // \v a late addition to the ECMA spec,
                            // it is not in Java, so use 0xb
                            case 'v':
                                c = 0xb;
                                break;

                            case 'u':
                                // Get 4 hex digits; if the u escape is not
                                // followed by 4 hex digits, use 'u' + the
                                // literal character sequence that follows.
                                int escapeStart = stringBufferTop;
                                addToString('u');
                                escapeVal = 0;
                                if (matchChar('{')) {
                                    for (; ; ) {
                                        c = getChar();

                                        if (c == '}') {
                                            addToString(c);
                                            break;
                                        }
                                        escapeVal = Kit.xDigitToInt(c, escapeVal);
                                        if (escapeVal < 0) {
                                            break;
                                        }
                                        addToString(c);
                                    }

                                    if (escapeVal < 0 || escapeVal > 0x10FFFF) {
                                        parser.reportError("msg.invalid.escape");
                                        continue strLoop;
                                    }
                                } else {
                                    for (int i = 0; i != 4; ++i) {
                                        c = getChar();
                                        escapeVal = Kit.xDigitToInt(c, escapeVal);
                                        if (escapeVal < 0) {
                                            if (parser.compilerEnv.getLanguageVersion()
                                                    >= Context.VERSION_ES6) {
                                                parser.reportError("msg.invalid.escape");
                                            }
                                            continue strLoop;
                                        }
                                        addToString(c);
                                    }
                                }
                                // prepare for replace of stored 'u' sequence
                                // by escape value
                                stringBufferTop = escapeStart;
                                c = escapeVal;
                                break;
                            case 'x':
                                // Get 2 hex digits, defaulting to 'x'+literal
                                // sequence, as above.
                                c = getChar();
                                escapeVal = Kit.xDigitToInt(c, 0);
                                if (escapeVal < 0) {
                                    addToString('x');
                                    continue strLoop;
                                }
                                int c1 = c;
                                c = getChar();
                                escapeVal = Kit.xDigitToInt(c, escapeVal);
                                if (escapeVal < 0) {
                                    addToString('x');
                                    addToString(c1);
                                    continue strLoop;
                                }
                                // got 2 hex digits
                                c = escapeVal;
                                break;

                            case '\n':
                                // Remove line terminator after escape to follow
                                // SpiderMonkey and C/C++
                                c = getChar();
                                continue strLoop;

                            default:
                                if ('0' <= c && c < '8') {
                                    int val = c - '0';
                                    c = getChar();
                                    if ('0' <= c && c < '8') {
                                        val = 8 * val + c - '0';
                                        c = getChar();
                                        if ('0' <= c && c < '8' && val <= 037) {
                                            // c is 3rd char of octal sequence only
                                            // if the resulting val <= 0377
                                            val = 8 * val + c - '0';
                                            c = getChar();
                                        }
                                    }
                                    ungetChar(c);
                                    c = val;
                                }
                        }
                    }
                    addToString(c);
                    c = getChar(false);
                }

                String str = getStringFromBuffer();
                this.string = internString(str);
                cursor = sourceCursor;
                tokenEnd = cursor;
                return Token.STRING;
            }

            if (c == '#'
                    && cursor == 1
                    && peekChar() == '!'
                    && !this.parser.calledByCompileFunction) {
                // #! hashbang: only on the first line of a Script, no leading whitespace
                skipLine();
                return Token.COMMENT;
            }

            switch (c) {
                case ';':
                    return Token.SEMI;
                case '[':
                    return Token.LB;
                case ']':
                    return Token.RB;
                case '{':
                    return Token.LC;
                case '}':
                    return Token.RC;
                case '(':
                    return Token.LP;
                case ')':
                    return Token.RP;
                case ',':
                    return Token.COMMA;
                case '?':
                    if (parser.compilerEnv.getLanguageVersion() >= Context.VERSION_ES6) {
                        if (peekChar() == '.') {
                            // ?.digit is to be treated as ? .num
                            getChar();
                            if (!isDigit(peekChar())) {
                                return Token.QUESTION_DOT;
                            }
                            ungetChar('.');
                        } else if (matchChar('?')) {
                            if (matchChar('=')) {
                                return Token.ASSIGN_NULLISH;
                            }
                            return Token.NULLISH_COALESCING;
                        }
                    }
                    return Token.HOOK;
                case ':':
                    if (matchChar(':')) {
                        return Token.COLONCOLON;
                    }
                    return Token.COLON;
                case '.':
                    if (matchChar('.')) {
                        if (parser.compilerEnv.getLanguageVersion() >= Context.VERSION_1_8
                                && matchChar('.')) {
                            return Token.DOTDOTDOT;
                        }
                        return Token.DOTDOT;
                    } else if (matchChar('(')) {
                        return Token.DOTQUERY;
                    } else {
                        return Token.DOT;
                    }

                case '|':
                    if (matchChar('|')) {
                        if (matchChar('=')) return Token.ASSIGN_LOGICAL_OR;
                        else return Token.OR;
                    } else if (matchChar('=')) {
                        return Token.ASSIGN_BITOR;
                    } else {
                        return Token.BITOR;
                    }

                case '^':
                    if (matchChar('=')) {
                        return Token.ASSIGN_BITXOR;
                    }
                    return Token.BITXOR;

                case '&':
                    if (matchChar('&')) {
                        if (matchChar('=')) return Token.ASSIGN_LOGICAL_AND;
                        else return Token.AND;
                    } else if (matchChar('=')) {
                        return Token.ASSIGN_BITAND;
                    } else {
                        return Token.BITAND;
                    }

                case '=':
                    if (matchChar('=')) {
                        if (matchChar('=')) {
                            return Token.SHEQ;
                        }
                        return Token.EQ;
                    } else if (matchChar('>')) {
                        return Token.ARROW;
                    } else {
                        return Token.ASSIGN;
                    }

                case '!':
                    if (matchChar('=')) {
                        if (matchChar('=')) {
                            return Token.SHNE;
                        }
                        return Token.NE;
                    }
                    return Token.NOT;

                case '<':
                    /* NB:treat HTML begin-comment as comment-till-eol */
                    if (matchChar('!')) {
                        if (matchChar('-')) {
                            if (matchChar('-')) {
                                tokenStartLastLineEnd = lastLineEnd;
                                tokenStartLineno = lineno;
                                tokenBeg = cursor - 4;
                                skipLine();
                                commentType = Token.CommentType.HTML;
                                return Token.COMMENT;
                            }
                            ungetCharIgnoreLineEnd('-');
                        }
                        ungetCharIgnoreLineEnd('!');
                    }
                    if (matchChar('<')) {
                        if (matchChar('=')) {
                            return Token.ASSIGN_LSH;
                        }
                        return Token.LSH;
                    }
                    if (matchChar('=')) {
                        return Token.LE;
                    }
                    return Token.LT;

                case '>':
                    if (matchChar('>')) {
                        if (matchChar('>')) {
                            if (matchChar('=')) {
                                return Token.ASSIGN_URSH;
                            }
                            return Token.URSH;
                        }
                        if (matchChar('=')) {
                            return Token.ASSIGN_RSH;
                        }
                        return Token.RSH;
                    }
                    if (matchChar('=')) {
                        return Token.GE;
                    }
                    return Token.GT;

                case '*':
                    if (parser.compilerEnv.getLanguageVersion() >= Context.VERSION_ES6) {
                        if (matchChar('*')) {
                            if (matchChar('=')) {
                                return Token.ASSIGN_EXP;
                            }
                            return Token.EXP;
                        }
                    }
                    if (matchChar('=')) {
                        return Token.ASSIGN_MUL;
                    }
                    return Token.MUL;

                case '/':
                    markCommentStart();
                    // is it a // comment?
                    if (matchChar('/')) {
                        tokenStartLastLineEnd = lastLineEnd;
                        tokenStartLineno = lineno;
                        tokenBeg = cursor - 2;
                        skipLine();
                        commentType = Token.CommentType.LINE;
                        return Token.COMMENT;
                    }
                    // is it a /* or /** comment?
                    if (matchChar('*')) {
                        boolean lookForSlash = false;
                        tokenStartLastLineEnd = lastLineEnd;
                        tokenStartLineno = lineno;
                        tokenBeg = cursor - 2;
                        if (matchChar('*')) {
                            lookForSlash = true;
                            commentType = Token.CommentType.JSDOC;
                        } else {
                            commentType = Token.CommentType.BLOCK_COMMENT;
                        }
                        for (; ; ) {
                            c = getChar();
                            if (c == EOF_CHAR) {
                                tokenEnd = cursor - 1;
                                parser.addError("msg.unterminated.comment");
                                return Token.COMMENT;
                            } else if (c == '*') {
                                lookForSlash = true;
                            } else if (c == '/') {
                                if (lookForSlash) {
                                    cursor = sourceCursor;
                                    tokenEnd = cursor;
                                    return Token.COMMENT;
                                }
                            } else {
                                lookForSlash = false;
                                tokenEnd = cursor;
                            }
                        }
                    }

                    if (matchChar('=')) {
                        return Token.ASSIGN_DIV;
                    }
                    return Token.DIV;

                case '%':
                    if (matchChar('=')) {
                        return Token.ASSIGN_MOD;
                    }
                    return Token.MOD;

                case '~':
                    return Token.BITNOT;

                case '+':
                    if (matchChar('=')) {
                        return Token.ASSIGN_ADD;
                    } else if (matchChar('+')) {
                        return Token.INC;
                    } else {
                        return Token.ADD;
                    }

                case '-':
                    if (matchChar('=')) {
                        c = Token.ASSIGN_SUB;
                    } else if (matchChar('-')) {
                        if (!dirtyLine) {
                            // treat HTML end-comment after possible whitespace
                            // after line start as comment-until-eol
                            if (matchChar('>')) {
                                markCommentStart("--");
                                skipLine();
                                commentType = Token.CommentType.HTML;
                                return Token.COMMENT;
                            }
                        }
                        c = Token.DEC;
                    } else {
                        c = Token.SUB;
                    }
                    dirtyLine = true;
                    return c;

                case '`':
                    return Token.TEMPLATE_LITERAL;

                default:
                    parser.addError("msg.illegal.character", c);
                    return Token.ERROR;
            }
        }
    }