public Context ScanNextToken()

in AjaxMinDll/JavaScript/jsscanner.cs [320:1098]
594 lines of code
192 McCabe index (conditional complexity)

        /// <summary>
        /// Scans the single token that begins at the current source position and advances
        /// the scanner past it.
        /// </summary>
        /// <remarks>
        /// The start position/line of the token is recorded in <c>m_currentToken</c> before the
        /// first character is examined, and the end position/line plus the token type are
        /// recorded after the token has been consumed. Whitespace and comments are reported
        /// as tokens of their own rather than being skipped. The method also maintains the
        /// comment and conditional-compilation state flags (<c>m_inSingleLineComment</c>,
        /// <c>m_inMultipleLineComment</c>, <c>m_inConditionalComment</c>,
        /// <c>m_conditionalCompilationOn</c>, <c>m_conditionalCompilationIfLevel</c>).
        /// Look-ahead that may fail is done by peeking (<c>m_currentPosition + n</c>) rather than
        /// by incrementing, so a failed match never consumes characters.
        /// </remarks>
        /// <returns>
        /// <c>m_currentToken</c>, updated with the type and source span of the token just scanned.
        /// </returns>
        public Context ScanNextToken()
        {
            var token = JSToken.None;

            // remember where this token starts
            m_currentToken.StartPosition = m_currentPosition;
            m_currentToken.StartLineNumber = m_currentLine;
            m_currentToken.StartLinePosition = m_startLinePosition;

            // reset the per-token identifier state
            m_identifier.Length = 0;
            m_mightBeKeyword = false;

            // our case switch should be pretty efficient -- it's 9-13 and 32-126. Those are the most common characters 
            // we will find in the code for the start of tokens.
            char ch = GetChar(m_currentPosition);
            switch (ch)
            {
                case '\n':
                case '\r':
                    token = ScanLineTerminator(ch);
                    break;

                case '\t':
                case '\v':
                case '\f':
                case ' ':
                    // we are asking for raw tokens, and this is the start of a stretch of whitespace.
                    // advance to the end of the whitespace, and return that as the token
                    token = JSToken.WhiteSpace;
                    while (JSScanner.IsBlankSpace(GetChar(++m_currentPosition)))
                    {
                        // increment handled by condition
                    }

                    break;

                case '!':
                    // one of: !  !=  !==
                    token = JSToken.LogicalNot;
                    if ('=' == GetChar(++m_currentPosition))
                    {
                        token = JSToken.NotEqual;
                        if ('=' == GetChar(++m_currentPosition))
                        {
                            ++m_currentPosition;
                            token = JSToken.StrictNotEqual;
                        }
                    }

                    break;

                case '"':
                case '\'':
                    token = JSToken.StringLiteral;
                    ScanString(ch);
                    break;

                case '$':
                case '_':
                    token = ScanIdentifier(true);
                    break;

                case '%':
                    // one of: %  %=
                    token = JSToken.Modulo;
                    if ('=' == GetChar(++m_currentPosition))
                    {
                        ++m_currentPosition;
                        token = JSToken.ModuloAssign;
                    }

                    break;

                case '&':
                    // one of: &  &&  &=
                    token = JSToken.BitwiseAnd;
                    ch = GetChar(++m_currentPosition);
                    if ('&' == ch)
                    {
                        ++m_currentPosition;
                        token = JSToken.LogicalAnd;
                    }
                    else if ('=' == ch)
                    {
                        ++m_currentPosition;
                        token = JSToken.BitwiseAndAssign;
                    }

                    break;

                case '(':
                    token = JSToken.LeftParenthesis;
                    ++m_currentPosition;
                    break;

                case ')':
                    token = JSToken.RightParenthesis;
                    ++m_currentPosition;
                    break;

                case '*':
                    // one of: *  *=
                    token = JSToken.Multiply;
                    if ('=' == GetChar(++m_currentPosition))
                    {
                        ++m_currentPosition;
                        token = JSToken.MultiplyAssign;
                    }

                    break;

                case '+':
                    // one of: +  ++  +=
                    token = JSToken.Plus;
                    ch = GetChar(++m_currentPosition);
                    if ('+' == ch)
                    {
                        ++m_currentPosition;
                        token = JSToken.Increment;
                    }
                    else if ('=' == ch)
                    {
                        ++m_currentPosition;
                        token = JSToken.PlusAssign;
                    }

                    break;

                case ',':
                    token = JSToken.Comma;
                    ++m_currentPosition;
                    break;

                case '-':
                    // one of: -  --  -=
                    token = JSToken.Minus;
                    ch = GetChar(++m_currentPosition);
                    if ('-' == ch)
                    {
                        ++m_currentPosition;
                        token = JSToken.Decrement;
                    }
                    else if ('=' == ch)
                    {
                        ++m_currentPosition;
                        token = JSToken.MinusAssign;
                    }

                    break;

                case '.':
                    // one of: .  ...  or a number with a leading decimal point
                    token = JSToken.AccessField;
                    ch = GetChar(++m_currentPosition);
                    if (ch == '.' && GetChar(m_currentPosition + 1) == '.')
                    {
                        // only peek at the third character so that a lone ".." does not get
                        // swallowed into a single two-character access-field token; consume
                        // the second and third dots only once all three are confirmed.
                        token = JSToken.RestSpread;
                        m_currentPosition += 2;
                    }
                    else if (IsDigit(ch))
                    {
                        token = ScanNumber('.');
                    }

                    break;

                case '/':
                    // one of: /  /=  a single-line comment, or a multi-line comment
                    token = JSToken.Divide;
                    ch = GetChar(++m_currentPosition);
                    switch (ch)
                    {
                        case '/':
                            token = JSToken.SingleLineComment;
                            m_inSingleLineComment = true;
                            ch = GetChar(++m_currentPosition);

                            // see if there is a THIRD slash character
                            if (ch == '/')
                            {
                                // advance past the slash and see if we have one of our special preprocessing directives
                                if (GetChar(++m_currentPosition) == '#')
                                {
                                    // scan preprocessing directives
                                    token = JSToken.PreprocessorDirective;

                                    if (!ScanPreprocessingDirective())
                                    {
                                        // if it returns false, we don't want to skip the rest of
                                        // the comment line; just exit
                                        break;
                                    }
                                }
                            }
                            else if (ch == '@' && !IgnoreConditionalCompilation)
                            {
                                // we got //@
                                // if we have not turned on conditional-compilation yet, then check to see if that's
                                // what we're trying to do now.
                                // we are currently on the @ -- start peeking from there
                                if (m_conditionalCompilationOn
                                    || CheckSubstring(m_currentPosition + 1, "cc_on"))
                                {
                                    // if the NEXT character is not an identifier character, then we need to skip
                                    // the @ character -- otherwise leave it there
                                    if (!IsValidIdentifierStart(m_strSourceCode, m_currentPosition + 1))
                                    {
                                        ++m_currentPosition;
                                    }

                                    // we are now in a conditional comment
                                    m_inConditionalComment = true;
                                    token = JSToken.ConditionalCommentStart;
                                    break;
                                }
                            }

                            SkipSingleLineComment();

                            // if we're still in a multiple-line comment, then we must've been in
                            // a multi-line CONDITIONAL comment, in which case this normal one-line comment
                            // won't turn off conditional comments just because we hit the end of line.
                            if (!m_inMultipleLineComment && m_inConditionalComment)
                            {
                                m_inConditionalComment = false;
                                token = JSToken.ConditionalCommentEnd;
                            }

                            break;

                        case '*':
                            m_inMultipleLineComment = true;
                            token = JSToken.MultipleLineComment;
                            ch = GetChar(++m_currentPosition);
                            if (ch == '@' && !IgnoreConditionalCompilation)
                            {
                                // we have /*@
                                // if we have not turned on conditional-compilation yet, then let's peek to see if the next
                                // few characters are cc_on -- if so, turn it on.
                                if (!m_conditionalCompilationOn)
                                {
                                    // we are currently on the @ -- start peeking from there
                                    if (!CheckSubstring(m_currentPosition + 1, "cc_on"))
                                    {
                                        // we aren't turning on conditional comments. We need to ignore this comment
                                        // as just another multi-line comment
                                        SkipMultilineComment();
                                        token = JSToken.MultipleLineComment;
                                        break;
                                    }
                                }

                                // if the NEXT character is not an identifier character, then we need to skip
                                // the @ character -- otherwise leave it there
                                if (!IsValidIdentifierStart(m_strSourceCode, m_currentPosition + 1))
                                {
                                    ++m_currentPosition;
                                }

                                // we are now in a conditional comment
                                m_inConditionalComment = true;
                                token = JSToken.ConditionalCommentStart;
                                break;
                            }
                            else if (ch == '/')
                            {
                                // We have /*/
                                // advance past the slash and see if we have one of our special preprocessing directives
                                if (GetChar(++m_currentPosition) == '#')
                                {
                                    // scan preprocessing directives. When it exits we will still be within
                                    // the multiline comment, since none of the directives should eat the closing */.
                                    // therefore no matter the reason we exit the scan, we always want to skip
                                    // the rest of the multiline comment.
                                    token = JSToken.PreprocessorDirective;
                                    if (!ScanPreprocessingDirective())
                                    {
                                        // if it returns false, we fully-processed the comment and
                                        // just want to exit now.
                                        break;
                                    }
                                }
                            }

                            // skip the rest of the comment
                            SkipMultilineComment();
                            break;

                        case '=':
                            m_currentPosition++;
                            token = JSToken.DivideAssign;
                            break;
                    }
                    break;

                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    ++m_currentPosition;
                    token = ScanNumber(ch);
                    break;

                case ':':
                    token = JSToken.Colon;
                    ++m_currentPosition;
                    break;

                case ';':
                    token = JSToken.Semicolon;
                    ++m_currentPosition;
                    break;

                case '<':
                    // one of: <  <=  <<  <<=  or the start of an embedded ASP.NET block.
                    // Only PEEK at the next character when testing for an ASP.NET block: if the
                    // test consumed the character and then failed, the operator scan below would
                    // advance a second time and skip the character right after the '<'.
                    if (AllowEmbeddedAspNetBlocks &&
                        '%' == GetChar(m_currentPosition + 1))
                    {
                        // move onto the '%' before scanning the block
                        ++m_currentPosition;
                        token = ScanAspNetBlock();
                    }
                    else
                    {
                        token = JSToken.LessThan;
                        if ('<' == GetChar(++m_currentPosition))
                        {
                            ++m_currentPosition;
                            token = JSToken.LeftShift;
                        }

                        // a trailing '=' turns either operator into its "-equal"/assignment form
                        if ('=' == GetChar(m_currentPosition))
                        {
                            ++m_currentPosition;
                            token = token == JSToken.LessThan
                                ? JSToken.LessThanEqual
                                : JSToken.LeftShiftAssign;
                        }
                    }
                    break;

                case '=':
                    // one of: =  ==  ===  =>
                    token = JSToken.Assign;
                    if ('=' == GetChar(++m_currentPosition))
                    {
                        token = JSToken.Equal;
                        if ('=' == GetChar(++m_currentPosition))
                        {
                            ++m_currentPosition;
                            token = JSToken.StrictEqual;
                        }
                    }
                    else if (GetChar(m_currentPosition) == '>')
                    {
                        ++m_currentPosition;
                        token = JSToken.ArrowFunction;
                    }

                    break;

                case '>':
                    // one of: >  >=  >>  >>=  >>>  >>>=
                    token = JSToken.GreaterThan;
                    if ('>' == GetChar(++m_currentPosition))
                    {
                        token = JSToken.RightShift;
                        if ('>' == GetChar(++m_currentPosition))
                        {
                            ++m_currentPosition;
                            token = JSToken.UnsignedRightShift;
                        }
                    }

                    // a trailing '=' turns whichever operator we found into its "-equal"/assignment form
                    if ('=' == GetChar(m_currentPosition))
                    {
                        ++m_currentPosition;
                        token = token == JSToken.GreaterThan ? JSToken.GreaterThanEqual
                            : token == JSToken.RightShift ? JSToken.RightShiftAssign
                            : token == JSToken.UnsignedRightShift ? JSToken.UnsignedRightShiftAssign
                            : JSToken.Error;
                    }
                    break;

                case '?':
                    token = JSToken.ConditionalIf;
                    ++m_currentPosition;
                    break;

                case '@':
                    if (IgnoreConditionalCompilation)
                    {
                        // if the switch to ignore conditional compilation is on, then we don't know
                        // anything about conditional-compilation statements, and the @-sign character
                        // is illegal at this spot.
                        ++m_currentPosition;
                        token = IllegalCharacter();
                        break;
                    }

                    // see if the @-sign is immediately followed by an identifier. If it is,
                    // we'll see which one so we can tell if it's a conditional-compilation statement
                    // need to make sure the context INCLUDES the @ sign
                    int startPosition = ++m_currentPosition;
                    ScanIdentifier(false);

                    // dispatch on the length of the identifier that followed the @-sign
                    switch (m_currentPosition - startPosition)
                    {
                        case 0:
                            // look for '@*/'.
                            if ('*' == GetChar(m_currentPosition) && '/' == GetChar(m_currentPosition + 1))
                            {
                                m_currentPosition += 2;
                                m_inMultipleLineComment = false;
                                m_inConditionalComment = false;
                                token = JSToken.ConditionalCommentEnd;
                                break;
                            }

                            // otherwise we just have a @ sitting by itself!
                            // throw an error and loop back to the next token.
                            token = IllegalCharacter();
                            break;

                        case 2:
                            if (CheckSubstring(startPosition, "if"))
                            {
                                token = JSToken.ConditionalCompilationIf;

                                // increment the if-level
                                ++m_conditionalCompilationIfLevel;

                                // if we're not in a conditional comment and we haven't explicitly
                                // turned on conditional compilation when we encounter
                                // a @if statement, then we can implicitly turn it on.
                                if (!m_inConditionalComment && !m_conditionalCompilationOn)
                                {
                                    m_conditionalCompilationOn = true;
                                }

                                break;
                            }

                            // the string isn't a known preprocessor command, so 
                            // fall into the default processing to handle it as a variable name
                            goto default;

                        case 3:
                            if (CheckSubstring(startPosition, "set"))
                            {
                                token = JSToken.ConditionalCompilationSet;

                                // if we're not in a conditional comment and we haven't explicitly
                                // turned on conditional compilation when we encounter
                                // a @set statement, then we can implicitly turn it on.
                                if (!m_inConditionalComment && !m_conditionalCompilationOn)
                                {
                                    m_conditionalCompilationOn = true;
                                }

                                break;
                            }

                            if (CheckSubstring(startPosition, "end"))
                            {
                                token = JSToken.ConditionalCompilationEnd;
                                if (m_conditionalCompilationIfLevel > 0)
                                {
                                    // down one more @if level
                                    m_conditionalCompilationIfLevel--;
                                }
                                else
                                {
                                    // not corresponding @if -- invalid @end statement
                                    HandleError(JSError.CCInvalidEnd);
                                }

                                break;
                            }

                            // the string isn't a known preprocessor command, so 
                            // fall into the default processing to handle it as a variable name
                            goto default;

                        case 4:
                            if (CheckSubstring(startPosition, "else"))
                            {
                                token = JSToken.ConditionalCompilationElse;

                                // if we don't have a corresponding @if statement, then throw an error
                                // (but keep processing)
                                if (m_conditionalCompilationIfLevel <= 0)
                                {
                                    HandleError(JSError.CCInvalidElse);
                                }

                                break;
                            }

                            if (CheckSubstring(startPosition, "elif"))
                            {
                                token = JSToken.ConditionalCompilationElseIf;

                                // if we don't have a corresponding @if statement, then throw an error
                                // (but keep processing)
                                if (m_conditionalCompilationIfLevel <= 0)
                                {
                                    HandleError(JSError.CCInvalidElseIf);
                                }

                                break;
                            }

                            // the string isn't a known preprocessor command, so 
                            // fall into the default processing to handle it as a variable name
                            goto default;

                        case 5:
                            if (CheckSubstring(startPosition, "cc_on"))
                            {
                                // turn it on and return the @cc_on token
                                m_conditionalCompilationOn = true;
                                token = JSToken.ConditionalCompilationOn;
                                break;
                            }

                            // the string isn't a known preprocessor command, so 
                            // fall into the default processing to handle it as a variable name
                            goto default;

                        default:
                            // we have @[id], where [id] is a valid identifier.
                            // if we haven't explicitly turned on conditional compilation,
                            // we'll keep processing, but we need to fire an error to indicate
                            // that the code should turn it on first.
                            if (!m_conditionalCompilationOn)
                            {
                                HandleError(JSError.CCOff);
                            }

                            token = JSToken.ConditionalCompilationVariable;
                            break;
                    }

                    break;

                case 'A':
                case 'B':
                case 'C':
                case 'D':
                case 'E':
                case 'F':
                case 'G':
                case 'H':
                case 'I':
                case 'J':
                case 'K':
                case 'L':
                case 'M':
                case 'N':
                case 'O':
                case 'P':
                case 'Q':
                case 'R':
                case 'S':
                case 'T':
                case 'U':
                case 'V':
                case 'W':
                case 'X':
                case 'Y':
                case 'Z':
                    token = ScanIdentifier(true);
                    break;

                case '[':
                    token = JSToken.LeftBracket;
                    ++m_currentPosition;
                    break;

                case '\\':
                    // try decoding the unicode escape sequence and checking for a valid identifier start
                    token = ScanIdentifier(true);
                    if (token != JSToken.Identifier)
                    {
                        if (GetChar(m_currentPosition + 1) == 'u')
                        {
                            // it was a unicode escape -- move past the whole "character" and mark it as illegal
                            var beforePeek = m_currentPosition;
                            PeekUnicodeEscape(m_strSourceCode, ref m_currentPosition);

                            // how far the peek moved us tells whether a whole escape was recognized
                            var count = m_currentPosition - beforePeek;
                            if (count > 1)
                            {
                                // the whole escape sequence is an invalid character
                                HandleError(JSError.IllegalChar);
                            }
                            else
                            {
                                // just the slash. Must not be a valid unicode escape.
                                // treat like a badly-escaped identifier, like: \umber
                                token = ScanIdentifier(true);
                                HandleError(JSError.BadHexEscapeSequence);
                            }
                        }
                        else if (IsValidIdentifierStart(m_strSourceCode, m_currentPosition + 1))
                        {
                            // if the NEXT character after the backslash is a valid identifier start
                            // then we're just going to assume we had something like \while,
                            // in which case we scan the identifier AFTER the slash
                            ++m_currentPosition;
                            token = ScanIdentifier(true);
                        }
                        else
                        {
                            // the one character is illegal
                            ++m_currentPosition;
                            HandleError(JSError.IllegalChar);
                        }
                    }
                    break;

                case ']':
                    token = JSToken.RightBracket;
                    ++m_currentPosition;
                    break;

                case '^':
                    // one of: ^  ^=
                    token = JSToken.BitwiseXor;
                    if ('=' == GetChar(++m_currentPosition))
                    {
                        ++m_currentPosition;
                        token = JSToken.BitwiseXorAssign;
                    }

                    break;

                case '#':
                    ++m_currentPosition;
                    token = IllegalCharacter();
                    break;

                case '`':
                    // start a template literal
                    token = ScanTemplateLiteral(ch);
                    break;

                case 'a':
                case 'b':
                case 'c':
                case 'd':
                case 'e':
                case 'f':
                case 'g':
                case 'h':
                case 'i':
                case 'j':
                case 'k':
                case 'l':
                case 'm':
                case 'n':
                case 'o':
                case 'p':
                case 'q':
                case 'r':
                case 's':
                case 't':
                case 'u':
                case 'v':
                case 'w':
                case 'x':
                case 'y':
                case 'z':
                    // lower-case start: could be a keyword; the keyword table is indexed by first letter
                    m_mightBeKeyword = true;
                    token = ScanKeyword(s_Keywords[ch - 'a']);
                    break;

                case '{':
                    token = JSToken.LeftCurly;
                    ++m_currentPosition;
                    break;

                case '|':
                    // one of: |  ||  |=
                    token = JSToken.BitwiseOr;
                    ch = GetChar(++m_currentPosition);
                    if ('|' == ch)
                    {
                        ++m_currentPosition;
                        token = JSToken.LogicalOr;
                    }
                    else if ('=' == ch)
                    {
                        ++m_currentPosition;
                        token = JSToken.BitwiseOrAssign;
                    }
                    break;

                case '}':
                    // just a regular close curly-brace.
                    token = JSToken.RightCurly;
                    ++m_currentPosition;
                    break;

                case '~':
                    token = JSToken.BitwiseNot;
                    ++m_currentPosition;
                    break;

                default:
                    if (ch == '\0')
                    {
                        if (IsEndOfFile)
                        {
                            token = JSToken.EndOfFile;
                            if (m_conditionalCompilationIfLevel > 0)
                            {
                                // an @if was never closed: set the token's end span first so
                                // the error is reported against a complete context
                                m_currentToken.EndLineNumber = m_currentLine;
                                m_currentToken.EndLinePosition = m_startLinePosition;
                                m_currentToken.EndPosition = m_currentPosition;
                                HandleError(JSError.NoCCEnd);
                            }
                        }
                        else
                        {
                            // a null character that is not the end of the file is illegal
                            ++m_currentPosition;
                            token = IllegalCharacter();
                        }
                    }
                    else if (ch == '\u2028' || ch == '\u2029')
                    {
                        // more line terminator
                        token = ScanLineTerminator(ch);
                    }
                    else if (0xd800 <= ch && ch <= 0xdbff)
                    {
                        // high-surrogate
                        var lowSurrogate = GetChar(m_currentPosition + 1);
                        if (0xdc00 <= lowSurrogate && lowSurrogate <= 0xdfff)
                        {
                            // use the surrogate pair
                            token = ScanIdentifier(true);
                            if (token != JSToken.Identifier)
                            {
                                // this surrogate pair isn't the start of an identifier,
                                // so together they are illegal here.
                                m_currentPosition += 2;
                                token = IllegalCharacter();
                            }
                        }
                        else
                        {
                            // high-surrogate NOT followed by a low surrogate
                            ++m_currentPosition;
                            token = IllegalCharacter();
                        }
                    }
                    else if (IsValidIdentifierStart(m_strSourceCode, m_currentPosition))
                    {
                        token = ScanIdentifier(true);
                    }
                    else if (IsBlankSpace(ch))
                    {
                        // we are asking for raw tokens, and this is the start of a stretch of whitespace.
                        // advance to the end of the whitespace, and return that as the token
                        token = JSToken.WhiteSpace;
                        while (JSScanner.IsBlankSpace(GetChar(++m_currentPosition)))
                        {
                            // increment handled in condition
                        }
                    }
                    else
                    {
                        ++m_currentPosition;
                        token = IllegalCharacter();
                    }

                    break;
            }

            // fix up the end of the token
            m_currentToken.EndLineNumber = m_currentLine;
            m_currentToken.EndLinePosition = m_startLinePosition;
            m_currentToken.EndPosition = m_currentPosition;

            // this is now the current token
            m_currentToken.Token = token;
            return m_currentToken;
        }