in AjaxMinDll/JavaScript/jsscanner.cs [320:1098]
/// <summary>
/// Scans one raw token beginning at the current scan position. Whitespace, line terminators
/// and comments are returned as tokens in their own right rather than being skipped.
/// </summary>
/// <remarks>
/// The start line/position of <c>m_currentToken</c> is recorded before scanning, and the end
/// line/position and token type are recorded afterwards. Scanner state flags such as
/// <c>m_inSingleLineComment</c>, <c>m_inMultipleLineComment</c>, <c>m_inConditionalComment</c>,
/// <c>m_conditionalCompilationOn</c> and <c>m_conditionalCompilationIfLevel</c> are updated
/// as comment and conditional-compilation constructs are encountered.
/// </remarks>
/// <returns>
/// The scanner's shared <c>m_currentToken</c> context, updated to describe the token just scanned.
/// </returns>
public Context ScanNextToken()
{
    var token = JSToken.None;

    // the token starts wherever the scanner currently is
    m_currentToken.StartPosition = m_currentPosition;
    m_currentToken.StartLineNumber = m_currentLine;
    m_currentToken.StartLinePosition = m_startLinePosition;

    // reset the per-token identifier state
    m_identifier.Length = 0;
    m_mightBeKeyword = false;

    // our case switch should be pretty efficient -- it's 9-13 and 32-126. Those are the most common characters
    // we will find in the code for the start of tokens.
    char ch = GetChar(m_currentPosition);
    switch (ch)
    {
        case '\n':
        case '\r':
            token = ScanLineTerminator(ch);
            break;

        case '\t':
        case '\v':
        case '\f':
        case ' ':
            // we are asking for raw tokens, and this is the start of a stretch of whitespace.
            // advance to the end of the whitespace, and return that as the token
            token = JSToken.WhiteSpace;
            while (JSScanner.IsBlankSpace(GetChar(++m_currentPosition)))
            {
                // increment handled by condition
            }
            break;

        case '!':
            // one of: !  !=  !==
            token = JSToken.LogicalNot;
            if ('=' == GetChar(++m_currentPosition))
            {
                token = JSToken.NotEqual;
                if ('=' == GetChar(++m_currentPosition))
                {
                    ++m_currentPosition;
                    token = JSToken.StrictNotEqual;
                }
            }
            break;

        case '"':
        case '\'':
            token = JSToken.StringLiteral;
            ScanString(ch);
            break;

        case '$':
        case '_':
            token = ScanIdentifier(true);
            break;

        case '%':
            // one of: %  %=
            token = JSToken.Modulo;
            if ('=' == GetChar(++m_currentPosition))
            {
                ++m_currentPosition;
                token = JSToken.ModuloAssign;
            }
            break;

        case '&':
            // one of: &  &&  &=
            token = JSToken.BitwiseAnd;
            ch = GetChar(++m_currentPosition);
            if ('&' == ch)
            {
                ++m_currentPosition;
                token = JSToken.LogicalAnd;
            }
            else if ('=' == ch)
            {
                ++m_currentPosition;
                token = JSToken.BitwiseAndAssign;
            }
            break;

        case '(':
            token = JSToken.LeftParenthesis;
            ++m_currentPosition;
            break;

        case ')':
            token = JSToken.RightParenthesis;
            ++m_currentPosition;
            break;

        case '*':
            // one of: *  *=
            token = JSToken.Multiply;
            if ('=' == GetChar(++m_currentPosition))
            {
                ++m_currentPosition;
                token = JSToken.MultiplyAssign;
            }
            break;

        case '+':
            // one of: +  ++  +=
            token = JSToken.Plus;
            ch = GetChar(++m_currentPosition);
            if ('+' == ch)
            {
                ++m_currentPosition;
                token = JSToken.Increment;
            }
            else if ('=' == ch)
            {
                ++m_currentPosition;
                token = JSToken.PlusAssign;
            }
            break;

        case ',':
            token = JSToken.Comma;
            ++m_currentPosition;
            break;

        case '-':
            // one of: -  --  -=
            token = JSToken.Minus;
            ch = GetChar(++m_currentPosition);
            if ('-' == ch)
            {
                ++m_currentPosition;
                token = JSToken.Decrement;
            }
            else if ('=' == ch)
            {
                ++m_currentPosition;
                token = JSToken.MinusAssign;
            }
            break;

        case '.':
            // one of: .  ...  or a numeric literal that starts with a decimal point
            token = JSToken.AccessField;
            ch = GetChar(++m_currentPosition);
            if (ch == '.' && GetChar(m_currentPosition + 1) == '.')
            {
                // only consume the second and third dots when all three are present;
                // peeking (instead of advancing) keeps a two-dot sequence from being
                // swallowed into a single member-access token.
                token = JSToken.RestSpread;
                m_currentPosition += 2;
            }
            else if (IsDigit(ch))
            {
                token = ScanNumber('.');
            }
            break;

        case '/':
            // one of: /  /=  a single-line comment  a multi-line comment
            token = JSToken.Divide;
            ch = GetChar(++m_currentPosition);
            switch (ch)
            {
                case '/':
                    token = JSToken.SingleLineComment;
                    m_inSingleLineComment = true;
                    ch = GetChar(++m_currentPosition);

                    // see if there is a THIRD slash character
                    if (ch == '/')
                    {
                        // advance past the slash and see if we have one of our special preprocessing directives
                        if (GetChar(++m_currentPosition) == '#')
                        {
                            // scan preprocessing directives
                            token = JSToken.PreprocessorDirective;
                            if (!ScanPreprocessingDirective())
                            {
                                // if it returns false, we don't want to skip the rest of
                                // the comment line; just exit
                                break;
                            }
                        }
                    }
                    else if (ch == '@' && !IgnoreConditionalCompilation)
                    {
                        // we got //@
                        // if we have not turned on conditional-compilation yet, then check to see if that's
                        // what we're trying to do now.
                        // we are currently on the @ -- start peeking from there
                        if (m_conditionalCompilationOn
                            || CheckSubstring(m_currentPosition + 1, "cc_on"))
                        {
                            // if the NEXT character is not an identifier character, then we need to skip
                            // the @ character -- otherwise leave it there
                            if (!IsValidIdentifierStart(m_strSourceCode, m_currentPosition + 1))
                            {
                                ++m_currentPosition;
                            }

                            // we are now in a conditional comment
                            m_inConditionalComment = true;
                            token = JSToken.ConditionalCommentStart;
                            break;
                        }
                    }

                    SkipSingleLineComment();

                    // if we're still in a multiple-line comment, then we must've been in
                    // a multi-line CONDITIONAL comment, in which case this normal one-line comment
                    // won't turn off conditional comments just because we hit the end of line.
                    if (!m_inMultipleLineComment && m_inConditionalComment)
                    {
                        m_inConditionalComment = false;
                        token = JSToken.ConditionalCommentEnd;
                    }
                    break;

                case '*':
                    m_inMultipleLineComment = true;
                    token = JSToken.MultipleLineComment;
                    ch = GetChar(++m_currentPosition);
                    if (ch == '@' && !IgnoreConditionalCompilation)
                    {
                        // we have /*@
                        // if we have not turned on conditional-compilation yet, then let's peek to see if the next
                        // few characters are cc_on -- if so, turn it on.
                        if (!m_conditionalCompilationOn)
                        {
                            // we are currently on the @ -- start peeking from there
                            if (!CheckSubstring(m_currentPosition + 1, "cc_on"))
                            {
                                // we aren't turning on conditional comments. We need to ignore this comment
                                // as just another multi-line comment
                                SkipMultilineComment();
                                token = JSToken.MultipleLineComment;
                                break;
                            }
                        }

                        // if the NEXT character is not an identifier character, then we need to skip
                        // the @ character -- otherwise leave it there
                        if (!IsValidIdentifierStart(m_strSourceCode, m_currentPosition + 1))
                        {
                            ++m_currentPosition;
                        }

                        // we are now in a conditional comment
                        m_inConditionalComment = true;
                        token = JSToken.ConditionalCommentStart;
                        break;
                    }
                    else if (ch == '/')
                    {
                        // We have /*/
                        // advance past the slash and see if we have one of our special preprocessing directives
                        if (GetChar(++m_currentPosition) == '#')
                        {
                            // scan preprocessing directives. When it exits we will still be within
                            // the multiline comment, since none of the directives should eat the closing */.
                            // therefore no matter the reason we exit the scan, we always want to skip
                            // the rest of the multiline comment.
                            token = JSToken.PreprocessorDirective;
                            if (!ScanPreprocessingDirective())
                            {
                                // if it returns false, we fully processed the comment
                                // and just want to exit now.
                                break;
                            }
                        }
                    }

                    // skip the rest of the comment
                    SkipMultilineComment();
                    break;

                case '=':
                    m_currentPosition++;
                    token = JSToken.DivideAssign;
                    break;
            }
            break;

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            // ScanNumber is handed the leading character with the position already past it
            ++m_currentPosition;
            token = ScanNumber(ch);
            break;

        case ':':
            token = JSToken.Colon;
            ++m_currentPosition;
            break;

        case ';':
            token = JSToken.Semicolon;
            ++m_currentPosition;
            break;

        case '<':
            // one of: <  <=  <<  <<=  or (when enabled) the start of an ASP.NET block.
            // PEEK at the next character for the ASP.NET check rather than advancing to it:
            // advancing here and then again in the operator branch would skip over the
            // character that follows the '<' whenever ASP.NET blocks are allowed but the
            // next character is not '%'.
            if (AllowEmbeddedAspNetBlocks &&
                '%' == GetChar(m_currentPosition + 1))
            {
                // move onto the '%' before handing off to the ASP.NET block scanner
                ++m_currentPosition;
                token = ScanAspNetBlock();
            }
            else
            {
                token = JSToken.LessThan;
                if ('<' == GetChar(++m_currentPosition))
                {
                    ++m_currentPosition;
                    token = JSToken.LeftShift;
                }

                // an equals-sign turns whichever operator we have so far into its "=" form
                if ('=' == GetChar(m_currentPosition))
                {
                    ++m_currentPosition;
                    token = token == JSToken.LessThan
                        ? JSToken.LessThanEqual
                        : JSToken.LeftShiftAssign;
                }
            }
            break;

        case '=':
            // one of: =  ==  ===  =>
            token = JSToken.Assign;
            if ('=' == GetChar(++m_currentPosition))
            {
                token = JSToken.Equal;
                if ('=' == GetChar(++m_currentPosition))
                {
                    ++m_currentPosition;
                    token = JSToken.StrictEqual;
                }
            }
            else if (GetChar(m_currentPosition) == '>')
            {
                ++m_currentPosition;
                token = JSToken.ArrowFunction;
            }
            break;

        case '>':
            // one of: >  >=  >>  >>=  >>>  >>>=
            token = JSToken.GreaterThan;
            if ('>' == GetChar(++m_currentPosition))
            {
                token = JSToken.RightShift;
                if ('>' == GetChar(++m_currentPosition))
                {
                    ++m_currentPosition;
                    token = JSToken.UnsignedRightShift;
                }
            }

            // an equals-sign turns whichever operator we have so far into its "=" form
            if ('=' == GetChar(m_currentPosition))
            {
                ++m_currentPosition;
                token = token == JSToken.GreaterThan ? JSToken.GreaterThanEqual
                    : token == JSToken.RightShift ? JSToken.RightShiftAssign
                    : token == JSToken.UnsignedRightShift ? JSToken.UnsignedRightShiftAssign
                    : JSToken.Error;
            }
            break;

        case '?':
            token = JSToken.ConditionalIf;
            ++m_currentPosition;
            break;

        case '@':
            if (IgnoreConditionalCompilation)
            {
                // if the switch to ignore conditional compilation is on, then we don't know
                // anything about conditional-compilation statements, and the @-sign character
                // is illegal at this spot.
                ++m_currentPosition;
                token = IllegalCharacter();
                break;
            }

            // see if the @-sign is immediately followed by an identifier. If it is,
            // we'll see which one so we can tell if it's a conditional-compilation statement
            // need to make sure the context INCLUDES the @ sign
            int startPosition = ++m_currentPosition;
            ScanIdentifier(false);

            // dispatch on the length of the identifier that followed the @-sign
            switch (m_currentPosition - startPosition)
            {
                case 0:
                    // look for '@*/'.
                    if ('*' == GetChar(m_currentPosition) && '/' == GetChar(m_currentPosition + 1))
                    {
                        m_currentPosition += 2;
                        m_inMultipleLineComment = false;
                        m_inConditionalComment = false;
                        token = JSToken.ConditionalCommentEnd;
                        break;
                    }

                    // otherwise we just have a @ sitting by itself!
                    // throw an error and loop back to the next token.
                    token = IllegalCharacter();
                    break;

                case 2:
                    if (CheckSubstring(startPosition, "if"))
                    {
                        token = JSToken.ConditionalCompilationIf;

                        // increment the if-level
                        ++m_conditionalCompilationIfLevel;

                        // if we're not in a conditional comment and we haven't explicitly
                        // turned on conditional compilation when we encounter
                        // a @if statement, then we can implicitly turn it on.
                        if (!m_inConditionalComment && !m_conditionalCompilationOn)
                        {
                            m_conditionalCompilationOn = true;
                        }
                        break;
                    }

                    // the string isn't a known preprocessor command, so
                    // fall into the default processing to handle it as a variable name
                    goto default;

                case 3:
                    if (CheckSubstring(startPosition, "set"))
                    {
                        token = JSToken.ConditionalCompilationSet;

                        // if we're not in a conditional comment and we haven't explicitly
                        // turned on conditional compilation when we encounter
                        // a @set statement, then we can implicitly turn it on.
                        if (!m_inConditionalComment && !m_conditionalCompilationOn)
                        {
                            m_conditionalCompilationOn = true;
                        }
                        break;
                    }

                    if (CheckSubstring(startPosition, "end"))
                    {
                        token = JSToken.ConditionalCompilationEnd;
                        if (m_conditionalCompilationIfLevel > 0)
                        {
                            // down one more @if level
                            m_conditionalCompilationIfLevel--;
                        }
                        else
                        {
                            // no corresponding @if -- invalid @end statement
                            HandleError(JSError.CCInvalidEnd);
                        }
                        break;
                    }

                    // the string isn't a known preprocessor command, so
                    // fall into the default processing to handle it as a variable name
                    goto default;

                case 4:
                    if (CheckSubstring(startPosition, "else"))
                    {
                        token = JSToken.ConditionalCompilationElse;

                        // if we don't have a corresponding @if statement, then throw an error
                        // (but keep processing)
                        if (m_conditionalCompilationIfLevel <= 0)
                        {
                            HandleError(JSError.CCInvalidElse);
                        }
                        break;
                    }

                    if (CheckSubstring(startPosition, "elif"))
                    {
                        token = JSToken.ConditionalCompilationElseIf;

                        // if we don't have a corresponding @if statement, then throw an error
                        // (but keep processing)
                        if (m_conditionalCompilationIfLevel <= 0)
                        {
                            HandleError(JSError.CCInvalidElseIf);
                        }
                        break;
                    }

                    // the string isn't a known preprocessor command, so
                    // fall into the default processing to handle it as a variable name
                    goto default;

                case 5:
                    if (CheckSubstring(startPosition, "cc_on"))
                    {
                        // turn it on and return the @cc_on token
                        m_conditionalCompilationOn = true;
                        token = JSToken.ConditionalCompilationOn;
                        break;
                    }

                    // the string isn't a known preprocessor command, so
                    // fall into the default processing to handle it as a variable name
                    goto default;

                default:
                    // we have @[id], where [id] is a valid identifier.
                    // if we haven't explicitly turned on conditional compilation,
                    // we'll keep processing, but we need to fire an error to indicate
                    // that the code should turn it on first.
                    if (!m_conditionalCompilationOn)
                    {
                        HandleError(JSError.CCOff);
                    }

                    token = JSToken.ConditionalCompilationVariable;
                    break;
            }
            break;

        case 'A':
        case 'B':
        case 'C':
        case 'D':
        case 'E':
        case 'F':
        case 'G':
        case 'H':
        case 'I':
        case 'J':
        case 'K':
        case 'L':
        case 'M':
        case 'N':
        case 'O':
        case 'P':
        case 'Q':
        case 'R':
        case 'S':
        case 'T':
        case 'U':
        case 'V':
        case 'W':
        case 'X':
        case 'Y':
        case 'Z':
            token = ScanIdentifier(true);
            break;

        case '[':
            token = JSToken.LeftBracket;
            ++m_currentPosition;
            break;

        case '\\':
            // try decoding the unicode escape sequence and checking for a valid identifier start
            token = ScanIdentifier(true);
            if (token != JSToken.Identifier)
            {
                if (GetChar(m_currentPosition + 1) == 'u')
                {
                    // it was a unicode escape -- move past the whole "character" and mark it as illegal
                    var beforePeek = m_currentPosition;
                    PeekUnicodeEscape(m_strSourceCode, ref m_currentPosition);
                    var count = m_currentPosition - beforePeek;
                    if (count > 1)
                    {
                        // the whole escape sequence is an invalid character
                        HandleError(JSError.IllegalChar);
                    }
                    else
                    {
                        // just the slash. Must not be a valid unicode escape.
                        // treat like a badly-escaped identifier, like: \umber
                        token = ScanIdentifier(true);
                        HandleError(JSError.BadHexEscapeSequence);
                    }
                }
                else if (IsValidIdentifierStart(m_strSourceCode, m_currentPosition + 1))
                {
                    // if the NEXT character after the backslash is a valid identifier start
                    // then we're just going to assume we had something like \while,
                    // in which case we scan the identifier AFTER the slash
                    ++m_currentPosition;
                    token = ScanIdentifier(true);
                }
                else
                {
                    // the one character is illegal
                    ++m_currentPosition;
                    HandleError(JSError.IllegalChar);
                }
            }
            break;

        case ']':
            token = JSToken.RightBracket;
            ++m_currentPosition;
            break;

        case '^':
            // one of: ^  ^=
            token = JSToken.BitwiseXor;
            if ('=' == GetChar(++m_currentPosition))
            {
                ++m_currentPosition;
                token = JSToken.BitwiseXorAssign;
            }
            break;

        case '#':
            ++m_currentPosition;
            token = IllegalCharacter();
            break;

        case '`':
            // start a template literal
            token = ScanTemplateLiteral(ch);
            break;

        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'e':
        case 'f':
        case 'g':
        case 'h':
        case 'i':
        case 'j':
        case 'k':
        case 'l':
        case 'm':
        case 'n':
        case 'o':
        case 'p':
        case 'q':
        case 'r':
        case 's':
        case 't':
        case 'u':
        case 'v':
        case 'w':
        case 'x':
        case 'y':
        case 'z':
            // lower-case start: look up the keyword table entry indexed by the first letter
            m_mightBeKeyword = true;
            token = ScanKeyword(s_Keywords[ch - 'a']);
            break;

        case '{':
            token = JSToken.LeftCurly;
            ++m_currentPosition;
            break;

        case '|':
            // one of: |  ||  |=
            token = JSToken.BitwiseOr;
            ch = GetChar(++m_currentPosition);
            if ('|' == ch)
            {
                ++m_currentPosition;
                token = JSToken.LogicalOr;
            }
            else if ('=' == ch)
            {
                ++m_currentPosition;
                token = JSToken.BitwiseOrAssign;
            }
            break;

        case '}':
            // just a regular close curly-brace.
            token = JSToken.RightCurly;
            ++m_currentPosition;
            break;

        case '~':
            token = JSToken.BitwiseNot;
            ++m_currentPosition;
            break;

        default:
            if (ch == '\0')
            {
                if (IsEndOfFile)
                {
                    token = JSToken.EndOfFile;
                    if (m_conditionalCompilationIfLevel > 0)
                    {
                        // an @if was never closed: set the end of the token context
                        // before reporting the error
                        m_currentToken.EndLineNumber = m_currentLine;
                        m_currentToken.EndLinePosition = m_startLinePosition;
                        m_currentToken.EndPosition = m_currentPosition;
                        HandleError(JSError.NoCCEnd);
                    }
                }
                else
                {
                    // a null character that is not the end of the file
                    ++m_currentPosition;
                    token = IllegalCharacter();
                }
            }
            else if (ch == '\u2028' || ch == '\u2029')
            {
                // more line terminator
                token = ScanLineTerminator(ch);
            }
            else if (0xd800 <= ch && ch <= 0xdbff)
            {
                // high-surrogate
                var lowSurrogate = GetChar(m_currentPosition + 1);
                if (0xdc00 <= lowSurrogate && lowSurrogate <= 0xdfff)
                {
                    // use the surrogate pair
                    token = ScanIdentifier(true);
                    if (token != JSToken.Identifier)
                    {
                        // this surrogate pair isn't the start of an identifier,
                        // so together they are illegal here.
                        m_currentPosition += 2;
                        token = IllegalCharacter();
                    }
                }
                else
                {
                    // high-surrogate NOT followed by a low surrogate
                    ++m_currentPosition;
                    token = IllegalCharacter();
                }
            }
            else if (IsValidIdentifierStart(m_strSourceCode, m_currentPosition))
            {
                token = ScanIdentifier(true);
            }
            else if (IsBlankSpace(ch))
            {
                // we are asking for raw tokens, and this is the start of a stretch of whitespace.
                // advance to the end of the whitespace, and return that as the token
                token = JSToken.WhiteSpace;
                while (JSScanner.IsBlankSpace(GetChar(++m_currentPosition)))
                {
                    // increment handled in condition
                }
            }
            else
            {
                ++m_currentPosition;
                token = IllegalCharacter();
            }
            break;
    }

    // fix up the end of the token
    m_currentToken.EndLineNumber = m_currentLine;
    m_currentToken.EndLinePosition = m_startLinePosition;
    m_currentToken.EndPosition = m_currentPosition;

    // this is now the current token
    m_currentToken.Token = token;
    return m_currentToken;
}