in Public/Src/FrontEnd/TypeScript.Net/TypeScript.Net/Scanning/Scanner.cs [869:1461]
/// <summary>
/// Scans the next token starting at the current position and returns its kind.
/// </summary>
/// <remarks>
/// The per-token flags are reset first, then the method loops until a token can be returned.
/// Trivia (line breaks, whitespace, comments, shebang, conflict markers) is either skipped with
/// <c>continue</c> or returned as its own token, depending on <c>m_needSkipTrivia</c> and, for comments,
/// <c>m_preserveComments</c>. Every returned kind is also stored in <c>m_token</c>; tokens that carry
/// text (strings, numbers, identifiers) additionally set <c>m_tokenValue</c>.
/// </remarks>
/// <returns>
/// The kind of the scanned token, or <see cref="SyntaxKind.EndOfFileToken"/> once the position reaches the end of the text.
/// </returns>
public SyntaxKind Scan()
{
// Remember where this scan began. m_tokenPos (below) is refreshed on every loop iteration,
// so after skipped trivia it points at the start of the token that is actually returned.
m_startPos = m_pos;
// Reset the per-token flags.
m_hasExtendedUnicodeEscape = false;
m_precedingLineBreak = false;
m_tokenIsUnterminated = false;
// Loop until some branch returns a token; 'continue' restarts the loop after skipped trivia.
while (true)
{
m_tokenPos = m_pos;
// End of input: AddLineEnding is called one last time with the current line start
// (presumably to close the final line in the line map -- confirm in AddLineEnding).
if (m_pos >= m_end)
{
AddLineEnding(m_lineMap, m_lineStart);
return m_token = SyntaxKind.EndOfFileToken;
}
var ch = m_text.CharCodeAt(m_pos);
// Special handling for shebang
// (only considered when '#' is the very first character of the text)
if (ch == CharacterCodes.Hash && m_pos == 0 && IsShebangTrivia(m_text, m_pos))
{
m_pos = ScanShebangTrivia(m_text, m_pos);
if (m_needSkipTrivia)
{
continue;
}
return m_token = SyntaxKind.ShebangTrivia;
}
// Non-ASCII line break characters are recorded here; CR and LF are recorded by CheckCrLf below.
// The next line starts right after this single character.
if (ch > CharacterCodes.MaxAsciiCharacter && IsLineBreak(ch))
{
AddLineEnding(m_lineMap, m_lineStart);
m_lineStart = m_pos + 1;
}
// Local helper: records a line ending for a CR or LF character 'c' and moves m_lineStart to the
// beginning of the following line. A CR immediately followed by LF is recorded once, not twice.
// Characters other than CR and LF are ignored.
// Pos is always called pointing to the next char after c
void CheckCrLf(CharacterCodes c, int pos)
{
switch (c)
{
case CharacterCodes.CarriageReturn:
AddLineEnding(m_lineMap, m_lineStart);
// In the case of CrLf, we record both altogether
if (m_text.CharCodeAt(pos) == CharacterCodes.LineFeed)
{
// The next line starts after the LF that follows this CR.
m_lineStart = pos + 1;
}
else
{
// Lone CR: the next line starts right after it.
m_lineStart = pos;
}
break;
case CharacterCodes.LineFeed:
// This case is reached from multi-line comments and also from the main switch below
// (for example for the LF of a CrLf pair when trivia is skipped one character at a time).
// In either case we check that we haven't considered both CrLf already:
// pos - 2 is the character right before this LF.
if (pos < 2 || m_text.CharCodeAt(pos - 2) != CharacterCodes.CarriageReturn)
{
AddLineEnding(m_lineMap, m_lineStart);
m_lineStart = pos;
}
break;
}
}
switch (ch)
{
case CharacterCodes.CarriageReturn:
case CharacterCodes.LineFeed:
CheckCrLf(ch, m_pos + 1);
m_precedingLineBreak = true;
// When skipping trivia only one character is consumed per iteration; for CrLf the LF is
// handled by the next iteration, where CheckCrLf sees the preceding CR and does not record it again.
if (m_needSkipTrivia)
{
m_pos++;
continue;
}
if (ch == CharacterCodes.CarriageReturn && m_pos + 1 < m_end &&
m_text.CharCodeAt(m_pos + 1) == CharacterCodes.LineFeed)
{
// consume both CR and LF
m_pos += 2;
}
else
{
m_pos++;
}
m_newLineTriviaCount++;
AssociateTrailingCommentsWithLastTrivia();
return m_token = SyntaxKind.NewLineTrivia;
case CharacterCodes.Tab:
case CharacterCodes.VerticalTab:
case CharacterCodes.FormFeed:
case CharacterCodes.Space:
// Skipping: advance one character and rescan. Otherwise: consume the whole whitespace run
// and return it as a single trivia token.
if (m_needSkipTrivia)
{
m_pos++;
continue;
}
while (m_pos < m_end && IsWhiteSpace(m_text.CharCodeAt(m_pos)))
{
m_pos++;
}
return m_token = SyntaxKind.WhitespaceTrivia;
case CharacterCodes.Exclamation:
// '!==', '!=' or '!' (longest match first)
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.equals)
{
if (m_text.CharCodeAt(m_pos + 2) == CharacterCodes.equals)
{
m_pos += 3;
return m_token = SyntaxKind.ExclamationEqualsEqualsToken;
}
m_pos += 2;
return m_token = SyntaxKind.ExclamationEqualsToken;
}
m_pos++;
return m_token = SyntaxKind.ExclamationToken;
case CharacterCodes.DoubleQuote:
case CharacterCodes.SingleQuote:
// m_pos is not advanced here; ScanString is presumably responsible for consuming the literal -- confirm.
m_tokenValue = ScanString();
return m_token = SyntaxKind.StringLiteral;
case CharacterCodes.Backtick:
{
// DScript-specific. We retrieve the factory name from the last parsed
// token. Note that this can be null (e.g. FirstToken). And we know
// it can only be a factory name if it is an identifier.
// (m_token and m_tokenValue still hold the previously scanned token at this point.)
var factoryName = m_token == SyntaxKind.Identifier ? m_tokenValue : null;
// Backslashes are allowed if it is a DScript path-like interpolation factory and the general configuration flag
// allows them
var backslashesAreAllowed = m_allowBackslashesInPathInterpolation && IsPathLikeInterpolationFactory(factoryName);
return m_token = ScanTemplateAndSetTokenValue(backslashesAreAllowed);
}
case CharacterCodes.Percent:
// '%=' or '%'
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.equals)
{
m_pos += 2;
return m_token = SyntaxKind.PercentEqualsToken;
}
m_pos++;
return m_token = SyntaxKind.PercentToken;
case CharacterCodes.Ampersand:
// '&&', '&=' or '&'
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.Ampersand)
{
m_pos += 2;
return m_token = SyntaxKind.AmpersandAmpersandToken;
}
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.equals)
{
m_pos += 2;
return m_token = SyntaxKind.AmpersandEqualsToken;
}
m_pos++;
return m_token = SyntaxKind.AmpersandToken;
case CharacterCodes.OpenParen:
m_pos++;
return m_token = SyntaxKind.OpenParenToken;
case CharacterCodes.CloseParen:
m_pos++;
return m_token = SyntaxKind.CloseParenToken;
case CharacterCodes.Asterisk:
// '*=', '**=', '**' or '*'
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.equals)
{
m_pos += 2;
return m_token = SyntaxKind.AsteriskEqualsToken;
}
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.Asterisk)
{
if (m_text.CharCodeAt(m_pos + 2) == CharacterCodes.equals)
{
m_pos += 3;
return m_token = SyntaxKind.AsteriskAsteriskEqualsToken;
}
m_pos += 2;
return m_token = SyntaxKind.AsteriskAsteriskToken;
}
m_pos++;
return m_token = SyntaxKind.AsteriskToken;
case CharacterCodes.Plus:
// '++', '+=' or '+'
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.Plus)
{
m_pos += 2;
return m_token = SyntaxKind.PlusPlusToken;
}
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.equals)
{
m_pos += 2;
return m_token = SyntaxKind.PlusEqualsToken;
}
m_pos++;
return m_token = SyntaxKind.PlusToken;
case CharacterCodes.Comma:
m_pos++;
return m_token = SyntaxKind.CommaToken;
case CharacterCodes.Minus:
// '--', '-=' or '-'
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.Minus)
{
m_pos += 2;
return m_token = SyntaxKind.MinusMinusToken;
}
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.equals)
{
m_pos += 2;
return m_token = SyntaxKind.MinusEqualsToken;
}
m_pos++;
return m_token = SyntaxKind.MinusToken;
case CharacterCodes.Dot:
// A dot followed by a digit starts a numeric literal (e.g. '.5'); otherwise '...' or '.'
if (IsDigit(m_text.CharCodeAt(m_pos + 1)))
{
m_tokenValue = ScanNumber();
return m_token = SyntaxKind.NumericLiteral;
}
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.Dot &&
m_text.CharCodeAt(m_pos + 2) == CharacterCodes.Dot)
{
m_pos += 3;
return m_token = SyntaxKind.DotDotDotToken;
}
m_pos++;
return m_token = SyntaxKind.DotToken;
case CharacterCodes.Slash:
// Single-line comment
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.Slash)
{
m_pos += 2;
// Consume up to, but not including, the terminating line break (or the end of the text).
while (m_pos < m_end)
{
if (IsLineBreak(m_text.CharCodeAt(m_pos)))
{
break;
}
m_pos++;
}
// Two different flags are consulted: the comment is recorded in m_comments whenever trivia
// is not being skipped, but it is only returned as a token when m_preserveComments is set;
// otherwise scanning continues with whatever follows the comment.
if (!m_needSkipTrivia)
{
m_comments.Add(new Trivia.Comment(TokenText, isMultiLine: false));
}
if (m_preserveComments)
{
return m_token = SyntaxKind.SingleLineCommentTrivia;
}
continue;
}
// Multi-line comment
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.Asterisk)
{
m_pos += 2;
var commentClosed = false;
while (m_pos < m_end)
{
var ch0 = m_text.CharCodeAt(m_pos);
if (ch0 == CharacterCodes.Asterisk && m_text.CharCodeAt(m_pos + 1) == CharacterCodes.Slash)
{
m_pos += 2;
commentClosed = true;
break;
}
// Line breaks inside the comment still count as a preceding line break for the next token.
// CheckCrLf only acts on CR and LF, so other line break characters do not reach
// AddLineEnding from within this loop.
if (IsLineBreak(ch0))
{
CheckCrLf(ch0, m_pos + 1);
m_precedingLineBreak = true;
}
m_pos++;
}
// The end of the text was reached without finding the closing '*/'.
if (!commentClosed)
{
Error(Errors.Asterisk_Slash_expected);
}
// Same flag split as for single-line comments; note that m_tokenIsUnterminated is only
// updated when trivia is not being skipped.
if (!m_needSkipTrivia)
{
m_comments.Add(new Trivia.Comment(TokenText, isMultiLine: true));
m_tokenIsUnterminated = !commentClosed;
}
if (m_preserveComments)
{
return m_token = SyntaxKind.MultiLineCommentTrivia;
}
continue;
}
// Not a comment: '/=' or '/'
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.equals)
{
m_pos += 2;
return m_token = SyntaxKind.SlashEqualsToken;
}
m_pos++;
return m_token = SyntaxKind.SlashToken;
case CharacterCodes._0:
// Prefixed literals. The 'm_pos + 2 < m_end' guard requires at least one character after the prefix.
// Hexadecimal: '0x' / '0X'
if (m_pos + 2 < m_end &&
(m_text.CharCodeAt(m_pos + 1) == CharacterCodes.X || m_text.CharCodeAt(m_pos + 1) == CharacterCodes.x))
{
m_pos += 2;
var value = ScanMinimumNumberOfHexDigits(1);
// A negative result is treated as "no valid digit found": report it and fall back to 0.
if (value < 0)
{
Error(Errors.Hexadecimal_digit_expected);
value = 0;
}
m_tokenValue = value.ToString();
return m_token = SyntaxKind.NumericLiteral;
}
// Binary: '0b' / '0B'
if (m_pos + 2 < m_end &&
(m_text.CharCodeAt(m_pos + 1) == CharacterCodes.B ||
m_text.CharCodeAt(m_pos + 1) == CharacterCodes.b))
{
m_pos += 2;
var value = ScanBinaryOrOctalDigits(/* base */ 2);
if (value < 0)
{
Error(Errors.Binary_digit_expected);
value = 0;
}
m_tokenValue = value.ToString();
return m_token = SyntaxKind.NumericLiteral;
}
// Octal: '0o' / '0O'
if (m_pos + 2 < m_end &&
(m_text.CharCodeAt(m_pos + 1) == CharacterCodes.O ||
m_text.CharCodeAt(m_pos + 1) == CharacterCodes.o))
{
m_pos += 2;
var value = ScanBinaryOrOctalDigits(/* base */ 8);
if (value < 0)
{
Error(Errors.Octal_digit_expected);
value = 0;
}
m_tokenValue = value.ToString();
return m_token = SyntaxKind.NumericLiteral;
}
// Try to parse as an octal
// (a leading zero directly followed by an octal digit, without an 'o' prefix)
if (m_pos + 1 < m_end && IsOctalDigit(m_text.CharCodeAt(m_pos + 1)))
{
m_tokenValue = ScanOctalDigits().ToString();
return m_token = SyntaxKind.NumericLiteral;
}
// This fall-through is a deviation from the EcmaScript grammar. The grammar says that a leading zero
// can only be followed by an octal digit, a dot, or the end of the int literal. However, we are being
// permissive and allowing decimal digits of the form 08* and 09* (which many browsers also do).
goto case CharacterCodes._1;
case CharacterCodes._1:
case CharacterCodes._2:
case CharacterCodes._3:
case CharacterCodes._4:
case CharacterCodes._5:
case CharacterCodes._6:
case CharacterCodes._7:
case CharacterCodes._8:
case CharacterCodes._9:
m_tokenValue = ScanNumber();
return m_token = SyntaxKind.NumericLiteral;
case CharacterCodes.Colon:
m_pos++;
return m_token = SyntaxKind.ColonToken;
case CharacterCodes.Semicolon:
m_pos++;
return m_token = SyntaxKind.SemicolonToken;
case CharacterCodes.LessThan:
// Conflict markers are checked before any operator starting with '<'.
if (IsConflictMarkerTrivia(m_text, m_pos))
{
m_pos = ScanConflictMarkerTrivia(m_text, m_pos, Error);
if (m_needSkipTrivia)
{
continue;
}
return m_token = SyntaxKind.ConflictMarkerTrivia;
}
// '<<=', '<<', '<=', (Jsx only) '</', or '<'
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.LessThan)
{
if (m_text.CharCodeAt(m_pos + 2) == CharacterCodes.equals)
{
m_pos += 3;
return m_token = SyntaxKind.LessThanLessThanEqualsToken;
}
m_pos += 2;
return m_token = SyntaxKind.LessThanLessThanToken;
}
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.equals)
{
m_pos += 2;
return m_token = SyntaxKind.LessThanEqualsToken;
}
// In the Jsx variant '</' is a single token, unless the slash begins a '/*' comment.
if (m_languageVariant == LanguageVariant.Jsx &&
m_text.CharCodeAt(m_pos + 1) == CharacterCodes.Slash &&
m_text.CharCodeAt(m_pos + 2) != CharacterCodes.Asterisk)
{
m_pos += 2;
return m_token = SyntaxKind.LessThanSlashToken;
}
m_pos++;
return m_token = SyntaxKind.LessThanToken;
case CharacterCodes.equals:
// Conflict markers are checked before any operator starting with '='.
if (IsConflictMarkerTrivia(m_text, m_pos))
{
m_pos = ScanConflictMarkerTrivia(m_text, m_pos, Error);
if (m_needSkipTrivia)
{
continue;
}
return m_token = SyntaxKind.ConflictMarkerTrivia;
}
// '===', '==', '=>' or '='
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.equals)
{
if (m_text.CharCodeAt(m_pos + 2) == CharacterCodes.equals)
{
m_pos += 3;
return m_token = SyntaxKind.EqualsEqualsEqualsToken;
}
m_pos += 2;
return m_token = SyntaxKind.EqualsEqualsToken;
}
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.GreaterThan)
{
m_pos += 2;
return m_token = SyntaxKind.EqualsGreaterThanToken;
}
m_pos++;
return m_token = SyntaxKind.EqualsToken;
case CharacterCodes.GreaterThan:
if (IsConflictMarkerTrivia(m_text, m_pos))
{
m_pos = ScanConflictMarkerTrivia(m_text, m_pos, Error);
if (m_needSkipTrivia)
{
continue;
}
return m_token = SyntaxKind.ConflictMarkerTrivia;
}
// Only a single '>' is produced here; compound tokens starting with '>' are never returned
// by this method (presumably they are formed by a separate re-scan step -- not visible here).
m_pos++;
return m_token = SyntaxKind.GreaterThanToken;
case CharacterCodes.Question:
m_pos++;
return m_token = SyntaxKind.QuestionToken;
case CharacterCodes.OpenBracket:
m_pos++;
return m_token = SyntaxKind.OpenBracketToken;
case CharacterCodes.CloseBracket:
m_pos++;
return m_token = SyntaxKind.CloseBracketToken;
case CharacterCodes.Caret:
// '^=' or '^'
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.equals)
{
m_pos += 2;
return m_token = SyntaxKind.CaretEqualsToken;
}
m_pos++;
return m_token = SyntaxKind.CaretToken;
case CharacterCodes.OpenBrace:
m_pos++;
return m_token = SyntaxKind.OpenBraceToken;
case CharacterCodes.Bar:
// '||', '|=' or '|'
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.Bar)
{
m_pos += 2;
return m_token = SyntaxKind.BarBarToken;
}
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.equals)
{
m_pos += 2;
return m_token = SyntaxKind.BarEqualsToken;
}
m_pos++;
return m_token = SyntaxKind.BarToken;
case CharacterCodes.CloseBrace:
m_pos++;
return m_token = SyntaxKind.CloseBraceToken;
case CharacterCodes.Tilde:
m_pos++;
return m_token = SyntaxKind.TildeToken;
case CharacterCodes.At:
// DS: in DScript ambient decorators could be used with @@ syntax.
// Both '@@' and '@' yield the same AtToken; only the number of consumed characters differs.
if (m_text.CharCodeAt(m_pos + 1) == CharacterCodes.At)
{
m_pos += 2;
return m_token = SyntaxKind.AtToken;
}
m_pos++;
return m_token = SyntaxKind.AtToken;
case CharacterCodes.Backslash:
// A backslash is only valid here when it starts a unicode escape that denotes an identifier start.
// NOTE(review): a negative PeekUnicodeEscape result appears to mean "no valid escape", and the
// 6 characters skipped below presumably correspond to the '\uXXXX' form -- confirm in PeekUnicodeEscape.
var cookedChar = (CharacterCodes)PeekUnicodeEscape();
if (cookedChar >= 0 && IsIdentifierStart(cookedChar, m_languageVersion))
{
m_pos += 6;
m_tokenValue = cookedChar.FromCharCode() + ScanIdentifierParts();
return m_token = GetIdentifierToken();
}
Error(Errors.Invalid_character);
m_pos++;
return m_token = SyntaxKind.Unknown;
default:
if (IsIdentifierStart(ch, m_languageVersion))
{
// Fast path: consume plain identifier characters. 'ch' is reassigned in the loop condition,
// so when the loop stops on a character it holds that first non-identifier-part character.
m_pos++;
while (m_pos < m_end && IsIdentifierPart(ch = m_text.CharCodeAt(m_pos), m_languageVersion))
{
m_pos++;
}
m_tokenValue = m_text.SubstringFromTo(m_tokenPos, m_pos);
// The identifier may continue with an escape sequence; let ScanIdentifierParts handle the rest.
if (ch == CharacterCodes.Backslash)
{
m_tokenValue += ScanIdentifierParts();
}
return m_token = GetIdentifierToken();
}
// Whitespace that has no explicit case above is always skipped, regardless of m_needSkipTrivia.
if (IsWhiteSpace(ch))
{
m_pos++;
continue;
}
// Likewise, line breaks other than CR/LF are always skipped here (never returned as NewLineTrivia),
// but they are still counted and flagged.
if (IsLineBreak(ch))
{
m_newLineTriviaCount++;
m_precedingLineBreak = true;
m_pos++;
continue;
}
// Anything else cannot start a token.
Error(Errors.Invalid_character);
m_pos++;
return m_token = SyntaxKind.Unknown;
}
}
}