in src/Shared/LanguageParser/CSharptokenEnumerator.cs [41:294]
/// <summary>
/// Consumes the next token from the reader and stores the resulting token
/// object in <c>current</c>.
/// </summary>
/// <remarks>
/// The checks below are tried in order and the first one that matches wins.
/// Lexical problems (unterminated comment or string, bad escape, missing
/// identifier or hex digits, unknown character) are reported as dedicated
/// token types rather than by throwing.
/// </remarks>
/// <returns>
/// Always <c>true</c> in this implementation: every path assigns a token to
/// <c>current</c> before returning.
/// </returns>
internal override bool FindNextToken()
{
    // Remember where this token begins so its text can be recovered later.
    int startPosition = _reader.Position;

    // Whitespace.
    if (_reader.SinkMultipleWhiteSpace())
    {
        current = new WhitespaceToken();
        return true;
    }

    // One-line comment: swallow everything up to the end of the line.
    if (_reader.Sink("//"))
    {
        _reader.SinkToEndOfLine();
        current = new CommentToken();
        return true;
    }

    // Multi-line comment.
    if (_reader.Sink("/*"))
    {
        _reader.SinkUntil("*/");

        // NOTE(review): "unterminated" is decided purely from EndOfLines after the
        // scan. Confirm that a comment whose closing */ is the very last text in
        // the file is not reported as unterminated.
        if (_reader.EndOfLines)
        {
            // There was a /* without a */. Return this as a syntax error token.
            current = new CSharpTokenizer.EndOfFileInsideCommentToken();
            return true;
        }

        current = new CommentToken();
        return true;
    }

    // Character literal.
    if (_reader.Sink("\'"))
    {
        // NOTE(review): this loop has no end-of-input check. Confirm how the reader
        // behaves when a character literal is left unterminated at end of file.
        while (_reader.CurrentCharacter != '\'')
        {
            // Consume a backslash if one is present; the unconditional
            // SinkCharacter below then consumes the character after it, so an
            // escaped tick (\') does not end the literal.
            //
            // This isn't exactly right. We should detect:
            //   simple-escape-sequence: one of
            //     \' \" \\ \0 \a \b \f \n \r \t \v
            //   hexadecimal-escape-sequence:
            //     \x hex-digit hex-digit[opt] hex-digit[opt] hex-digit[opt]
            _reader.Sink("\\");
            _reader.SinkCharacter();
        }

        // Consume the closing tick. The loop above only exits on a tick, so
        // anything else indicates a defect in this method.
        if (_reader.SinkCharacter() != '\'')
        {
            Debug.Assert(false, "Code defect in tokenizer: Should have yielded a closing tick.");
        }

        current = new CSharpTokenizer.CharLiteralToken();
        return true;
    }

    // Verbatim string. This must be tried before the identifier check below,
    // because that check also accepts a leading '@'.
    if (_reader.Sink("@\""))
    {
        do
        {
            // Inside a verbatim string "" is an escaped quote, not a terminator.
            while (_reader.Sink("\"\""))
            {
            }
        }
        while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"');

        // Can't end a file inside a string.
        // NOTE(review): as with comments, confirm that a string whose closing
        // quote is the last text in the file does not land here.
        if (_reader.EndOfLines)
        {
            current = new EndOfFileInsideStringToken();
            return true;
        }

        // The closing quote was already consumed by the loop condition above.
        // Substring(1) drops the leading '@' from the matched text; the quotes
        // are left in place.
        current = new StringLiteralToken();
        current.InnerText = _reader.GetCurrentMatchedString(startPosition).Substring(1);
        return true;
    }

    // Regular quoted string.
    if (_reader.Sink("\""))
    {
        while (_reader.CurrentCharacter == '\\' || _reader.MatchRegularStringLiteral())
        {
            // Consume one character. If it was a backslash, what follows must be
            // a recognizable escape sequence.
            //
            // This isn't nearly right. We just do barely enough to make a string
            // with an embedded escape sequence return _some_ string whose start
            // and end match the real bounds of the string.
            if (_reader.SinkCharacter() == '\\' && !_reader.SinkStringEscape())
            {
                // This is a compiler error.
                _reader.SinkCharacter();
                current = new CSharpTokenizer.UnrecognizedStringEscapeToken();
                return true;
            }
        }

        // A newline before the closing quote is an error; the newline itself is
        // not consumed here.
        if (TokenChar.IsNewLine(_reader.CurrentCharacter))
        {
            current = new CSharpTokenizer.NewlineInsideStringToken();
            return true;
        }

        // Consume the closing quote.
        if (_reader.SinkCharacter() != '\"')
        {
            Debug.Assert(false, "Defect in tokenizer: Should have yielded a terminating quote.");
        }

        current = new StringLiteralToken();
        return true;
    }

    // Identifier, keyword, or one of the literals true / false / null.
    // From 2.4.2 Identifiers: A '@' can be used to prefix an identifier so that
    // a keyword can be used as an identifier.
    if (_reader.CurrentCharacter == '@' || _reader.MatchNextIdentifierStart())
    {
        if (_reader.CurrentCharacter == '@')
        {
            _reader.SinkCharacter();
        }

        // Now, the next character must be an identifier start.
        if (!_reader.SinkIdentifierStart())
        {
            current = new ExpectedIdentifierToken();
            return true;
        }

        // Sink the rest of the identifier.
        while (_reader.SinkIdentifierPart())
        {
        }

        // The matched text still includes any leading '@', so an escaped name
        // such as @class matches neither the literal cases nor the keyword list.
        string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition);
        switch (identifierOrKeyword)
        {
            case "false":
            case "true":
                current = new BooleanLiteralToken();
                return true;

            case "null":
                current = new CSharpTokenizer.NullLiteralToken();
                return true;

            default:
                if (Array.IndexOf(s_keywordList, identifierOrKeyword) >= 0)
                {
                    current = new KeywordToken();
                    return true;
                }

                // If the identifier starts with '@' then we need to strip it off.
                // The '@' is for escaping so that we can have an identifier called
                // the same thing as a reserved keyword (i.e. class, if, foreach, etc).
                // The text read above is reused instead of asking the reader again.
                string identifier = identifierOrKeyword.StartsWith("@", StringComparison.Ordinal)
                    ? identifierOrKeyword.Substring(1)
                    : identifierOrKeyword;

                current = new IdentifierToken();
                current.InnerText = identifier;
                return true;
        }
    }

    // Open scope.
    if (_reader.Sink("{"))
    {
        current = new CSharpTokenizer.OpenScopeToken();
        return true;
    }

    // Close scope.
    if (_reader.Sink("}"))
    {
        current = new CSharpTokenizer.CloseScopeToken();
        return true;
    }

    // Hexadecimal integer literal. Tried before the decimal branch, which would
    // otherwise presumably consume the leading 0 on its own.
    if (_reader.SinkIgnoreCase("0x"))
    {
        // Sink the hex digits; at least one is required.
        if (!_reader.SinkMultipleHexDigits())
        {
            current = new ExpectedValidHexDigitToken();
            return true;
        }

        // Skip the L, U, l, u, ul, etc.
        _reader.SinkLongIntegerSuffix();
        current = new HexIntegerLiteralToken();
        return true;
    }

    // Decimal integer literal.
    if (_reader.SinkMultipleDecimalDigits())
    {
        // Skip the L, U, l, u, ul, etc.
        _reader.SinkLongIntegerSuffix();
        current = new DecimalIntegerLiteralToken();
        return true;
    }

    // Operators and punctuators.
    if (_reader.SinkOperatorOrPunctuator())
    {
        current = new OperatorOrPunctuatorToken();
        return true;
    }

    // Preprocessor line. #if and #endif get their own token types; any other
    // directive is a generic preprocessor token. In every case the remainder of
    // the line is consumed as part of the token.
    if (_reader.CurrentCharacter == '#')
    {
        if (_reader.Sink("#if"))
        {
            current = new OpenConditionalDirectiveToken();
        }
        else if (_reader.Sink("#endif"))
        {
            current = new CloseConditionalDirectiveToken();
        }
        else
        {
            current = new PreprocessorToken();
        }

        _reader.SinkToEndOfLine();
        return true;
    }

    // We didn't recognize the token, so this is a syntax error. Consume one
    // character so the next call makes progress.
    _reader.SinkCharacter();
    current = new UnrecognizedToken();
    return true;
}