in src/Parsing/Impl/Tokens/Tokenizer.cs [403:585]
/// <summary>
/// Reads the next token from the input and returns it.
/// </summary>
/// <remarks>
/// If a previous call left an unterminated string in <c>_state.IncompleteString</c> and more
/// input is available, that string is resumed via <c>ContinueString</c>. Otherwise the method
/// discards the current token buffer, reads one character and dispatches on it in a loop:
/// branches that only consume ignorable input (form feeds, blanks, skipped comments, joined
/// lines, newlines for which no token is produced) load the next character into <c>ch</c>
/// and <c>break</c> back to the top of the loop; every other branch returns a token.
/// </remarks>
/// <returns>The token produced for the consumed input.</returns>
private Token Next() {
// Captured before any input is consumed; only used by SkipWhiteSpace below.
var at_beginning = AtBeginning;
// Resume a string literal left unfinished by an earlier call, unless we are at EOF.
if (_state.IncompleteString != null && Peek() != EOF) {
var prev = _state.IncompleteString;
// Clear the pending state before continuing so it is not resumed twice.
_state.IncompleteString = null;
// Rebuild the quote character and prefix flags from the saved incomplete string.
return ContinueString(prev.IsSingleTickQuote ? '\'' : '"', prev.IsRaw, prev.IsUnicode, false, prev.IsTripleQuoted, prev.IsFormatted, 0);
}
DiscardToken();
var ch = NextChar();
// Dispatch loop: each iteration inspects the character currently held in ch.
while (true) {
switch (ch) {
case EOF:
return ReadEof();
case '\f':
// Ignore form feeds
// In verbatim mode the form feed is still recorded as part of the pending white space.
if (Verbatim) {
_state.CurWhiteSpace.Append((char)ch);
}
DiscardToken();
ch = NextChar();
break;
case ' ':
case '\t':
// SkipWhiteSpace hands back the character to dispatch on next
// (presumably the first one after the run of blanks - confirm in callee).
ch = SkipWhiteSpace(ch, at_beginning);
break;
case '#':
// Record where the comment starts; the -1 column adjustment presumably accounts for
// the '#' that NextChar has already consumed - TODO confirm.
_commentLocations.Add(CurrentPosition.AddColumns(-1));
// Either verbatim option requires the comment text to be read rather than skipped.
if ((_options & (TokenizerOptions.VerbatimCommentsAndLineJoins | TokenizerOptions.Verbatim)) != 0) {
var commentRes = ReadSingleLineComment(out ch);
if ((_options & TokenizerOptions.VerbatimCommentsAndLineJoins) == 0) {
// Plain verbatim mode: the comment is folded into the pending white space
// instead of being returned as its own token, and scanning continues with ch.
_state.CurWhiteSpace.Append(commentRes.VerbatimImage);
DiscardToken();
// NOTE(review): the reason for advancing by one here is not visible in this method;
// presumably it re-syncs the buffer position with the character returned in ch - confirm.
SeekRelative(+1);
} else {
// Comments are reported as tokens in this mode.
return commentRes;
}
} else {
// Non-verbatim: drop the comment; the callee returns the next character to dispatch on.
ch = SkipSingleLineComment();
}
break;
case '\\':
// nlKind is declared here but is also assigned in the default section below;
// all sections of a C# switch share one declaration space.
NewLineKind nlKind;
var nextChar = NextChar();
// A backslash immediately followed by an end-of-line is an explicit line join.
if ((nlKind = ReadEolnOpt(nextChar)) != NewLineKind.None) {
_newLineLocations.Add(new NewLineLocation(CurrentIndex, nlKind));
if ((_options & TokenizerOptions.VerbatimCommentsAndLineJoins) != 0) {
// report the explicit line join
MarkTokenEnd();
return new VerbatimToken(TokenKind.ExplicitLineJoin, "\\" + nlKind.GetString(), "<explicit line join>");
} else {
DiscardToken();
// discard token '\\<eoln>':
// When white space is being tracked, keep the joined-line text as part of it.
if (_state.CurWhiteSpace != null) {
_state.CurWhiteSpace.Append('\\');
_state.CurWhiteSpace.Append(nlKind.GetString());
}
}
ch = NextChar();
// Input ended right after a line join; flag it for the caller.
// NOTE(review): -1 is presumably the same value as EOF - confirm.
if (ch == -1) {
EndContinues = true;
}
break;
} else {
// Backslash as the very last character of the input: flag it and report end of file,
// keeping the backslash as the token's verbatim image.
if (nextChar == -1) {
EndContinues = true;
MarkTokenEnd();
return new VerbatimToken(TokenKind.EndOfFile, "\\", "<eof>");
}
// Not a line join: un-read the look-ahead character and let the default section
// handle the backslash itself (ch still holds '\\' at this point).
BufferBack();
goto default;
}
case '\"':
case '\'':
// Unprefixed string literal; the quote character selects the delimiter.
_state.LastNewLine = false;
return ReadString((char)ch, false, false, false, false);
case 'u':
case 'U':
_state.LastNewLine = false;
// The u prefix was reintroduced to Python 3.3 in PEP 414
// For 3.0-3.2 the character can only start a name.
if (LanguageVersion.Is2x() || LanguageVersion >= PythonLanguageVersion.V33) {
return ReadNameOrUnicodeString();
}
return ReadName();
case 'r':
case 'R':
// Raw-string prefix handling is not gated on the language version here.
_state.LastNewLine = false;
return ReadNameOrRawString();
case 'b':
case 'B':
_state.LastNewLine = false;
// The bytes prefix is only considered from language version V26 onwards.
if (LanguageVersion >= PythonLanguageVersion.V26) {
return ReadNameOrBytes();
}
return ReadName();
case 'f':
case 'F':
_state.LastNewLine = false;
// The formatted-string prefix is only considered from language version V36 onwards.
if (LanguageVersion >= PythonLanguageVersion.V36) {
return ReadNameOrFormattedString();
}
return ReadName();
case '_':
_state.LastNewLine = false;
return ReadName();
case '.':
_state.LastNewLine = false;
// Look ahead without consuming to tell ".5", "..." and a plain "." apart.
ch = Peek();
if (ch >= '0' && ch <= '9') {
// A dot followed by a digit starts a fractional number literal.
return ReadFraction();
} else if (ch == '.' && (StubFile || LanguageVersion.Is3x())) {
// Possible ellipsis (only for stub files or 3.x): consume the second dot
// and check for a third one.
NextChar();
if (Peek() == '.') {
NextChar();
MarkTokenEnd();
return Tokens.Ellipsis;
} else {
// Only two dots: un-read the second one so that a single dot token is produced.
BufferBack();
}
}
MarkTokenEnd();
return Tokens.DotToken;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
_state.LastNewLine = false;
return ReadNumber(ch);
default:
// Newline handling comes first.
// NOTE(review): this compares with "> 0" whereas the '\\' section compares with
// "!= NewLineKind.None" - presumably equivalent; confirm against the enum definition.
if ((nlKind = ReadEolnOpt(ch)) > 0) {
_newLineLocations.Add(new NewLineLocation(CurrentIndex, nlKind));
// token marked by the callee:
if (ReadIndentationAfterNewLine(nlKind)) {
return NewLineKindToToken(nlKind, _state.LastNewLine);
}
// we're in a grouping, white space is ignored
DiscardToken();
ch = NextChar();
break;
}
_state.LastNewLine = false;
// Try operators/punctuation first; a null result means ch does not start an operator.
var res = NextOperator(ch);
if (res != null) {
// Statement symbol tokens are transformed before being returned; MarkTokenEnd is
// not called on this path (presumably handled by the callee - confirm).
if (res is StatementSymbolToken) {
return TransformStatementToken(res);
}
MarkTokenEnd();
return res;
}
// Any other character accepted by IsNameStart begins an identifier.
if (IsNameStart(ch)) {
return ReadName();
}
// Nothing matched: report the character as a bad-character token.
MarkTokenEnd();
return BadChar(ch);
}
}
}