in src/Compilers/CSharp/Portable/Parser/Lexer.cs [1908:2147]
/// <summary>
/// Scans an identifier inside an XML cref or name attribute value. Each character may be
/// written literally, as an XML entity, or (when the backslash is literal) as a unicode escape.
/// </summary>
/// <param name="info">
/// Token information to fill in. <c>IsVerbatim</c> and <c>HasIdentifierEscapeSequence</c> may be
/// set while scanning; <c>StringValue</c> and <c>Text</c> are assigned on success and set to
/// null on failure.
/// </param>
/// <returns>
/// <c>true</c> when the identifier buffer is non-empty after scanning; otherwise <c>false</c>,
/// with the text window reset to the position at which scanning started.
/// </returns>
private bool ScanIdentifier_CrefSlowPath(ref TokenInfo info)
{
    Debug.Assert(InXmlCrefOrNameAttributeValue);

    int identifierStart = TextWindow.Position;
    this.ResetIdentBuffer();

    if (AdvanceIfMatches('@'))
    {
        if (!InXmlNameAttributeValue)
        {
            info.IsVerbatim = true;
        }
        else
        {
            // In xml name attribute values the '@' belongs to the value text of the
            // identifier (to match dev11).
            AddIdentChar('@');
        }
    }

    while (true)
    {
        int charStart = TextWindow.Position;
        char current;
        char trailing;

        if (TextWindow.PeekChar() != '&')
        {
            current = TextWindow.NextChar();
            trailing = SlidingTextWindow.InvalidCharacter;
        }
        else if (!TextWindow.TryScanXmlEntity(out current, out trailing))
        {
            // An '&' that does not start a valid entity is not part of the identifier.
            TextWindow.Reset(charStart);
            break;
        }

        // NOTE: For completeness, xml entities should be allowed inside unicode escape
        // sequences (DevDiv #16321). In the interim, an escape is only scanned when NONE of
        // its characters are XML entities, including the backslash already consumed: the
        // position check verifies that exactly one raw character was read. Once that is
        // implemented, the position check can be dropped and AdvanceIfMatches used instead
        // of PeekChar.
        if (current == '\\' &&
            TextWindow.Position == charStart + 1 &&
            (TextWindow.PeekChar() == 'u' || TextWindow.PeekChar() == 'U'))
        {
            Debug.Assert(trailing == SlidingTextWindow.InvalidCharacter, "Since consumedChar == '\\'");

            info.HasIdentifierEscapeSequence = true;

            // Rewind so the escape is scanned from its backslash. The escape is decoded at
            // most once per character; otherwise \u005Cu1234 would look just like \u1234
            // (i.e. an escape within an escape).
            TextWindow.Reset(charStart);
            SyntaxDiagnosticInfo escapeError;
            current = TextWindow.NextUnicodeEscape(out trailing, out escapeError);
            AddCrefError(escapeError);
        }

        // NOTE: If the trailing surrogate is set, then 'current' won't match any of the
        // ASCII tests below (UTF-16 guarantees that members of surrogate pairs aren't
        // separately valid).
        if (current == '_' ||
            (current >= 'A' && current <= 'Z') ||
            (current >= 'a' && current <= 'z'))
        {
            // The 'common' identifier characters: always accepted.
        }
        else if (current >= '0' && current <= '9')
        {
            // Digits are 'common' identifier characters too, but only once the
            // identifier buffer already holds something.
            if (_identLen == 0)
            {
                TextWindow.Reset(charStart);
                break;
            }
        }
        else if (current == ' ' || current == '$' || current == '\t' ||
                 current == '.' || current == ';' || current == '(' ||
                 current == ')' || current == ',' || current == '<')
        {
            // The 'common' stop characters.
            TextWindow.Reset(charStart);
            break;
        }
        else if (current == SlidingTextWindow.InvalidCharacter && TextWindow.IsReallyAtEnd())
        {
            // Genuine end of input.
            TextWindow.Reset(charStart);
            break;
        }
        else
        {
            // Everything else (including a backslash that is not an escape, and an
            // InvalidCharacter that is not the real end) needs the 'expensive' checks.
            bool beyondAscii = current > 127;
            bool startsIdentifier =
                _identLen == 0 && beyondAscii && SyntaxFacts.IsIdentifierStartCharacter(current);

            if (!startsIdentifier)
            {
                bool continuesIdentifier =
                    _identLen > 0 && beyondAscii && SyntaxFacts.IsIdentifierPartCharacter(current);

                if (!continuesIdentifier)
                {
                    // Not a valid identifier character, so bail.
                    TextWindow.Reset(charStart);
                    break;
                }

                //// BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                if (UnicodeCharacterUtilities.IsFormattingChar(current))
                {
                    // Formatting characters are consumed but not added to the buffer.
                    continue;
                }
            }
        }

        this.AddIdentChar(current);
        if (trailing != SlidingTextWindow.InvalidCharacter)
        {
            this.AddIdentChar(trailing);
        }
    }

    if (_identLen <= 0)
    {
        // Nothing was collected: clear the outputs and undo everything consumed.
        info.Text = null;
        info.StringValue = null;
        TextWindow.Reset(identifierStart);
        return false;
    }

    // NOTE: If the string value is not interned, there will be no hit in the keyword
    // dictionary! (It searches for a key using identity.) The text does not have to be
    // interned (and probably shouldn't be if it contains entities, as in the else-case).
    if (_identLen == TextWindow.Width)
    {
        // The buffer length equals the exact number of input characters consumed,
        // so the interned window text is used for both the value and the text.
        info.StringValue = TextWindow.GetInternedText();
        info.Text = info.StringValue;
    }
    else
    {
        info.StringValue = TextWindow.Intern(_identBuffer, 0, _identLen);
        info.Text = TextWindow.GetText(intern: false);
    }

    return true;
}