private bool ScanIdentifier_CrefSlowPath()

in src/Compilers/CSharp/Portable/Parser/Lexer.cs [1908:2147]


        /// <summary>
        /// Scans an identifier while the lexer is inside an XML cref or name attribute value.
        /// Each character may appear literally, as an XML entity, or (when the backslash is
        /// written literally) as a unicode escape sequence.
        /// </summary>
        /// <param name="info">
        /// Receives <c>IsVerbatim</c>, <c>HasIdentifierEscapeSequence</c>, <c>Text</c> and
        /// <c>StringValue</c> for the scanned token.
        /// </param>
        /// <returns>
        /// <c>true</c> when at least one character was collected in the identifier buffer;
        /// otherwise <c>false</c>, with the text window reset to the position where scanning
        /// started and <c>Text</c>/<c>StringValue</c> set to <c>null</c>.
        /// </returns>
        private bool ScanIdentifier_CrefSlowPath(ref TokenInfo info)
        {
            Debug.Assert(InXmlCrefOrNameAttributeValue);

            int identifierStart = TextWindow.Position;
            this.ResetIdentBuffer();

            if (AdvanceIfMatches('@'))
            {
                if (!InXmlNameAttributeValue)
                {
                    info.IsVerbatim = true;
                }
                else
                {
                    // In xml name attribute values the '@' belongs to the identifier's
                    // value text (dev11 compatibility).
                    AddIdentChar('@');
                }
            }

            while (true)
            {
                int charStart = TextWindow.Position;
                char consumedChar;
                char consumedSurrogate;

                // Step 1: read one logical character, either literally or from an XML entity.
                if (TextWindow.PeekChar() != '&')
                {
                    consumedChar = TextWindow.NextChar();
                    consumedSurrogate = SlidingTextWindow.InvalidCharacter;
                }
                else if (!TextWindow.TryScanXmlEntity(out consumedChar, out consumedSurrogate))
                {
                    // An invalid entity cannot be part of the identifier.
                    TextWindow.Reset(charStart);
                    break;
                }

                // Step 2: decode a unicode escape, at most once per character.
                // NOTE: For completeness, xml entities should be allowed inside unicode escape
                // sequences (DevDiv #16321).  Until that is a priority, an escape is only
                // attempted when NONE of its characters are XML entities - including the
                // backslash itself, hence the position check (a literal backslash advances the
                // window by exactly one).  Once the full behavior is implemented, the position
                // check can go and AdvanceIfMatches can replace PeekChar.
                if (consumedChar == '\\' &&
                    TextWindow.Position == charStart + 1 &&
                    (TextWindow.PeekChar() == 'u' || TextWindow.PeekChar() == 'U'))
                {
                    Debug.Assert(consumedSurrogate == SlidingTextWindow.InvalidCharacter, "Since consumedChar == '\\'");

                    info.HasIdentifierEscapeSequence = true;

                    // Rewind so the escape scanner sees the backslash.  The decoded character
                    // is never decoded again; otherwise \u005Cu1234 would look just like
                    // \u1234 (an escape within an escape).
                    TextWindow.Reset(charStart);
                    SyntaxDiagnosticInfo escapeError;
                    consumedChar = TextWindow.NextUnicodeEscape(out consumedSurrogate, out escapeError);
                    AddCrefError(escapeError);
                }

                // Step 3: decide whether the character extends the identifier.
                // NOTE: When the surrogate is set, consumedChar is half of a surrogate pair and
                // cannot match any of the ASCII tests below (UTF-16 guarantees that members of
                // surrogate pairs aren't separately valid).
                bool isIdentifierChar;

                if (consumedChar == '_' ||
                    (consumedChar >= 'A' && consumedChar <= 'Z') ||
                    (consumedChar >= 'a' && consumedChar <= 'z'))
                {
                    // The 'common' identifier characters.
                    isIdentifierChar = true;
                }
                else if (consumedChar >= '0' && consumedChar <= '9')
                {
                    // A digit is accepted only once the buffer already holds something.
                    isIdentifierChar = _identLen != 0;
                }
                else if (consumedChar == SlidingTextWindow.InvalidCharacter && TextWindow.IsReallyAtEnd())
                {
                    // Genuine end of input.
                    isIdentifierChar = false;
                }
                else if (consumedChar <= 127)
                {
                    // Every remaining ASCII character ends the identifier: the 'common' stop
                    // characters (space, '$', tab, '.', ';', '(', ')', ',', '<') as well as a
                    // backslash that did not begin a unicode escape.
                    isIdentifierChar = false;
                }
                else if (_identLen == 0)
                {
                    // This is the 'expensive' call.
                    isIdentifierChar = SyntaxFacts.IsIdentifierStartCharacter(consumedChar);
                }
                else if (_identLen > 0 && SyntaxFacts.IsIdentifierPartCharacter(consumedChar))
                {
                    //// BUG 424819 : Handle identifier chars > 0xFFFF via surrogate pairs
                    if (UnicodeCharacterUtilities.IsFormattingChar(consumedChar))
                    {
                        // Formatting characters are consumed but not recorded.
                        continue;
                    }

                    isIdentifierChar = true;
                }
                else
                {
                    isIdentifierChar = false;
                }

                if (!isIdentifierChar)
                {
                    // Un-consume the offending character and stop scanning.
                    TextWindow.Reset(charStart);
                    break;
                }

                this.AddIdentChar(consumedChar);
                if (consumedSurrogate != SlidingTextWindow.InvalidCharacter)
                {
                    this.AddIdentChar(consumedSurrogate);
                }
            }

            if (_identLen <= 0)
            {
                // Nothing collected: report failure and restore the starting position.
                info.Text = null;
                info.StringValue = null;
                TextWindow.Reset(identifierStart);
                return false;
            }

            // NOTE: The string value must be interned, or the keyword dictionary (which looks
            // keys up by identity) will never get a hit.  The text does not have to be
            // interned, and probably shouldn't be when it contains entities.
            if (_identLen == TextWindow.Width)
            {
                // The buffer matches the consumed input character for character, so a single
                // interned string serves as both value and text.
                info.StringValue = TextWindow.GetInternedText();
                info.Text = info.StringValue;
            }
            else
            {
                info.StringValue = TextWindow.Intern(_identBuffer, 0, _identLen);
                info.Text = TextWindow.GetText(intern: false);
            }

            return true;
        }