public virtual void Scan()

in src/Lucene.Net.Benchmark/Support/TagSoup/HTMLScanner.cs [395:684]


        /// <summary>
        /// Scans characters from <paramref name="r"/> and drives the table-based state
        /// machine, reporting each recognized piece of input to <paramref name="h"/>.
        /// The loop runs until the state becomes <c>S_DONE</c>, after which
        /// <c>h.EOF</c> is invoked.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Characters are examined with <see cref="TextReader.Peek()"/> and consumed with
        /// <see cref="TextReader.Read()"/> only once the selected action has decided not to
        /// push the character back. A leading U+FEFF (BOM) is discarded.
        /// </para>
        /// <para>
        /// CR LF and a lone CR are both presented to the state machine as a single
        /// <c>'\n'</c>. <c>theCurrentLine</c> / <c>theCurrentColumn</c> are updated for every
        /// character presented, including characters that are subsequently pushed back.
        /// </para>
        /// <para>
        /// If the state table has no entry for the current state/character pair, or the
        /// action code is unknown, the object produced by <c>Error.Create</c> is thrown.
        /// </para>
        /// </remarks>
        /// <param name="r">Character source; only <c>Peek()</c> and <c>Read()</c> are called on it.</param>
        /// <param name="h">Handler that receives the scan events.</param>
        public virtual void Scan(TextReader r, IScanHandler h)
        {
            theState = S_PCDATA;

            // Remove any leading BOM
            if (r.Peek() == '\uFEFF') r.Read();

            // Set when an action pushed back a '\n' that was synthesized from a lone CR.
            // That newline is not present in the reader (the CR is already consumed), so
            // it has to be remembered here and delivered again on the next iteration.
            bool pendingNewline = false;

            while (theState != S_DONE)
            {
                int ch;

                // True when ch is a '\n' standing in for a lone CR: the character at the
                // head of the reader is then the one *after* the CR and must not be consumed.
                bool synthesizedNewline = false;

                if (pendingNewline)
                {
                    pendingNewline = false;
                    synthesizedNewline = true;
                    ch = '\n';
                }
                else
                {
                    ch = r.Peek();

                    // Remap the 0x80..0x9F range through theWinMap
                    if (ch >= 0x80 && ch <= 0x9F) ch = theWinMap[ch - 0x80];

                    if (ch == '\r')
                    {
                        r.Read();           // consume the CR
                        ch = r.Peek();      // expect LF next
                        if (ch != '\n')
                        {
                            // Lone CR: treat it as a newline and leave the following
                            // character in the reader.
                            synthesizedNewline = true;
                            ch = '\n';
                        }
                    }
                }

                // Set by an action that wants the current character presented again.
                bool unread = false;

                if (ch == '\n')
                {
                    theCurrentLine++;
                    theCurrentColumn = 0;
                }
                else
                {
                    theCurrentColumn++;
                }

                // Skip control characters other than LF, TAB and the EOF marker (-1).
                if (!(ch >= 0x20 || ch == '\n' || ch == '\t' || ch == -1))
                {
                    // Peek() does not consume, so the skipped character must be read here;
                    // otherwise the next iteration would peek the very same character and
                    // the loop would never make progress.
                    r.Read();
                    continue;
                }

                // Search state table. Characters outside [-1, statetableIndexMaxChar) share
                // the -2 slot; the index is offset by 2 so that -2 maps to column 0.
                int adjCh = (ch >= -1 && ch < statetableIndexMaxChar) ? ch : -2;
                int statetableRow = statetableIndex[theState][adjCh + 2];
                int action = 0;
                if (statetableRow != -1)
                {
                    action = statetable[statetableRow + 2];
                    theNextState = statetable[statetableRow + 3];
                }

                switch (action)
                {
                    case 0:
                        // No table entry for this state/character combination.
                        throw Error.Create(
                            "HTMLScanner can't cope with " + ch + " in state " +
                            theState);
                    case A_ADUP:
                        h.Adup(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        break;
                    case A_ADUP_SAVE:
                        h.Adup(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        Save(ch, h);
                        break;
                    case A_ADUP_STAGC:
                        h.Adup(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        h.STagC(theOutputBuffer, 0, theSize);
                        break;
                    case A_ANAME:
                        h.Aname(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        break;
                    case A_ANAME_ADUP:
                        h.Aname(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        h.Adup(theOutputBuffer, 0, theSize);
                        break;
                    case A_ANAME_ADUP_STAGC:
                        h.Aname(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        h.Adup(theOutputBuffer, 0, theSize);
                        h.STagC(theOutputBuffer, 0, theSize);
                        break;
                    case A_AVAL:
                        h.Aval(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        break;
                    case A_AVAL_STAGC:
                        h.Aval(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        h.STagC(theOutputBuffer, 0, theSize);
                        break;
                    case A_CDATA:
                        Mark();
                        // suppress the final "]]" in the buffer
                        if (theSize > 1) theSize -= 2;
                        h.PCDATA(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        break;
                    case A_ENTITY_START:
                        h.PCDATA(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        Save(ch, h);
                        break;
                    case A_ENTITY:
                        Mark();
                        char ch1 = (char)ch;

                        // While the character can still belong to the reference, keep
                        // collecting it (switching to the numeric / hex states as needed).
                        if (theState == S_ENT && ch1 == '#')
                        {
                            theNextState = S_NCR;
                            Save(ch, h);
                            break;
                        }
                        else if (theState == S_NCR && (ch1 == 'x' || ch1 == 'X'))
                        {
                            theNextState = S_XNCR;
                            Save(ch, h);
                            break;
                        }
                        else if (theState == S_ENT && char.IsLetterOrDigit(ch1))
                        {
                            Save(ch, h);
                            break;
                        }
                        else if (theState == S_NCR && char.IsDigit(ch1))
                        {
                            Save(ch, h);
                            break;
                        }
                        else if (theState == S_XNCR && (char.IsDigit(ch1) || "abcdefABCDEF".IndexOf(ch1) != -1))
                        {
                            Save(ch, h);
                            break;
                        }

                        // The whole entity reference has been collected; buffer index 0 is
                        // excluded from the text handed to the handler.
                        h.Entity(theOutputBuffer, 1, theSize - 1);
                        int ent = h.GetEntity();
                        if (ent != 0)
                        {
                            // Replace the collected reference text with the resolved value.
                            theSize = 0;
                            if (ent >= 0x80 && ent <= 0x9F)
                            {
                                ent = theWinMap[ent - 0x80];
                            }
                            if (ent < 0x20)
                            {
                                // Control values: nothing is saved, so the reference
                                // contributes no character to the output.
                            }
                            else if (ent >= 0xD800 && ent <= 0xDFFF)
                            {
                                // Surrogates get dropped
                            }
                            else if (ent <= 0xFFFF)
                            {
                                // BMP character
                                Save(ent, h);
                            }
                            else
                            {
                                // Astral converted to two surrogates
                                ent -= 0x10000;
                                Save((ent >> 10) + 0xD800, h);
                                Save((ent & 0x3FF) + 0xDC00, h);
                            }
                            if (ch != ';')
                            {
                                // The terminating character is not part of the reference;
                                // present it again in the next state.
                                unread = true;
                                theCurrentColumn--;
                            }
                        }
                        else
                        {
                            // Handler reported 0 for the entity: keep the collected text in
                            // the buffer and re-present the current character.
                            unread = true;
                            theCurrentColumn--;
                        }
                        theNextState = S_PCDATA;
                        break;
                    case A_ETAG:
                        h.ETag(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        break;
                    case A_DECL:
                        h.Decl(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        break;
                    case A_GI:
                        h.GI(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        break;
                    case A_GI_STAGC:
                        h.GI(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        h.STagC(theOutputBuffer, 0, theSize);
                        break;
                    case A_LT:
                        Mark();
                        Save('<', h);
                        Save(ch, h);
                        break;
                    case A_LT_PCDATA:
                        Mark();
                        Save('<', h);
                        h.PCDATA(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        break;
                    case A_PCDATA:
                        Mark();
                        h.PCDATA(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        break;
                    case A_CMNT:
                        Mark();
                        h.Cmnt(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        break;
                    case A_MINUS3:
                        Save('-', h);
                        Save(' ', h);
                        break;
                    case A_MINUS2:
                        Save('-', h);
                        Save(' ', h);
                        // C# has no implicit fall-through between switch sections, so the
                        // statements of A_MINUS are repeated here.
                        Save('-', h);
                        Save(ch, h);
                        break;
                    case A_MINUS:
                        Save('-', h);
                        Save(ch, h);
                        break;
                    case A_PI:
                        Mark();
                        h.PI(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        break;
                    case A_PITARGET:
                        h.PITarget(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        break;
                    case A_PITARGET_PI:
                        h.PITarget(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        h.PI(theOutputBuffer, 0, theSize);
                        break;
                    case A_SAVE:
                        Save(ch, h);
                        break;
                    case A_SKIP:
                        break;
                    case A_SP:
                        Save(' ', h);
                        break;
                    case A_STAGC:
                        h.STagC(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        break;
                    case A_EMPTYTAG:
                        Mark();
                        if (theSize > 0) h.GI(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        h.STagE(theOutputBuffer, 0, theSize);
                        break;
                    case A_UNGET:
                        unread = true;
                        theCurrentColumn--;
                        break;
                    case A_UNSAVE_PCDATA:
                        if (theSize > 0) theSize--;
                        h.PCDATA(theOutputBuffer, 0, theSize);
                        theSize = 0;
                        break;
                    default:
                        throw Error.Create("Can't process state " + action);
                }

                if (synthesizedNewline)
                {
                    // The head of the reader is the character after the lone CR, so there
                    // is nothing to consume. If the action pushed the newline back, keep
                    // it pending so it is presented again instead of being lost.
                    pendingNewline = unread;
                }
                else if (!unread)
                {
                    // The character was accepted: consume it from the reader.
                    r.Read();
                }
                theState = theNextState;
            }
            h.EOF(theOutputBuffer, 0, 0);
        }