private static void CheckAnalysisConsistency()

in src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs [927:1197]


        private static void CheckAnalysisConsistency(Random random, Analyzer a, bool useCharFilter, string text, bool offsetsAreCorrect, Field field)
        {
            if (Verbose)
            {
                Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: get first token stream now text=" + text);
            }

            int remainder = random.Next(10);
            TextReader reader = new StringReader(text);
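            // Randomly wrap the reader in a MockCharFilter; the remainder value (0-9)
            // parameterizes how the filter alters the character stream, exercising
            // offset correction through the char filter chain.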
            TokenStream ts = a.GetTokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);
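            // Attributes are optional; capture each one only if the stream declares
            // it, so the checks below can adapt to what the analysis chain provides.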
            ICharTermAttribute termAtt = ts.HasAttribute<ICharTermAttribute>() ? ts.GetAttribute<ICharTermAttribute>() : null;
            IOffsetAttribute offsetAtt = ts.HasAttribute<IOffsetAttribute>() ? ts.GetAttribute<IOffsetAttribute>() : null;
            IPositionIncrementAttribute posIncAtt = ts.HasAttribute<IPositionIncrementAttribute>() ? ts.GetAttribute<IPositionIncrementAttribute>() : null;
            IPositionLengthAttribute posLengthAtt = ts.HasAttribute<IPositionLengthAttribute>() ? ts.GetAttribute<IPositionLengthAttribute>() : null;
            ITypeAttribute typeAtt = ts.HasAttribute<ITypeAttribute>() ? ts.GetAttribute<ITypeAttribute>() : null;
            IList<string> tokens = new JCG.List<string>();
            IList<string> types = new JCG.List<string>();
            IList<int> positions = new JCG.List<int>();
            IList<int> positionLengths = new JCG.List<int>();
            IList<int> startOffsets = new JCG.List<int>();
            IList<int> endOffsets = new JCG.List<int>();
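            // TokenStream contract: Reset() must be called before the first IncrementToken().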
            ts.Reset();

            try
            {
                // First pass: save away "correct" tokens
                while (ts.IncrementToken())
                {
                    Assert.IsNotNull(termAtt, "has no CharTermAttribute");
                    tokens.Add(termAtt.ToString());
                    if (typeAtt != null)
                    {
                        types.Add(typeAtt.Type);
                    }

                    if (posIncAtt != null)
                    {
                        positions.Add(posIncAtt.PositionIncrement);
                    }

                    if (posLengthAtt != null)
                    {
                        positionLengths.Add(posLengthAtt.PositionLength);
                    }

                    if (offsetAtt != null)
                    {
                        startOffsets.Add(offsetAtt.StartOffset);
                        endOffsets.Add(offsetAtt.EndOffset);
                    }
                }

                ts.End();
            }
            finally
            {
                // LUCENENET: We are doing this in the finally block to ensure it happens
                // when there are exceptions thrown (such as when the assert fails).
                ts.Close();
            }

            // verify reusing is "reproducible" and also get the normal TokenStream sanity checks
            if (tokens.Count > 0)
            {
                // KeywordTokenizer (for example) can produce a token
                // even when the input is length 0:
                if (text.Length != 0)
                {
                    // (Optional) second pass: do something evil:
                    int evilness = random.Next(50);
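                    // 1-in-50 odds each: evilness == 17 aborts analysis with an
                    // exception thrown from the reader; evilness == 7 consumes only
                    // a prefix of the tokens. Both verify the analyzer recovers
                    // cleanly for the final pass below.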
                    if (evilness == 17)
                    {
                        if (Verbose)
                        {
                            Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: re-run analysis w/ exception");
                        }
                        // Throw an errant exception from the Reader:

                        using MockReaderWrapper evilReader = new MockReaderWrapper(random, new StringReader(text));
                        evilReader.ThrowExcAfterChar(random.Next(text.Length)); // LUCENENET note: Next()'s upper bound is exclusive, so we don't need +1
                        //reader = evilReader; // LUCENENET: IDE0059: Remove unnecessary value assignment

                        try
                        {
                            // NOTE: some Tokenizers go and read characters
                            // when you call .SetReader(TextReader), eg
                            // PatternTokenizer.  this is a bit
                            // iffy... (really, they should only
                            // pull from the TextReader when you call
                            // .IncrementToken(), I think?), but we
                            // currently allow it, so, we must call
                            // a.GetTokenStream inside the try since we may
                            // hit the exc on init:
                            ts = a.GetTokenStream("dummy", useCharFilter ? new MockCharFilter(evilReader, remainder) : evilReader);
                            ts.Reset();
                            while (ts.IncrementToken()) ;
                            Assert.Fail("did not hit exception");
                        }
                        catch (Exception re) when (re.IsRuntimeException())
                        {
                            Assert.IsTrue(MockReaderWrapper.IsMyEvilException(re));
                        }

                        try
                        {
                            ts.End();
                        }
                        catch (Exception ae) when (ae.IsAssertionError() && ae.Message.Contains("End() called before IncrementToken() returned false!"))
                        {
                            // Catch & ignore MockTokenizer's
                            // anger...
                            // OK
                        }
                        finally
                        {
                            ts.Close();
                        }
                    }
                    else if (evilness == 7)
                    {
                        // Only consume a subset of the tokens:
                        int numTokensToRead = random.Next(tokens.Count);
                        if (Verbose)
                        {
                            Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: re-run analysis, only consuming " + numTokensToRead + " of " + tokens.Count + " tokens");
                        }

                        reader = new StringReader(text);
                        ts = a.GetTokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);
                        ts.Reset();
                        for (int tokenCount = 0; tokenCount < numTokensToRead; tokenCount++)
                        {
                            Assert.IsTrue(ts.IncrementToken());
                        }

                        try
                        {
                            ts.End();
                        }
                        catch (Exception ae) when (ae.IsAssertionError() && ae.Message.Contains("End() called before IncrementToken() returned false!"))
                        {
                            // Catch & ignore MockTokenizer's
                            // anger...
                            // OK
                        }
                        finally
                        {
                            ts.Close();
                        }
                    }
                }
            }

            // Final pass: verify clean tokenization matches
            // results from first pass:

            if (Verbose)
            {
                Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: re-run analysis; " + tokens.Count + " tokens");
            }
            reader = new StringReader(text);

            long seed = random.NextInt64();
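            // Reseed from the saved seed so that, when a field is supplied, the
            // spoon-feed decision below can be replayed identically (see the
            // matching 'new J2N.Randomizer(seed)' in the field block at the end).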
            random = new J2N.Randomizer(seed);
            if (random.Next(30) == 7)
            {
                if (Verbose)
                {
                    Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: using spoon-feed reader");
                }

                reader = new MockReaderWrapper(random, reader);
            }

            ts = a.GetTokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);

            try
            {
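                // Dispatch to the AssertTokenStreamContents overload that matches the
                // attributes this stream actually exposes; passing null skips those checks.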
                if (typeAtt != null && posIncAtt != null && posLengthAtt != null && offsetAtt != null)
                {
                    // offset + pos + posLength + type
                    AssertTokenStreamContents(ts,
                        tokens.ToArray(),
                        ToIntArray(startOffsets),
                        ToIntArray(endOffsets),
                        types.ToArray(),
                        ToIntArray(positions),
                        ToIntArray(positionLengths),
                        text.Length,
                        offsetsAreCorrect);
                }
                else if (typeAtt != null && posIncAtt != null && offsetAtt != null)
                {
                    // offset + pos + type
                    AssertTokenStreamContents(ts,
                        tokens.ToArray(),
                        ToIntArray(startOffsets),
                        ToIntArray(endOffsets),
                        types.ToArray(),
                        ToIntArray(positions),
                        null,
                        text.Length,
                        offsetsAreCorrect);
                }
                else if (posIncAtt != null && posLengthAtt != null && offsetAtt != null)
                {
                    // offset + pos + posLength
                    AssertTokenStreamContents(ts,
                        tokens.ToArray(),
                        ToIntArray(startOffsets),
                        ToIntArray(endOffsets),
                        null,
                        ToIntArray(positions),
                        ToIntArray(positionLengths),
                        text.Length,
                        offsetsAreCorrect);
                }
                else if (posIncAtt != null && offsetAtt != null)
                {
                    // offset + pos
                    AssertTokenStreamContents(ts,
                        tokens.ToArray(),
                        ToIntArray(startOffsets),
                        ToIntArray(endOffsets),
                        null,
                        ToIntArray(positions),
                        null,
                        text.Length,
                        offsetsAreCorrect);
                }
                else if (offsetAtt != null)
                {
                    // offset
                    AssertTokenStreamContents(ts,
                        tokens.ToArray(),
                        ToIntArray(startOffsets),
                        ToIntArray(endOffsets),
                        null,
                        null,
                        null,
                        text.Length,
                        offsetsAreCorrect);
                }
                else
                {
                    // terms only
                    AssertTokenStreamContents(ts, tokens.ToArray());
                }
            }
            finally
            {
                ts.Close();
            }

            if (field != null)
            {
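                // Finally, hand the field a fresh reader (spoon-fed and/or char
                // filtered exactly as in the pass above, thanks to the replayed
                // seed) so indexing this field re-runs the same analysis.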
                reader = new StringReader(text);
                random = new J2N.Randomizer(seed);
                if (random.Next(30) == 7)
                {
                    if (Verbose)
                    {
                        Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: indexing using spoon-feed reader");
                    }

                    reader = new MockReaderWrapper(random, reader);
                }

                field.SetReaderValue(useCharFilter ? new MockCharFilter(reader, remainder) : reader);
            }
        }
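
A minimal usage sketch (not part of this file): a test inside a class deriving from BaseTokenStreamTestCase could drive the checks above through the framework's public CheckAnalysisConsistency overload. This assumes NUnit's [Test] attribute, MockAnalyzer, and LuceneTestCase's Random property from this test framework; the analyzer choice and input text are illustrative.

        [Test]
        public virtual void TestRandomAnalysisIsConsistent()
        {
            Random random = Random; // LuceneTestCase's per-test randomizer
            Analyzer a = new MockAnalyzer(random); // illustrative analyzer choice
            // The public overload ultimately funnels into the private method above,
            // exercising both the plain-reader and MockCharFilter-wrapped paths.
            CheckAnalysisConsistency(random, a, random.Next(2) == 0, "some text for analysis");
        }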