in src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs [927:1197]
private static void CheckAnalysisConsistency(Random random, Analyzer a, bool useCharFilter, string text, bool offsetsAreCorrect, Field field)
{
if (Verbose)
{
Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: get first token stream now text=" + text);
}
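// Build the first token stream. When useCharFilter is true the reader is wrapped
// in a MockCharFilter; the random remainder varies the filter's behavior from run to run.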
int remainder = random.Next(10);
TextReader reader = new StringReader(text);
TokenStream ts = a.GetTokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);
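// Look up the standard attributes; any of them may be absent, so each is null-checked below.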
ICharTermAttribute termAtt = ts.HasAttribute<ICharTermAttribute>() ? ts.GetAttribute<ICharTermAttribute>() : null;
IOffsetAttribute offsetAtt = ts.HasAttribute<IOffsetAttribute>() ? ts.GetAttribute<IOffsetAttribute>() : null;
IPositionIncrementAttribute posIncAtt = ts.HasAttribute<IPositionIncrementAttribute>() ? ts.GetAttribute<IPositionIncrementAttribute>() : null;
IPositionLengthAttribute posLengthAtt = ts.HasAttribute<IPositionLengthAttribute>() ? ts.GetAttribute<IPositionLengthAttribute>() : null;
ITypeAttribute typeAtt = ts.HasAttribute<ITypeAttribute>() ? ts.GetAttribute<ITypeAttribute>() : null;
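// Expected values captured on the first pass and replayed against the later passes.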
IList<string> tokens = new JCG.List<string>();
IList<string> types = new JCG.List<string>();
IList<int> positions = new JCG.List<int>();
IList<int> positionLengths = new JCG.List<int>();
IList<int> startOffsets = new JCG.List<int>();
IList<int> endOffsets = new JCG.List<int>();
ts.Reset();
try
{
// First pass: save away "correct" tokens
while (ts.IncrementToken())
{
Assert.IsNotNull(termAtt, "has no CharTermAttribute");
tokens.Add(termAtt.ToString());
if (typeAtt != null)
{
types.Add(typeAtt.Type);
}
if (posIncAtt != null)
{
positions.Add(posIncAtt.PositionIncrement);
}
if (posLengthAtt != null)
{
positionLengths.Add(posLengthAtt.PositionLength);
}
if (offsetAtt != null)
{
startOffsets.Add(offsetAtt.StartOffset);
endOffsets.Add(offsetAtt.EndOffset);
}
}
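// End() records the final offset state once all tokens have been consumed.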
ts.End();
}
finally
{
// LUCENENET: We are doing this in the finally block to ensure it happens
// when there are exceptions thrown (such as when the assert fails).
ts.Close();
}
// verify reusing is "reproducible" and also get the normal tokenstream sanity checks
if (tokens.Count > 0)
{
// KeywordTokenizer (for example) can produce a token
// even when input is length 0:
if (text.Length != 0)
{
// (Optional) second pass: do something evil:
int evilness = random.Next(50);
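// evilness == 17: make the reader throw mid-analysis; evilness == 7: consume only a prefix of the tokens.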
if (evilness == 17)
{
if (Verbose)
{
Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: re-run analysis w/ exception");
}
// Throw an errant exception from the Reader:
using MockReaderWrapper evilReader = new MockReaderWrapper(random, new StringReader(text));
evilReader.ThrowExcAfterChar(random.Next(text.Length)); // LUCENENET note, Next() is exclusive, so we don't need +1
//reader = evilReader; // LUCENENET: IDE0059: Remove unnecessary value assignment
try
{
// NOTE: some Tokenizers go and read characters
// when you call .SetReader(TextReader), eg
// PatternTokenizer. this is a bit
// iffy... (really, they should only
// pull from the TextReader when you call
// .IncrementToken(), I think?), but we
// currently allow it, so, we must call
// a.GetTokenStream inside the try since we may
// hit the exc on init:
ts = a.GetTokenStream("dummy", useCharFilter ? new MockCharFilter(evilReader, remainder) : evilReader);
ts.Reset();
while (ts.IncrementToken()) ;
Assert.Fail("did not hit exception");
}
catch (Exception re) when (re.IsRuntimeException())
{
Assert.IsTrue(MockReaderWrapper.IsMyEvilException(re));
}
try
{
ts.End();
}
catch (Exception ae) when (ae.IsAssertionError() && ae.Message.Contains("End() called before IncrementToken() returned false!"))
{
// Catch & ignore MockTokenizer's
// anger...
// OK
}
finally
{
ts.Close();
}
}
else if (evilness == 7)
{
// Only consume a subset of the tokens:
int numTokensToRead = random.Next(tokens.Count);
if (Verbose)
{
Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: re-run analysis, only consuming " + numTokensToRead + " of " + tokens.Count + " tokens");
}
reader = new StringReader(text);
ts = a.GetTokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);
ts.Reset();
for (int tokenCount = 0; tokenCount < numTokensToRead; tokenCount++)
{
Assert.IsTrue(ts.IncrementToken());
}
try
{
ts.End();
}
catch (Exception ae) when (ae.IsAssertionError() && ae.Message.Contains("End() called before IncrementToken() returned false!"))
{
// Catch & ignore MockTokenizer's
// anger...
// OK
}
finally
{
ts.Close();
}
}
}
}
// Final pass: verify clean tokenization matches
// results from first pass:
if (Verbose)
{
Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: re-run analysis; " + tokens.Count + " tokens");
}
reader = new StringReader(text);
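// Capture the seed so the same random choices (e.g. the spoon-feed reader below)
// can be replayed when setting the Field's reader value at the end.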
long seed = random.NextInt64();
random = new J2N.Randomizer(seed);
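// 1-in-30 chance: wrap the reader so it hands back only a few characters per read.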
if (random.Next(30) == 7)
{
if (Verbose)
{
Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: using spoon-feed reader");
}
reader = new MockReaderWrapper(random, reader);
}
ts = a.GetTokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);
try
{
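// Re-run analysis on a fresh stream and check that it reproduces exactly what the
// first pass captured, asserting only the attributes this stream actually exposes.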
if (typeAtt != null && posIncAtt != null && posLengthAtt != null && offsetAtt != null)
{
// offset + pos + posLength + type
AssertTokenStreamContents(ts,
tokens.ToArray(),
ToIntArray(startOffsets),
ToIntArray(endOffsets),
types.ToArray(),
ToIntArray(positions),
ToIntArray(positionLengths),
text.Length,
offsetsAreCorrect);
}
else if (typeAtt != null && posIncAtt != null && offsetAtt != null)
{
// offset + pos + type
AssertTokenStreamContents(ts,
tokens.ToArray(),
ToIntArray(startOffsets),
ToIntArray(endOffsets),
types.ToArray(),
ToIntArray(positions),
null,
text.Length,
offsetsAreCorrect);
}
else if (posIncAtt != null && posLengthAtt != null && offsetAtt != null)
{
// offset + pos + posLength
AssertTokenStreamContents(ts,
tokens.ToArray(),
ToIntArray(startOffsets),
ToIntArray(endOffsets),
null,
ToIntArray(positions),
ToIntArray(positionLengths),
text.Length,
offsetsAreCorrect);
}
else if (posIncAtt != null && offsetAtt != null)
{
// offset + pos
AssertTokenStreamContents(ts,
tokens.ToArray(),
ToIntArray(startOffsets),
ToIntArray(endOffsets),
null,
ToIntArray(positions),
null,
text.Length,
offsetsAreCorrect);
}
else if (offsetAtt != null)
{
// offset
AssertTokenStreamContents(ts,
tokens.ToArray(),
ToIntArray(startOffsets),
ToIntArray(endOffsets),
null,
null,
null,
text.Length,
offsetsAreCorrect);
}
else
{
// terms only
AssertTokenStreamContents(ts, tokens.ToArray());
}
}
finally
{
ts.Close();
}
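// If a Field was supplied, hand it an equivalent reader (same seed, so the same
// spoon-feed decision) so the caller can index the same content.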
if (field != null)
{
reader = new StringReader(text);
random = new J2N.Randomizer(seed);
if (random.Next(30) == 7)
{
if (Verbose)
{
Console.WriteLine(Thread.CurrentThread.Name + ": NOTE: baseTokenStreamTestCase: indexing using spoon-feed reader");
}
reader = new MockReaderWrapper(random, reader);
}
field.SetReaderValue(useCharFilter ? new MockCharFilter(reader, remainder) : reader);
}
}