in src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs [117:389]
/// <summary>
/// Asserts that <paramref name="ts"/> produces exactly the expected sequence of tokens and
/// per-token attribute values, and that the stream honors the <c>TokenStream</c> contract:
/// <c>ClearAttributes()</c> is called for every token, <c>End()</c> calls <c>base.End()</c>,
/// and no tokens are produced beyond <paramref name="output"/>.
/// </summary>
/// <param name="ts">The token stream under test. It is always closed before this method returns,
/// even when an assertion fails.</param>
/// <param name="output">Expected token texts; required (must not be <c>null</c>).</param>
/// <param name="startOffsets">Expected start offsets per token, or <c>null</c> to skip this check.</param>
/// <param name="endOffsets">Expected end offsets per token, or <c>null</c> to skip this check.</param>
/// <param name="types">Expected token types, or <c>null</c> to skip this check.</param>
/// <param name="posIncrements">Expected position increments, or <c>null</c> to skip this check.</param>
/// <param name="posLengths">Expected position lengths, or <c>null</c> to skip this check.</param>
/// <param name="finalOffset">Expected end offset after <c>End()</c>, or <c>null</c> to skip this check.</param>
/// <param name="finalPosInc">Expected position increment after <c>End()</c>, or <c>null</c> to skip this check.</param>
/// <param name="keywordAtts">Expected keyword flags, or <c>null</c> to skip this check.</param>
/// <param name="offsetsAreCorrect">When <c>true</c>, additionally enforces that start offsets never go
/// backwards and that offsets are consistent across the token graph (same position =&gt; same offset).</param>
/// <param name="payloads">Expected payload bytes per token, or <c>null</c> to skip this check.</param>
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, int? finalPosInc, bool[] keywordAtts, bool offsetsAreCorrect, byte[][] payloads)
{
    // LUCENENET: Bug fix: NUnit throws an exception when something fails.
    // This causes Dispose() to be skipped and it pollutes other tests indicating false negatives.
    // Added this try-finally block to fix this.
    try
    {
        Assert.IsNotNull(output);
        var checkClearAtt = ts.AddAttribute<ICheckClearAttributesAttribute>();

        // Resolve each attribute only when the caller supplied expectations for it;
        // asserting HasAttribute first gives a clearer failure than a missing-attribute exception.
        ICharTermAttribute termAtt = null;
        if (output.Length > 0)
        {
            Assert.IsTrue(ts.HasAttribute<ICharTermAttribute>(), "has no ICharTermAttribute");
            termAtt = ts.GetAttribute<ICharTermAttribute>();
        }

        IOffsetAttribute offsetAtt = null;
        if (startOffsets != null || endOffsets != null || finalOffset != null)
        {
            Assert.IsTrue(ts.HasAttribute<IOffsetAttribute>(), "has no IOffsetAttribute");
            offsetAtt = ts.GetAttribute<IOffsetAttribute>();
        }

        ITypeAttribute typeAtt = null;
        if (types != null)
        {
            Assert.IsTrue(ts.HasAttribute<ITypeAttribute>(), "has no ITypeAttribute");
            typeAtt = ts.GetAttribute<ITypeAttribute>();
        }

        IPositionIncrementAttribute posIncrAtt = null;
        if (posIncrements != null || finalPosInc != null)
        {
            Assert.IsTrue(ts.HasAttribute<IPositionIncrementAttribute>(), "has no IPositionIncrementAttribute");
            posIncrAtt = ts.GetAttribute<IPositionIncrementAttribute>();
        }

        IPositionLengthAttribute posLengthAtt = null;
        if (posLengths != null)
        {
            Assert.IsTrue(ts.HasAttribute<IPositionLengthAttribute>(), "has no IPositionLengthAttribute");
            posLengthAtt = ts.GetAttribute<IPositionLengthAttribute>();
        }

        IKeywordAttribute keywordAtt = null;
        if (keywordAtts != null)
        {
            Assert.IsTrue(ts.HasAttribute<IKeywordAttribute>(), "has no IKeywordAttribute");
            keywordAtt = ts.GetAttribute<IKeywordAttribute>();
        }

        // *********** From Lucene 8.2.0 **************
        IPayloadAttribute payloadAtt = null;
        if (payloads != null)
        {
            Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>(), "has no IPayloadAttribute");
            payloadAtt = ts.GetAttribute<IPayloadAttribute>();
        }
        // *********** End From Lucene 8.2.0 **************

        // Maps position to the start/end offset:
        IDictionary<int, int> posToStartOffset = new Dictionary<int, int>();
        IDictionary<int, int> posToEndOffset = new Dictionary<int, int>();

        ts.Reset();
        int pos = -1;
        int lastStartOffset = 0;
        for (int i = 0; i < output.Length; i++)
        {
            // extra safety to enforce, that the state is not preserved and also assign bogus values
            ts.ClearAttributes();
            termAtt.SetEmpty().Append("bogusTerm");
            if (offsetAtt != null)
            {
                offsetAtt.SetOffset(14584724, 24683243);
            }
            if (typeAtt != null)
            {
                typeAtt.Type = "bogusType";
            }
            if (posIncrAtt != null)
            {
                posIncrAtt.PositionIncrement = 45987657;
            }
            if (posLengthAtt != null)
            {
                posLengthAtt.PositionLength = 45987653;
            }
            if (keywordAtt != null)
            {
                keywordAtt.IsKeyword = (i & 1) == 0;
            }
            // *********** From Lucene 8.2.0 **************
            if (payloadAtt != null)
            {
                payloadAtt.Payload = new BytesRef(new byte[] { 0x00, unchecked((byte)-0x21), 0x12, unchecked((byte)-0x43), 0x24 });
            }
            // *********** End From Lucene 8.2.0 **************

            bool reset = checkClearAtt.GetAndResetClearCalled(); // reset it, because we called clearAttribute() before
            Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist");
            Assert.IsTrue(reset, "ClearAttributes() was not called correctly in TokenStream chain");

            Assert.AreEqual(output[i], termAtt.ToString(), "term " + i + ", output[i] = " + output[i] + ", termAtt = " + termAtt.ToString());
            if (startOffsets != null)
            {
                Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset, "startOffset " + i);
            }
            if (endOffsets != null)
            {
                Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset, "endOffset " + i);
            }
            if (types != null)
            {
                Assert.AreEqual(types[i], typeAtt.Type, "type " + i);
            }
            if (posIncrements != null)
            {
                Assert.AreEqual(posIncrements[i], posIncrAtt.PositionIncrement, "posIncrement " + i);
            }
            if (posLengths != null)
            {
                Assert.AreEqual(posLengths[i], posLengthAtt.PositionLength, "posLength " + i);
            }
            if (keywordAtts != null)
            {
                Assert.AreEqual(keywordAtts[i], keywordAtt.IsKeyword, "keywordAtt " + i);
            }
            // *********** From Lucene 8.2.0 **************
            if (payloads != null)
            {
                if (payloads[i] != null)
                {
                    Assert.AreEqual(new BytesRef(payloads[i]), payloadAtt.Payload, "payloads " + i);
                }
                else
                {
                    // NOTE(review): mirrors upstream Lucene 8.2.0, but this assertion is a
                    // tautology (payloads[i] is known to be null in this branch); the intent
                    // was presumably to verify payloadAtt.Payload is null — confirm against
                    // upstream before strengthening, as it could fail currently-passing tests.
                    Assert.IsNull(payloads[i], "payloads " + i);
                }
            }
            // *********** End From Lucene 8.2.0 **************

            // we can enforce some basic things about a few attributes even if the caller doesn't check:
            if (offsetAtt != null)
            {
                int startOffset = offsetAtt.StartOffset;
                int endOffset = offsetAtt.EndOffset;
                if (finalOffset != null)
                {
                    Assert.IsTrue(startOffset <= (int)finalOffset, "startOffset must be <= finalOffset");
                    Assert.IsTrue(endOffset <= (int)finalOffset, "endOffset must be <= finalOffset: got endOffset=" + endOffset + " vs finalOffset=" + (int)finalOffset);
                }

                if (offsetsAreCorrect)
                {
                    Assert.IsTrue(offsetAtt.StartOffset >= lastStartOffset, "offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset);
                    lastStartOffset = offsetAtt.StartOffset;
                }

                if (offsetsAreCorrect && posLengthAtt != null && posIncrAtt != null)
                {
                    // Validate offset consistency in the graph, ie
                    // all tokens leaving from a certain pos have the
                    // same startOffset, and all tokens arriving to a
                    // certain pos have the same endOffset:
                    int posInc = posIncrAtt.PositionIncrement;
                    pos += posInc;

                    int posLength = posLengthAtt.PositionLength;

                    if (!posToStartOffset.TryGetValue(pos, out int oldStartOffset))
                    {
                        // First time we've seen a token leaving from this position:
                        posToStartOffset[pos] = startOffset;
                        //System.out.println("  + s " + pos + " -> " + startOffset);
                    }
                    else
                    {
                        // We've seen a token leaving from this position
                        // before; verify the startOffset is the same:
                        //System.out.println("  + vs " + pos + " -> " + startOffset);
                        Assert.AreEqual(oldStartOffset, startOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt);
                    }

                    int endPos = pos + posLength;

                    if (!posToEndOffset.TryGetValue(endPos, out int oldEndOffset))
                    {
                        // First time we've seen a token arriving to this position:
                        posToEndOffset[endPos] = endOffset;
                        //System.out.println("  + e " + endPos + " -> " + endOffset);
                    }
                    else
                    {
                        // We've seen a token arriving to this position
                        // before; verify the endOffset is the same:
                        //System.out.println("  + ve " + endPos + " -> " + endOffset);
                        Assert.AreEqual(oldEndOffset, endOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt);
                    }
                }
            }
            if (posIncrAtt != null)
            {
                if (i == 0)
                {
                    Assert.IsTrue(posIncrAtt.PositionIncrement >= 1, "first posIncrement must be >= 1");
                }
                else
                {
                    Assert.IsTrue(posIncrAtt.PositionIncrement >= 0, "posIncrement must be >= 0");
                }
            }
            if (posLengthAtt != null)
            {
                Assert.IsTrue(posLengthAtt.PositionLength >= 1, "posLength must be >= 1");
            }
        }

        if (ts.IncrementToken())
        {
            Assert.Fail("TokenStream has more tokens than expected (expected count=" + output.Length + "); extra token=" + termAtt);
        }

        // repeat our extra safety checks for End()
        ts.ClearAttributes();
        if (termAtt != null)
        {
            termAtt.SetEmpty().Append("bogusTerm");
        }
        if (offsetAtt != null)
        {
            offsetAtt.SetOffset(14584724, 24683243);
        }
        if (typeAtt != null)
        {
            typeAtt.Type = "bogusType";
        }
        if (posIncrAtt != null)
        {
            posIncrAtt.PositionIncrement = 45987657;
        }
        if (posLengthAtt != null)
        {
            posLengthAtt.PositionLength = 45987653;
        }

        // LUCENENET: the result is deliberately discarded — this call only resets the
        // "clear was called" flag, so the check after End() observes End()'s own behavior.
        // (Previously the value was stored in an unused local.)
        checkClearAtt.GetAndResetClearCalled();

        ts.End();
        Assert.IsTrue(checkClearAtt.GetAndResetClearCalled(), "base.End()/ClearAttributes() was not called correctly in End()");

        if (finalOffset != null)
        {
            Assert.AreEqual((int)finalOffset, offsetAtt.EndOffset, "finalOffset");
        }
        if (offsetAtt != null)
        {
            Assert.IsTrue(offsetAtt.EndOffset >= 0, "finalOffset must be >= 0");
        }
        if (finalPosInc != null)
        {
            Assert.AreEqual((int)finalPosInc, posIncrAtt.PositionIncrement, "finalPosInc");
        }
    }
    finally
    {
        // Always close, even when an assertion above throws, so a failing test does not
        // leave the stream open and pollute subsequent tests.
        ts.Close();
    }
}