in src/Lucene.Net/Codecs/Compressing/CompressingTermVectorsReader.cs [168:588]
public override Fields Get(int doc)
{
EnsureOpen();
// seek to the right place
{
long startPointer = indexReader.GetStartPointer(doc);
vectorsStream.Seek(startPointer);
}
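// Term vectors are stored in compressed chunks of one or more documents.
// The index only tells us where the chunk starts, so we first decode the
// shared chunk metadata below and then narrow everything down to the
// requested doc.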
// decode
// - docBase: first doc ID of the chunk
// - chunkDocs: number of docs of the chunk
int docBase = vectorsStream.ReadVInt32();
int chunkDocs = vectorsStream.ReadVInt32();
if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs)
{
throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc + " (resource=" + vectorsStream + ")");
}
int skip; // number of fields to skip
int numFields; // number of fields of the document we're looking for
int totalFields; // total number of fields of the chunk (sum for all docs)
if (chunkDocs == 1)
{
skip = 0;
numFields = totalFields = vectorsStream.ReadVInt32();
}
else
{
reader.Reset(vectorsStream, chunkDocs);
int sum = 0;
for (int i = docBase; i < doc; ++i)
{
sum += (int)reader.Next();
}
skip = sum;
numFields = (int)reader.Next();
sum += numFields;
for (int i = doc + 1; i < docBase + chunkDocs; ++i)
{
sum += (int)reader.Next();
}
totalFields = sum;
}
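// Example (hypothetical values): chunkDocs = 3 with per-doc field counts
// [2, 4, 1], and doc is the middle one => skip = 2, numFields = 4,
// totalFields = 7.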
if (numFields == 0)
{
// no vectors
return null;
}
// read field numbers that have term vectors
int[] fieldNums;
{
int token = vectorsStream.ReadByte() & 0xFF;
if (Debugging.AssertsEnabled) Debugging.Assert(token != 0); // means no term vectors, cannot happen since we checked for numFields == 0
int bitsPerFieldNum = token & 0x1F;
int totalDistinctFields = token >> 5; // token is masked to 8 bits above, so a signed shift is equivalent to an unsigned one
if (totalDistinctFields == 0x07)
{
totalDistinctFields += vectorsStream.ReadVInt32();
}
++totalDistinctFields;
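// Example (hypothetical token): token = 0xE3 => bitsPerFieldNum = 0xE3 & 0x1F = 3
// and header count = 0xE3 >> 5 = 7 == 0x07, so an extra VInt follows; with a
// VInt of 2 we get 7 + 2 + 1 = 10 distinct fields.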
PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalDistinctFields, bitsPerFieldNum, 1);
fieldNums = new int[totalDistinctFields];
for (int i = 0; i < totalDistinctFields; ++i)
{
fieldNums[i] = (int)it.Next();
}
}
// read field numbers and flags
int[] fieldNumOffs = new int[numFields];
PackedInt32s.Reader flags;
{
int bitsPerOff = PackedInt32s.BitsRequired(fieldNums.Length - 1);
PackedInt32s.Reader allFieldNumOffs = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
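// A leading VInt selects between two flag encodings: 0 means every occurrence
// of a field in the chunk has the same flags (stored once per distinct field
// number and expanded to one entry per occurrence here), 1 means the flags
// were stored per field occurrence directly.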
switch (vectorsStream.ReadVInt32())
{
case 0:
PackedInt32s.Reader fieldFlags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, fieldNums.Length, CompressingTermVectorsWriter.FLAGS_BITS);
PackedInt32s.Mutable f = PackedInt32s.GetMutable(totalFields, CompressingTermVectorsWriter.FLAGS_BITS, PackedInt32s.COMPACT);
for (int i = 0; i < totalFields; ++i)
{
int fieldNumOff = (int)allFieldNumOffs.Get(i);
if (Debugging.AssertsEnabled) Debugging.Assert(fieldNumOff >= 0 && fieldNumOff < fieldNums.Length);
int fgs = (int)fieldFlags.Get(fieldNumOff);
f.Set(i, fgs);
}
flags = f;
break;
case 1:
flags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, CompressingTermVectorsWriter.FLAGS_BITS);
break;
default:
throw AssertionError.Create();
}
for (int i = 0; i < numFields; ++i)
{
fieldNumOffs[i] = (int)allFieldNumOffs.Get(skip + i);
}
}
// number of terms per field for all fields
PackedInt32s.Reader numTerms;
int totalTerms;
{
int bitsRequired = vectorsStream.ReadVInt32();
numTerms = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsRequired);
int sum = 0;
for (int i = 0; i < totalFields; ++i)
{
sum += (int)numTerms.Get(i);
}
totalTerms = sum;
}
// term lengths
int docOff = 0, docLen = 0, totalLen;
int[] fieldLengths = new int[numFields];
int[][] prefixLengths = new int[numFields][];
int[][] suffixLengths = new int[numFields][];
{
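// Terms use shared-prefix compression: for each term we read how many bytes
// it shares with the previous term (prefix length) and how many new bytes it
// adds (suffix length). Only suffix bytes end up in the compressed blob, so a
// field's byte length is the sum of its suffix lengths (see Sum below).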
reader.Reset(vectorsStream, totalTerms);
// skip
int toSkip = 0;
for (int i = 0; i < skip; ++i)
{
toSkip += (int)numTerms.Get(i);
}
reader.Skip(toSkip);
// read prefix lengths
for (int i = 0; i < numFields; ++i)
{
int termCount = (int)numTerms.Get(skip + i);
int[] fieldPrefixLengths = new int[termCount];
prefixLengths[i] = fieldPrefixLengths;
for (int j = 0; j < termCount; )
{
Int64sRef next = reader.Next(termCount - j);
for (int k = 0; k < next.Length; ++k)
{
fieldPrefixLengths[j++] = (int)next.Int64s[next.Offset + k];
}
}
}
reader.Skip(totalTerms - reader.Ord);
reader.Reset(vectorsStream, totalTerms);
// skip
//toSkip = 0; // LUCENENET: IDE0059: Remove unnecessary value assignment
for (int i = 0; i < skip; ++i)
{
for (int j = 0; j < numTerms.Get(i); ++j)
{
docOff += (int)reader.Next();
}
}
for (int i = 0; i < numFields; ++i)
{
int termCount = (int)numTerms.Get(skip + i);
int[] fieldSuffixLengths = new int[termCount];
suffixLengths[i] = fieldSuffixLengths;
for (int j = 0; j < termCount; )
{
Int64sRef next = reader.Next(termCount - j);
for (int k = 0; k < next.Length; ++k)
{
fieldSuffixLengths[j++] = (int)next.Int64s[next.Offset + k];
}
}
fieldLengths[i] = Sum(suffixLengths[i]);
docLen += fieldLengths[i];
}
totalLen = docOff + docLen;
for (int i = skip + numFields; i < totalFields; ++i)
{
for (int j = 0; j < numTerms.Get(i); ++j)
{
totalLen += (int)reader.Next();
}
}
}
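// At this point docOff is the count of suffix bytes belonging to docs/fields
// before ours in the chunk, docLen the count belonging to our fields, and
// totalLen the chunk-wide total; these drive the decompression window below.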
// term freqs
int[] termFreqs = new int[totalTerms];
{
reader.Reset(vectorsStream, totalTerms);
for (int i = 0; i < totalTerms; )
{
Int64sRef next = reader.Next(totalTerms - i);
for (int k = 0; k < next.Length; ++k)
{
termFreqs[i++] = 1 + (int)next.Int64s[next.Offset + k];
}
}
}
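// Frequencies are stored as freq - 1 since a term occurs at least once,
// hence the "1 +" when decoding above.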
// total number of positions, offsets and payloads
int totalPositions = 0, totalOffsets = 0, totalPayloads = 0;
for (int i = 0, termIndex = 0; i < totalFields; ++i)
{
int f = (int)flags.Get(i);
int termCount = (int)numTerms.Get(i);
for (int j = 0; j < termCount; ++j)
{
int freq = termFreqs[termIndex++];
if ((f & CompressingTermVectorsWriter.POSITIONS) != 0)
{
totalPositions += freq;
}
if ((f & CompressingTermVectorsWriter.OFFSETS) != 0)
{
totalOffsets += freq;
}
if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
{
totalPayloads += freq;
}
}
if (Debugging.AssertsEnabled) Debugging.Assert(i != totalFields - 1 || termIndex == totalTerms, "{0} {1}", termIndex, totalTerms);
}
int[][] positionIndex = PositionIndex(skip, numFields, numTerms, termFreqs);
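// positionIndex[i][j] appears to be the cumulative frequency of the first j
// terms of field i (so positionIndex[i][termCount] is the field's total
// number of positions); it locates term j's slice inside the flat per-field
// position/offset/length arrays used below.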
int[][] positions, startOffsets, lengths;
if (totalPositions > 0)
{
positions = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.POSITIONS, totalPositions, positionIndex);
}
else
{
positions = new int[numFields][];
}
if (totalOffsets > 0)
{
// average number of chars per term
float[] charsPerTerm = new float[fieldNums.Length];
for (int i = 0; i < charsPerTerm.Length; ++i)
{
charsPerTerm[i] = J2N.BitConversion.Int32BitsToSingle(vectorsStream.ReadInt32());
}
startOffsets = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex);
lengths = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex);
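// Note the two ReadPositions calls above both pass OFFSETS on purpose: the
// offsets data holds two streams, start offsets then lengths. Start offsets
// were written as deltas from an expected value (average chars per term times
// the position) and lengths as deltas from the term length; both get patched
// back below.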
for (int i = 0; i < numFields; ++i)
{
int[] fStartOffsets = startOffsets[i];
int[] fPositions = positions[i];
// patch offsets from positions
if (fStartOffsets != null && fPositions != null)
{
float fieldCharsPerTerm = charsPerTerm[fieldNumOffs[i]];
for (int j = 0; j < fStartOffsets.Length; ++j)
{
fStartOffsets[j] += (int)(fieldCharsPerTerm * fPositions[j]);
}
}
if (fStartOffsets != null)
{
int[] fPrefixLengths = prefixLengths[i];
int[] fSuffixLengths = suffixLengths[i];
int[] fLengths = lengths[i];
for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j)
{
// delta-decode start offsets and patch lengths using term lengths
int termLength = fPrefixLengths[j] + fSuffixLengths[j];
fLengths[positionIndex[i][j]] += termLength;
for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k)
{
fStartOffsets[k] += fStartOffsets[k - 1];
fLengths[k] += termLength;
}
}
}
}
}
else
{
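// no offsets were stored: both arrays stay full of null entries, so sharing one allocation is safe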
startOffsets = lengths = new int[numFields][];
}
if (totalPositions > 0)
{
// delta-decode positions
for (int i = 0; i < numFields; ++i)
{
int[] fPositions = positions[i];
int[] fPositionIndex = positionIndex[i];
if (fPositions != null)
{
for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j)
{
// delta-decode positions: within a term, each position is stored as a delta from the previous one
for (int k = fPositionIndex[j] + 1; k < fPositionIndex[j + 1]; ++k)
{
fPositions[k] += fPositions[k - 1];
}
}
}
}
}
// payload lengths
int[][] payloadIndex = new int[numFields][];
int totalPayloadLength = 0;
int payloadOff = 0;
int payloadLen = 0;
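// Payload lengths follow the same skip / read / skip-to-end pattern as the
// term lengths above: payloadOff accumulates the payload bytes stored before
// our doc, payloadLen the bytes of our doc, and payloadIndex[i] maps each
// position of field i to the start of its payload within payloadBytes.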
if (totalPayloads > 0)
{
reader.Reset(vectorsStream, totalPayloads);
// skip
int termIndex = 0;
for (int i = 0; i < skip; ++i)
{
int f = (int)flags.Get(i);
int termCount = (int)numTerms.Get(i);
if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
{
for (int j = 0; j < termCount; ++j)
{
int freq = termFreqs[termIndex + j];
for (int k = 0; k < freq; ++k)
{
payloadOff += (int)reader.Next();
}
}
}
termIndex += termCount;
}
totalPayloadLength = payloadOff;
// read doc payload lengths
for (int i = 0; i < numFields; ++i)
{
int f = (int)flags.Get(skip + i);
int termCount = (int)numTerms.Get(skip + i);
if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
{
int totalFreq = positionIndex[i][termCount];
payloadIndex[i] = new int[totalFreq + 1];
int posIdx = 0;
payloadIndex[i][posIdx] = payloadLen;
for (int j = 0; j < termCount; ++j)
{
int freq = termFreqs[termIndex + j];
for (int k = 0; k < freq; ++k)
{
int payloadLength = (int)reader.Next();
payloadLen += payloadLength;
payloadIndex[i][posIdx + 1] = payloadLen;
++posIdx;
}
}
if (Debugging.AssertsEnabled) Debugging.Assert(posIdx == totalFreq);
}
termIndex += termCount;
}
totalPayloadLength += payloadLen;
for (int i = skip + numFields; i < totalFields; ++i)
{
int f = (int)flags.Get(i);
int termCount = (int)numTerms.Get(i);
if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
{
for (int j = 0; j < termCount; ++j)
{
int freq = termFreqs[termIndex + j];
for (int k = 0; k < freq; ++k)
{
totalPayloadLength += (int)reader.Next();
}
}
}
termIndex += termCount;
}
if (Debugging.AssertsEnabled) Debugging.Assert(termIndex == totalTerms, "{0} {1}", termIndex, totalTerms);
}
// decompress data
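// The decompressed chunk stores each document's term suffix bytes followed by
// that document's payload bytes, which is why a single contiguous window works:
// skip (docOff + payloadOff) bytes, keep (docLen + payloadLen) bytes, then
// split the window into suffixBytes [0, docLen) and payloadBytes
// [docLen, docLen + payloadLen).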
BytesRef suffixBytes = new BytesRef();
decompressor.Decompress(vectorsStream, totalLen + totalPayloadLength, docOff + payloadOff, docLen + payloadLen, suffixBytes);
suffixBytes.Length = docLen;
BytesRef payloadBytes = new BytesRef(suffixBytes.Bytes, suffixBytes.Offset + docLen, payloadLen);
// flags of the fields of the requested document ("fieldFlags" is already used by the inner switch scope above)
int[] docFieldFlags = new int[numFields];
for (int i = 0; i < numFields; ++i)
{
docFieldFlags[i] = (int)flags.Get(skip + i);
}
int[] fieldNumTerms = new int[numFields];
for (int i = 0; i < numFields; ++i)
{
fieldNumTerms[i] = (int)numTerms.Get(skip + i);
}
int[][] fieldTermFreqs = new int[numFields][];
{
int termIdx = 0;
for (int i = 0; i < skip; ++i)
{
termIdx += (int)numTerms.Get(i);
}
for (int i = 0; i < numFields; ++i)
{
int termCount = (int)numTerms.Get(skip + i);
fieldTermFreqs[i] = new int[termCount];
for (int j = 0; j < termCount; ++j)
{
fieldTermFreqs[i][j] = termFreqs[termIdx++];
}
}
}
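// Everything decoded above has been sliced down to the numFields fields of
// the requested document; TVFields exposes these parallel arrays through the
// Fields/Terms/TermsEnum API.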
if (Debugging.AssertsEnabled) Debugging.Assert(Sum(fieldLengths) == docLen, "{0} != {1}", Sum(fieldLengths), docLen);
return new TVFields(this, fieldNums, docFieldFlags, fieldNumOffs, fieldNumTerms, fieldLengths, prefixLengths, suffixLengths, fieldTermFreqs, positionIndex, positions, startOffsets, lengths, payloadBytes, payloadIndex, suffixBytes);
}