in src/Lucene.Net.Codecs/Memory/DirectPostingsFormat.cs [369:629]
public DirectField(SegmentReadState state, string field, Terms termsIn, int minSkipCount, int lowFreqCutoff)
{
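// Load every term for this field, and all of its postings, into memory up front.
// Each term's postings are stored either packed into a single int[] (low-freq terms)
// or as parallel per-document arrays (high-freq terms), chosen per term below.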
FieldInfo fieldInfo = state.FieldInfos.FieldInfo(field);
sumTotalTermFreq = termsIn.SumTotalTermFreq;
sumDocFreq = termsIn.SumDocFreq;
docCount = termsIn.DocCount;
int numTerms = (int) termsIn.Count;
if (numTerms == -1)
{
throw new ArgumentException("codec does not provide Terms.Count");
}
terms = new TermAndSkip[numTerms];
termOffsets = new int[1 + numTerms];
byte[] termBytes = new byte[1024];
this.minSkipCount = minSkipCount;
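// Determine which posting data (freqs, positions, offsets, payloads) this field indexes: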
// LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
hasFreq = IndexOptionsComparer.Default.Compare(fieldInfo.IndexOptions, IndexOptions.DOCS_ONLY) > 0;
hasPos = IndexOptionsComparer.Default.Compare(fieldInfo.IndexOptions, IndexOptions.DOCS_AND_FREQS) > 0;
hasOffsets = IndexOptionsComparer.Default.Compare(fieldInfo.IndexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) > 0;
hasPayloads = fieldInfo.HasPayloads;
BytesRef term;
DocsEnum docsEnum = null;
DocsAndPositionsEnum docsAndPositionsEnum = null;
TermsEnum termsEnum = termsIn.GetEnumerator();
int termOffset = 0;
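// Reusable writer for packing a low-freq term's postings into a single int[]: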
Int32ArrayWriter scratch = new Int32ArrayWriter();
// Used for payloads, if any:
using RAMOutputStream ros = new RAMOutputStream(); // LUCENENET specific - dispose when done
// if (DEBUG) {
// System.out.println("\nLOAD terms seg=" + state.segmentInfo.name + " field=" + field + " hasOffsets=" + hasOffsets + " hasFreq=" + hasFreq + " hasPos=" + hasPos + " hasPayloads=" + hasPayloads);
// }
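// Walk every term in the field, copying its bytes into termBytes and loading its postings: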
while (termsEnum.MoveNext())
{
term = termsEnum.Term;
int docFreq = termsEnum.DocFreq;
long totalTermFreq = termsEnum.TotalTermFreq;
// if (DEBUG) {
// System.out.println(" term=" + term.utf8ToString());
// }
termOffsets[count] = termOffset;
if (termBytes.Length < (termOffset + term.Length))
{
termBytes = ArrayUtil.Grow(termBytes, termOffset + term.Length);
}
Arrays.Copy(term.Bytes, term.Offset, termBytes, termOffset, term.Length);
termOffset += term.Length;
termOffsets[count + 1] = termOffset;
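// Use a positions-aware enum when positions are indexed, otherwise a docs-only enum: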
if (hasPos)
{
docsAndPositionsEnum = termsEnum.DocsAndPositions(null, docsAndPositionsEnum);
}
else
{
docsEnum = termsEnum.Docs(null, docsEnum);
}
TermAndSkip ent;
DocsEnum docsEnum2 = hasPos ? docsAndPositionsEnum : docsEnum;
int docID;
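// Terms whose docFreq is at or below lowFreqCutoff get the packed encoding;
// all other terms get parallel per-document arrays: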
if (docFreq <= lowFreqCutoff)
{
ros.Reset();
// Pack postings for low-freq terms into a single int[]:
while ((docID = docsEnum2.NextDoc()) != DocsEnum.NO_MORE_DOCS)
{
scratch.Add(docID);
if (hasFreq)
{
int freq = docsEnum2.Freq;
scratch.Add(freq);
if (hasPos)
{
for (int pos = 0; pos < freq; pos++)
{
scratch.Add(docsAndPositionsEnum.NextPosition());
if (hasOffsets)
{
scratch.Add(docsAndPositionsEnum.StartOffset);
scratch.Add(docsAndPositionsEnum.EndOffset);
}
if (hasPayloads)
{
BytesRef payload = docsAndPositionsEnum.GetPayload();
if (payload != null)
{
scratch.Add(payload.Length);
ros.WriteBytes(payload.Bytes, payload.Offset, payload.Length);
}
else
{
scratch.Add(0);
}
}
}
}
}
}
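// Copy any payload bytes accumulated in the RAMOutputStream into one packed array: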
byte[] payloads;
if (hasPayloads)
{
ros.Flush();
payloads = new byte[(int) ros.Length];
ros.WriteTo(payloads, 0);
}
else
{
payloads = null;
}
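// Snapshot the packed postings for this term from the scratch writer: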
int[] postings = scratch.Get();
ent = new LowFreqTerm(postings, payloads, docFreq, (int) totalTermFreq);
}
else
{
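// High-freq term: allocate parallel per-document arrays, but only for the posting
// data this field actually indexes: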
var docs = new int[docFreq];
int[] freqs;
int[][] positions;
byte[][][] payloads;
if (hasFreq)
{
freqs = new int[docFreq];
if (hasPos)
{
positions = new int[docFreq][];
if (hasPayloads)
{
payloads = new byte[docFreq][][];
}
else
{
payloads = null;
}
}
else
{
positions = null;
payloads = null;
}
}
else
{
freqs = null;
positions = null;
payloads = null;
}
// Use separate int[] for the postings for high-freq
// terms:
int upto = 0;
while ((docID = docsEnum2.NextDoc()) != DocsEnum.NO_MORE_DOCS)
{
docs[upto] = docID;
if (hasFreq)
{
int freq = docsEnum2.Freq;
freqs[upto] = freq;
if (hasPos)
{
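// Each position consumes three ints (position, startOffset, endOffset) when offsets
// are indexed, otherwise one: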
int mult = hasOffsets ? 3 : 1;
if (hasPayloads)
{
payloads[upto] = new byte[freq][];
}
positions[upto] = new int[mult * freq];
int posUpto = 0;
for (int pos = 0; pos < freq; pos++)
{
positions[upto][posUpto] = docsAndPositionsEnum.NextPosition();
if (hasPayloads)
{
BytesRef payload = docsAndPositionsEnum.GetPayload();
if (payload != null)
{
var payloadBytes = new byte[payload.Length];
Arrays.Copy(payload.Bytes, payload.Offset, payloadBytes, 0, payload.Length);
payloads[upto][pos] = payloadBytes;
}
}
posUpto++;
if (hasOffsets)
{
positions[upto][posUpto++] = docsAndPositionsEnum.StartOffset;
positions[upto][posUpto++] = docsAndPositionsEnum.EndOffset;
}
}
}
}
upto++;
}
if (Debugging.AssertsEnabled) Debugging.Assert(upto == docFreq);
ent = new HighFreqTerm(docs, freqs, positions, payloads, totalTermFreq);
}
terms[count] = ent;
SetSkips(count, termBytes);
count++;
}
// End sentinel:
termOffsets[count] = termOffset;
FinishSkips();
//System.out.println(skipCount + " skips: " + field);
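// Trim the over-allocated term bytes buffer down to the bytes actually used: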
this.termBytes = new byte[termOffset];
Arrays.Copy(termBytes, 0, this.termBytes, 0, termOffset);
// Pack skips:
this.skips = new int[skipCount];
this.skipOffsets = new int[1 + numTerms];
int skipOffset = 0;
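// Concatenate each term's skip entries into the shared skips array, recording the
// per-term offsets, and drop the per-term arrays: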
for (int i = 0; i < numTerms; i++)
{
int[] termSkips = terms[i].Skips;
skipOffsets[i] = skipOffset;
if (termSkips != null)
{
Arrays.Copy(termSkips, 0, skips, skipOffset, termSkips.Length);
skipOffset += termSkips.Length;
terms[i].Skips = null;
}
}
this.skipOffsets[numTerms] = skipOffset;
if (Debugging.AssertsEnabled) Debugging.Assert(skipOffset == skipCount);
}