in lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java [287:499]
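// DirectField loads one field's full postings into RAM: every term's bytes,
// doc IDs, freqs, positions, offsets and payloads, using a packed layout for
// low-freq terms and parallel arrays for high-freq terms.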
public DirectField(
    SegmentReadState state, String field, Terms termsIn, int minSkipCount, int lowFreqCutoff)
    throws IOException {
  final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);

  sumTotalTermFreq = termsIn.getSumTotalTermFreq();
  sumDocFreq = termsIn.getSumDocFreq();
  docCount = termsIn.getDocCount();

  final int numTerms = (int) termsIn.size();
  if (numTerms == -1) {
    throw new IllegalArgumentException("codec does not provide Terms.size()");
  }
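  // All term bytes are concatenated into one shared byte[]; termOffsets[i]
  // marks where term i starts, with one extra slot for the end sentinel: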
  terms = new TermAndSkip[numTerms];
  termOffsets = new int[1 + numTerms];

  byte[] termBytes = new byte[1024];

  this.minSkipCount = minSkipCount;

  hasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS) > 0;
  hasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) > 0;
  hasOffsets =
      fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) > 0;
  hasPayloads = fieldInfo.hasPayloads();
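  // The enums are declared outside the term loop so termsEnum.postings() can
  // reuse the previous instance rather than allocating a new one per term: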
  BytesRef term;
  PostingsEnum postingsEnum = null;
  PostingsEnum docsAndPositionsEnum = null;
  final TermsEnum termsEnum = termsIn.iterator();
  int termOffset = 0;

  final IntArrayWriter scratch = new IntArrayWriter();

  // Used for payloads, if any:
  final ByteBuffersDataOutput ros = ByteBuffersDataOutput.newResettableInstance();

  // if (DEBUG) {
  //   System.out.println("\nLOAD terms seg=" + state.segmentInfo.name + " field=" + field
  //       + " hasOffsets=" + hasOffsets + " hasFreq=" + hasFreq + " hasPos=" + hasPos
  //       + " hasPayloads=" + hasPayloads);
  // }
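  // Visit every term once, copying its bytes into the shared buffer and
  // fully materializing its postings in memory: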
  while ((term = termsEnum.next()) != null) {
    final int docFreq = termsEnum.docFreq();
    final long totalTermFreq = termsEnum.totalTermFreq();

    // if (DEBUG) {
    //   System.out.println("  term=" + term.utf8ToString());
    // }

    termOffsets[count] = termOffset;

    if (termBytes.length < (termOffset + term.length)) {
      termBytes = ArrayUtil.grow(termBytes, termOffset + term.length);
    }
    System.arraycopy(term.bytes, term.offset, termBytes, termOffset, term.length);
    termOffset += term.length;
    termOffsets[count + 1] = termOffset;

    if (hasPos) {
      docsAndPositionsEnum = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.ALL);
    } else {
      postingsEnum = termsEnum.postings(postingsEnum);
    }
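    // ent ends up as either a LowFreqTerm or a HighFreqTerm, depending on
    // this term's docFreq: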
    final TermAndSkip ent;

    final PostingsEnum postingsEnum2;
    if (hasPos) {
      postingsEnum2 = docsAndPositionsEnum;
    } else {
      postingsEnum2 = postingsEnum;
    }

    int docID;
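    // Layout of the packed int[] for a low-freq term, per document:
    // docID [, freq [, position [, startOffset, endOffset] [, payloadLength]] x freq];
    // the payload bytes themselves are accumulated separately in ros: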
    if (docFreq <= lowFreqCutoff) {
      ros.reset();

      // Pack postings for low-freq terms into a single int[]:
      while ((docID = postingsEnum2.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
        scratch.add(docID);
        if (hasFreq) {
          final int freq = postingsEnum2.freq();
          scratch.add(freq);
          if (hasPos) {
            for (int pos = 0; pos < freq; pos++) {
              scratch.add(docsAndPositionsEnum.nextPosition());
              if (hasOffsets) {
                scratch.add(docsAndPositionsEnum.startOffset());
                scratch.add(docsAndPositionsEnum.endOffset());
              }
              if (hasPayloads) {
                final BytesRef payload = docsAndPositionsEnum.getPayload();
                if (payload != null) {
                  scratch.add(payload.length);
                  ros.writeBytes(payload.bytes, payload.offset, payload.length);
                } else {
                  scratch.add(0);
                }
              }
            }
          }
        }
      }

      final byte[] payloads = hasPayloads ? ros.toArrayCopy() : null;
      final int[] postings = scratch.get();

      ent = new LowFreqTerm(postings, payloads, docFreq, (int) totalTermFreq);
    } else {
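      // High-freq terms use parallel arrays instead: docs[i] and freqs[i] per
      // document, positions[i] holding the flattened position (and offset)
      // ints for doc i, and payloads[i][pos] holding each payload's bytes: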
      final int[] docs = new int[docFreq];
      final int[] freqs;
      final int[][] positions;
      final byte[][][] payloads;

      if (hasFreq) {
        freqs = new int[docFreq];
        if (hasPos) {
          positions = new int[docFreq][];
          if (hasPayloads) {
            payloads = new byte[docFreq][][];
          } else {
            payloads = null;
          }
        } else {
          positions = null;
          payloads = null;
        }
      } else {
        freqs = null;
        positions = null;
        payloads = null;
      }
      // Use separate int[] for the postings for high-freq terms:
      int upto = 0;
      while ((docID = postingsEnum2.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
        docs[upto] = docID;
        if (hasFreq) {
          final int freq = postingsEnum2.freq();
          freqs[upto] = freq;
          if (hasPos) {
            final int mult;
            if (hasOffsets) {
              mult = 3;
            } else {
              mult = 1;
            }
            if (hasPayloads) {
              payloads[upto] = new byte[freq][];
            }
            positions[upto] = new int[mult * freq];
            int posUpto = 0;
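            // Each position occupies mult ints: the position itself, plus
            // startOffset and endOffset when offsets are indexed: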
            for (int pos = 0; pos < freq; pos++) {
              positions[upto][posUpto] = docsAndPositionsEnum.nextPosition();
              if (hasPayloads) {
                BytesRef payload = docsAndPositionsEnum.getPayload();
                if (payload != null) {
                  byte[] payloadBytes = new byte[payload.length];
                  System.arraycopy(
                      payload.bytes, payload.offset, payloadBytes, 0, payload.length);
                  payloads[upto][pos] = payloadBytes;
                }
              }
              posUpto++;
              if (hasOffsets) {
                positions[upto][posUpto++] = docsAndPositionsEnum.startOffset();
                positions[upto][posUpto++] = docsAndPositionsEnum.endOffset();
              }
            }
          }
        }
        upto++;
      }
      assert upto == docFreq;
      ent = new HighFreqTerm(docs, freqs, positions, payloads, totalTermFreq);
    }

    terms[count] = ent;
    setSkips(count, termBytes);
    count++;
  }

  // End sentinel:
  termOffsets[count] = termOffset;

  finishSkips();

  // System.out.println(skipCount + " skips: " + field);
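  // Trim the growable term buffer down to exactly the bytes used: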
  this.termBytes = new byte[termOffset];
  System.arraycopy(termBytes, 0, this.termBytes, 0, termOffset);

  // Pack skips:
  this.skips = new int[skipCount];
  this.skipOffsets = new int[1 + numTerms];

  int skipOffset = 0;
  for (int i = 0; i < numTerms; i++) {
    final int[] termSkips = terms[i].skips;
    skipOffsets[i] = skipOffset;
    if (termSkips != null) {
      System.arraycopy(termSkips, 0, skips, skipOffset, termSkips.length);
      skipOffset += termSkips.length;
      terms[i].skips = null;
    }
  }
  this.skipOffsets[numTerms] = skipOffset;
  assert skipOffset == skipCount;
}