in lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java [108:273]
public Void call() throws IOException {
  // Counts the facet ordinals of this task's segment into the shared `counts` accumulator.
  // With non-null `hits` only docs from hits.bits() that also carry a value are visited;
  // otherwise every doc with a value is visited, filtered by live docs when the reader
  // reports any. Always returns null: the result is published solely through `counts`.

  // Counting collected hits, but this segment collected none: nothing to do.
  if (hits != null && hits.totalHits() == 0) {
    return null;
  }

  SortedSetDocValues sortedSet = DocValues.getSortedSet(leafReader, field);
  if (sortedSet == null) {
    // No values to count in this segment.
    return null;
  }

  // Working against SortedDocValues is slightly more efficient when the field is actually
  // single-valued (see: LUCENE-5309), so unwrap the singleton whenever there is one.
  SortedDocValues singleton = DocValues.unwrapSingleton(sortedSet);
  DocIdSetIterator valueDocs;
  if (singleton != null) {
    valueDocs = singleton;
  } else {
    valueDocs = sortedSet;
  }

  // TODO: yet another option is to count all segs
  // first, only in seg-ord space, and then do a
  // merge-sort-PQ in the end to only "resolve to
  // global" those seg ords that can compete, if we know
  // we just want top K? ie, this is the same algo
  // that'd be used for merging facets across shards
  // (distributed faceting). but this has much higher
  // temp ram req'ts (sum of number of ords across all
  // segs)

  DocIdSetIterator docs;
  if (hits != null) {
    // Visit only docs that are both collected hits and have a value.
    docs = ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits().iterator(), valueDocs));
  } else {
    // Count all.
    // The liveDocs bits are fetched here rather than in the constructor: initializing them
    // in the constructor means they are created in the calling thread but used in a
    // different thread, which leads to an AssertionError. See LUCENE-10134
    final Bits liveDocs = leafReader.getLiveDocs();
    if (liveDocs == null) {
      docs = valueDocs;
    } else {
      docs = FacetUtils.liveDocsDISI(valueDocs, liveDocs);
    }
  }

  // True when `docs` is the very same object as the doc-values iterator (no wrapper).
  // In that case the loops below advance the doc-values variable itself instead of `docs`.
  // NOTE(review): the duplicated loops look like a deliberate call-site specialization;
  // confirm before merging them.
  final boolean unwrapped = docs == valueDocs;

  if (ordinalMap == null) {
    // No ord mapping (e.g., single segment index): aggregate directly into counts.
    if (singleton != null) {
      if (unwrapped) {
        while (singleton.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          counts.incrementAndGet(singleton.ordValue());
        }
      } else {
        while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          counts.incrementAndGet(singleton.ordValue());
        }
      }
    } else if (unwrapped) {
      while (sortedSet.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        for (int n = 0; n < sortedSet.docValueCount(); n++) {
          counts.incrementAndGet((int) sortedSet.nextOrd());
        }
      }
    } else {
      while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        for (int n = 0; n < sortedSet.docValueCount(); n++) {
          counts.incrementAndGet((int) sortedSet.nextOrd());
        }
      }
    }
    return null;
  }

  final LongValues globalOrds = ordinalMap.getGlobalOrds(segOrd);
  int segOrdCount = (int) sortedSet.getValueCount();

  // Fewer hits than a tenth of this segment's ord count: remapping each visited ord is
  // chosen over allocating and scanning a per-segment count array.
  final boolean fewHits = hits != null && hits.totalHits() < segOrdCount / 10;
  if (fewHits) {
    // Remap every ord to its global ord as we iterate.
    if (singleton != null) {
      if (unwrapped) {
        while (singleton.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          counts.incrementAndGet((int) globalOrds.get(singleton.ordValue()));
        }
      } else {
        while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          counts.incrementAndGet((int) globalOrds.get(singleton.ordValue()));
        }
      }
    } else if (unwrapped) {
      while (sortedSet.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        for (int n = 0; n < sortedSet.docValueCount(); n++) {
          counts.incrementAndGet((int) globalOrds.get((int) sortedSet.nextOrd()));
        }
      }
    } else {
      while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        for (int n = 0; n < sortedSet.docValueCount(); n++) {
          counts.incrementAndGet((int) globalOrds.get((int) sortedSet.nextOrd()));
        }
      }
    }
    return null;
  }

  // Otherwise tally in segment-ord space first ...
  final int[] localCounts = new int[segOrdCount];
  if (singleton != null) {
    if (unwrapped) {
      while (singleton.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        localCounts[singleton.ordValue()]++;
      }
    } else {
      while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        localCounts[singleton.ordValue()]++;
      }
    }
  } else if (unwrapped) {
    while (sortedSet.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      for (int n = 0; n < sortedSet.docValueCount(); n++) {
        localCounts[(int) sortedSet.nextOrd()]++;
      }
    }
  } else {
    while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      for (int n = 0; n < sortedSet.docValueCount(); n++) {
        localCounts[(int) sortedSet.nextOrd()]++;
      }
    }
  }

  // ... then migrate every non-zero tally to its global ord.
  for (int localOrd = 0; localOrd < segOrdCount; localOrd++) {
    int tally = localCounts[localOrd];
    if (tally == 0) {
      continue;
    }
    counts.addAndGet((int) globalOrds.get(localOrd), tally);
  }
  return null;
}