private static NamedList getCountsSingleValue()

in solr/core/src/java/org/apache/solr/request/NumericFacets.java [181:453]
251 lines of code
66 McCabe index (conditional complexity)

  private static NamedList<Integer> getCountsSingleValue(
      SolrIndexSearcher searcher,
      DocSet docs,
      String fieldName,
      int offset,
      int limit,
      int mincount,
      boolean missing,
      String sort)
      throws IOException {
    boolean zeros = mincount <= 0;
    mincount = Math.max(mincount, 1);
    final SchemaField sf = searcher.getSchema().getField(fieldName);
    final FieldType ft = sf.getType();
    final NumberType numericType = ft.getNumberType();
    if (numericType == null) {
      throw new IllegalStateException();
    }
    zeros =
        zeros
            && !ft.isPointField()
            && sf.indexed(); // We don't return zeros when using PointFields or when index=false
    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();

    // 1. accumulate
    final HashTable hashTable = new HashTable(true);
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    NumericDocValues longs = null;
    int missingCount = 0;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
      final int doc = docsIt.nextDoc();
      if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
        do {
          ctx = ctxIt.next();
        } while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
        assert doc >= ctx.docBase;
        switch (numericType) {
          case LONG:
          case DATE:
          case INTEGER:
            // Long, Date and Integer
            longs = DocValues.getNumeric(ctx.reader(), fieldName);
            break;
          case FLOAT:
            // TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
            longs =
                new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {
                  @Override
                  public long longValue() throws IOException {
                    long bits = super.longValue();
                    if (bits < 0) bits ^= 0x7fffffffffffffffL;
                    return bits;
                  }
                };
            break;
          case DOUBLE:
            // TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
            longs =
                new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {
                  @Override
                  public long longValue() throws IOException {
                    long bits = super.longValue();
                    if (bits < 0) bits ^= 0x7fffffffffffffffL;
                    return bits;
                  }
                };
            break;
          default:
            throw new AssertionError("Unexpected type: " + numericType);
        }
      }
      int valuesDocID = longs.docID();
      if (valuesDocID < doc - ctx.docBase) {
        valuesDocID = longs.advance(doc - ctx.docBase);
      }
      if (valuesDocID == doc - ctx.docBase) {
        hashTable.add(doc, longs.longValue(), 1);
      } else {
        ++missingCount;
      }
    }

    final NamedList<Integer> result = new NamedList<>();
    if (limit == 0) {
      return finalize(result, missingCount, missing);
    }

    // 2. select top-k facet values
    final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
    final PriorityQueue<Entry> pq;
    if (FacetParams.FACET_SORT_COUNT.equals(sort)
        || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
      pq =
          new PriorityQueue<>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
              if (a.count < b.count || (a.count == b.count && a.bits > b.bits)) {
                return true;
              } else {
                return false;
              }
            }
          };
    } else {
      pq =
          new PriorityQueue<>(pqSize) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
              return a.bits > b.bits;
            }
          };
    }
    Entry e = null;
    for (int i = 0; i < hashTable.bits.length; ++i) {
      if (hashTable.counts[i] >= mincount) {
        if (e == null) {
          e = new Entry();
        }
        e.bits = hashTable.bits[i];
        e.count = hashTable.counts[i];
        e.docID = hashTable.docIDs[i];
        e = pq.insertWithOverflow(e);
      }
    }

    // 4. build the NamedList
    final ValueSource vs = ft.getValueSource(sf, null);

    // This stuff is complicated because if facet.mincount=0, the counts needs
    // to be merged with terms from the terms dict
    if (!zeros
        || FacetParams.FACET_SORT_COUNT.equals(sort)
        || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
      // Only keep items we're interested in
      final Deque<Entry> counts = new ArrayDeque<>();
      while (pq.size() > offset) {
        counts.addFirst(pq.pop());
      }

      // Entries from the PQ first, then using the terms dictionary
      for (Entry entry : counts) {
        final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
        final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
        result.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
      }

      if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
        if (!sf.indexed() && !sf.hasDocValues()) {
          throw new IllegalStateException(
              "Cannot use "
                  + FacetParams.FACET_MINCOUNT
                  + "=0 on field "
                  + sf.getName()
                  + " which is neither indexed nor docValues");
        }
        // Add zeros until there are limit results
        final Set<String> alreadySeen = new HashSet<>();
        while (pq.size() > 0) {
          Entry entry = pq.pop();
          final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
          final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
          alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
        }
        result.forEach((name, __) -> alreadySeen.add(name));

        final Terms terms = searcher.getSlowAtomicReader().terms(fieldName);
        if (terms != null) {
          final String prefixStr = TrieField.getMainValuePrefix(ft);
          final BytesRef prefix;
          if (prefixStr != null) {
            prefix = new BytesRef(prefixStr);
          } else {
            prefix = new BytesRef();
          }
          final TermsEnum termsEnum = terms.iterator();
          BytesRef term;
          switch (termsEnum.seekCeil(prefix)) {
            case FOUND:
            case NOT_FOUND:
              term = termsEnum.term();
              break;
            case END:
              term = null;
              break;
            default:
              throw new AssertionError();
          }
          final CharsRefBuilder spare = new CharsRefBuilder();
          for (int skipped = hashTable.size;
              skipped < offset && term != null && StringHelper.startsWith(term, prefix); ) {
            ft.indexedToReadable(term, spare);
            final String termStr = spare.toString();
            if (!alreadySeen.contains(termStr)) {
              ++skipped;
            }
            term = termsEnum.next();
          }
          for (;
              term != null
                  && StringHelper.startsWith(term, prefix)
                  && (limit < 0 || result.size() < limit);
              term = termsEnum.next()) {
            ft.indexedToReadable(term, spare);
            final String termStr = spare.toString();
            if (!alreadySeen.contains(termStr)) {
              result.add(termStr, 0);
            }
          }
        }
      }
    } else {
      // sort=index, mincount=0 and we have less than limit items
      // => Merge the PQ and the terms dictionary on the fly
      if (!sf.indexed()) {
        throw new IllegalStateException(
            "Cannot use "
                + FacetParams.FACET_SORT
                + "="
                + FacetParams.FACET_SORT_INDEX
                + " on a field which is not indexed");
      }
      final Map<String, Integer> counts = new HashMap<>();
      while (pq.size() > 0) {
        final Entry entry = pq.pop();
        final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
        final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
        counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
      }
      final Terms terms = searcher.getSlowAtomicReader().terms(fieldName);
      if (terms != null) {
        final String prefixStr = TrieField.getMainValuePrefix(ft);
        final BytesRef prefix;
        if (prefixStr != null) {
          prefix = new BytesRef(prefixStr);
        } else {
          prefix = new BytesRef();
        }
        final TermsEnum termsEnum = terms.iterator();
        BytesRef term;
        switch (termsEnum.seekCeil(prefix)) {
          case FOUND:
          case NOT_FOUND:
            term = termsEnum.term();
            break;
          case END:
            term = null;
            break;
          default:
            throw new AssertionError();
        }
        final CharsRefBuilder spare = new CharsRefBuilder();
        for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
          term = termsEnum.next();
        }
        for (;
            term != null
                && StringHelper.startsWith(term, prefix)
                && (limit < 0 || result.size() < limit);
            term = termsEnum.next()) {
          ft.indexedToReadable(term, spare);
          final String termStr = spare.toString();
          Integer count = counts.get(termStr);
          if (count == null) {
            count = 0;
          }
          result.add(termStr, count);
        }
      }
    }

    return finalize(result, missingCount, missing);
  }