public void process()

in solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java [92:336]


  /**
   * Builds the terms section of the response for the fields named by {@link
   * TermsParams#TERMS_FIELD}.
   *
   * <p>Nothing happens unless the {@link TermsParams#TERMS} parameter is {@code "true"}. Otherwise
   * an entry keyed by {@link TermsParams#TERMS} is always added to the response (it stays empty
   * when no field was requested) and, when {@link TermsParams#TERMS_STATS} is set, an {@code
   * "indexstats"} entry filled by {@code collectStats} is added as well.
   *
   * <p>Per request one of the following paths is taken:
   *
   * <ul>
   *   <li>{@link TermsParams#TERMS_LIST} is present: the work is delegated to {@code fetchTerms}.
   *   <li>A field has no terms index but is a point field in the schema: values and counts come
   *       from a {@code PointMerger.ValueIterator}; lower/upper/prefix/regex are rejected.
   *   <li>Otherwise the field's {@code TermsEnum} is walked, honoring lower/upper bounds, prefix,
   *       regex, min/max count, limit and sort order.
   * </ul>
   *
   * @param rb the response builder giving access to the request (params, searcher, schema) and to
   *     the response the terms are added to
   * @throws IOException declared for the index access performed here
   * @throws SolrException with {@code BAD_REQUEST} if range/prefix/regex parameters are used with a
   *     point field, or if the regular expression cannot be compiled
   */
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.get(TermsParams.TERMS, "false").equals("true")) {
      return;
    }

    String[] fields = params.getParams(TermsParams.TERMS_FIELD);

    // The (possibly empty) terms section is registered before any early return below.
    NamedList<Object> termsResult = new SimpleOrderedMap<>();
    rb.rsp.add(TermsParams.TERMS, termsResult);

    if (fields == null || fields.length == 0) {
      return;
    }

    boolean termStats = params.getBool(TermsParams.TERMS_STATS, false);

    if (termStats) {
      NamedList<Number> stats = new SimpleOrderedMap<>();
      rb.rsp.add("indexstats", stats);
      collectStats(rb.req.getSearcher(), stats);
    }

    String termList = params.get(TermsParams.TERMS_LIST);
    boolean includeTotalTermFreq = params.getBool(TermsParams.TERMS_TTF, false);
    if (termList != null) {
      // An explicit list of terms was requested: look those up instead of enumerating the field.
      fetchTerms(rb.req.getSearcher(), fields, termList, includeTotalTermFreq, termsResult);
      return;
    }

    // A negative limit means "no limit".
    int _limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
    final int limit = _limit < 0 ? Integer.MAX_VALUE : _limit;

    String lowerStr = params.get(TermsParams.TERMS_LOWER);
    String upperStr = params.get(TermsParams.TERMS_UPPER);
    boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
    boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
    // true unless index order was requested explicitly, i.e. results are ordered by count
    boolean sort =
        !TermsParams.TERMS_SORT_INDEX.equals(
            params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
    int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
    // A negative max count means "no upper limit on the count".
    int _freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
    final int freqmax = _freqmax < 0 ? Integer.MAX_VALUE : _freqmax;

    String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
    String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
    Pattern pattern = null;
    if (regexp != null) {
      try {
        pattern = Pattern.compile(regexp, resolveRegexpFlags(params));
      } catch (java.util.regex.PatternSyntaxException e) {
        // The expression comes straight from the request: report it as a client error.
        throw new SolrException(
            SolrException.ErrorCode.BAD_REQUEST,
            "Invalid regular expression for " + TermsParams.TERMS_REGEXP_STR + ": " + e.getMessage(),
            e);
      }
    }

    boolean raw = params.getBool(TermsParams.TERMS_RAW, false);

    // prefix must currently be text; it does not depend on the field, so it is built only once
    final BytesRef prefixBytes = prefix == null ? null : new BytesRef(prefix);

    final LeafReader indexReader = rb.req.getSearcher().getSlowAtomicReader();

    for (String field : fields) {
      // Every requested field gets an entry, even if it ends up empty.
      NamedList<Object> fieldTerms = new NamedList<>();
      termsResult.add(field, fieldTerms);

      Terms terms = indexReader.terms(field);
      if (terms == null) {
        // field does not exist in terms index.  Check points.
        SchemaField sf = rb.req.getSchema().getFieldOrNull(field);
        if (sf != null && sf.getType().isPointField()) {
          // FIXME: terms.ttf=true is not supported for pointFields
          if (lowerStr != null || upperStr != null || prefix != null || regexp != null) {
            throw new SolrException(
                SolrException.ErrorCode.BAD_REQUEST,
                String.format(
                    Locale.ROOT,
                    "The terms component does not support Points-based fields with sorting or with parameters %s,%s,%s,%s ",
                    TermsParams.TERMS_LOWER,
                    TermsParams.TERMS_UPPER,
                    TermsParams.TERMS_PREFIX_STR,
                    TermsParams.TERMS_REGEXP_STR));
          }

          PointMerger.ValueIterator valueIterator =
              new PointMerger.ValueIterator(sf, rb.req.getSearcher().getRawReader().leaves());
          // mv is read after each getNextCount() call below to obtain the value for that count
          MutableValue mv = valueIterator.getMutableValue();
          if (sort) {
            BoundedTreeSet<CountPair<MutableValue, Integer>> queue = new BoundedTreeSet<>(limit);

            // With limit == 0 nothing can be kept. Skipping the scan also guarantees that
            // queue.last() below is only reached when the queue is non-empty.
            if (limit > 0) {
              for (; ; ) {
                long count = valueIterator.getNextCount();
                if (count < 0) break;
                if (count < freqmin || count > freqmax) continue;
                if (queue.size() < limit
                    || queue.last().val < count
                    || (queue.last().val == count && queue.last().key.compareTo(mv) < 0)) {
                  // duplicate() because mv is reused by the iterator
                  queue.add(new CountPair<>(mv.duplicate(), (int) count));
                }
              }
            }

            for (CountPair<MutableValue, Integer> item : queue) {
              fieldTerms.add(Utils.OBJECT_TO_STRING.apply(item.key.toObject()), item.val);
            }
          } else {
            // TODO: stream the values lazily (MapWriter) while writing the response instead of
            // materializing them here; not possible until the XML writer supports PushWriter.
            int num = 0;
            for (; ; ) {
              long count = valueIterator.getNextCount();
              if (count < 0) break;
              if (count < freqmin || count > freqmax) continue;
              if (++num > limit) break;
              fieldTerms.add(
                  Utils.OBJECT_TO_STRING.apply(mv.toObject()),
                  (int) count); // match the numeric type of terms
            }
          }
        }
        // Neither indexed terms nor points: the field's entry stays empty.
        continue;
      }

      // In raw mode (or for fields unknown to the schema) terms are treated as plain strings.
      FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
      if (ft == null) {
        ft = new StrField();
      }

      BytesRef upperBytes = null;
      if (upperStr != null) {
        BytesRefBuilder b = new BytesRefBuilder();
        ft.readableToIndexed(upperStr, b);
        upperBytes = b.get();
      }

      // NOTE(review): when both a lower bound and a prefix are given and the seek below lands on
      // a term that lacks the prefix, the enumeration loop stops immediately and the field's
      // result is empty - confirm this is the intended interaction of the two parameters.
      BytesRef lowerBytes;
      if (lowerStr == null) {
        // If no lower bound was specified, use the prefix
        lowerBytes = prefixBytes;
      } else {
        if (raw) {
          // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists
          // perhaps we detect if the FieldType is non-character and expect hex if so?
          lowerBytes = new BytesRef(lowerStr);
        } else {
          BytesRefBuilder b = new BytesRefBuilder();
          ft.readableToIndexed(lowerStr, b);
          lowerBytes = b.get();
        }
      }

      TermsEnum termsEnum = terms.iterator();
      BytesRef term = null;

      if (lowerBytes != null) {
        if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) {
          // Nothing at or after the lower bound; term stays null so the loop below is skipped.
          termsEnum = null;
        } else {
          term = termsEnum.term();
          // Only advance the enum if we are excluding the lower bound and the lower Term actually
          // matches
          if (lowerIncl == false && term.equals(lowerBytes)) {
            term = termsEnum.next();
          }
        }
      } else {
        // position termsEnum on first term
        term = termsEnum.next();
      }

      // i counts the terms written to fieldTerms; in count-sorted mode it is only advanced after
      // the enumeration, so the whole range is scanned and the queue keeps the best candidates.
      int i = 0;
      BoundedTreeSet<TermsResponse.Term> queue =
          (sort ? new BoundedTreeSet<>(limit, new TermCountComparator()) : null);
      CharsRefBuilder external = new CharsRefBuilder();
      while (term != null && (i < limit || sort)) {
        boolean externalized = false; // did we fill in "external" yet for this term?

        // stop if the prefix doesn't match
        if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes)) break;

        // Checked before the regex so that a non-matching term beyond the upper bound ends the
        // scan instead of being skipped over all the way to the end of the field.
        if (upperBytes != null) {
          int upperCmp = term.compareTo(upperBytes);
          // if we are past the upper term, or equal to it (when don't include upper) then stop.
          if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break;
        }

        if (pattern != null) {
          // indexed text or external text?
          // TODO: support "raw" mode?
          ft.indexedToReadable(term, external);
          externalized = true;
          if (!pattern.matcher(external.get()).matches()) {
            term = termsEnum.next();
            continue;
          }
        }

        // This is a good term in the range.  Check if mincount/maxcount conditions are satisfied.
        int docFreq = termsEnum.docFreq();
        if (docFreq >= freqmin && docFreq <= freqmax) {
          // TODO: handle raw somehow
          if (!externalized) {
            ft.indexedToReadable(term, external);
          }
          // add the term to the list
          if (sort) {
            queue.add(
                new TermsResponse.Term(external.toString(), docFreq, termsEnum.totalTermFreq()));
          } else {
            addTermToNamedList(
                fieldTerms,
                external.toString(),
                docFreq,
                termsEnum.totalTermFreq(),
                includeTotalTermFreq);
            i++;
          }
        }

        term = termsEnum.next();
      }

      if (sort) {
        // Emit the queued terms in the queue's (TermCountComparator) order, at most limit of them.
        for (TermsResponse.Term item : queue) {
          if (i >= limit) {
            break;
          }
          addTermToNamedList(
              fieldTerms,
              item.getTerm(),
              item.getFrequency(),
              item.getTotalTermFreq(),
              includeTotalTermFreq);
          i++;
        }
      }
    }
  }