private ImmutableRoaringBitmap getMatchingFlattenedDocIds()

in pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java [266:502]


  /**
   * Returns the flattened doc ids matching a single leaf predicate of a json_match filter.
   *
   * <p>The left-hand side of the predicate must be an identifier holding the JSON key. Two key formats are accepted:
   * <ul>
   *   <li>JSONPath format (e.g. "$.a[1].b"='abc', "$[0]"=1, "$"='abc')</li>
   *   <li>Legacy format (e.g. "a[1].b"='abc')</li>
   * </ul>
   * The key is normalized according to the index version and then resolved through
   * {@code getKeyAndFlattenedDocIds}, which may rewrite the key and return a bitmap that the predicate result is
   * restricted to via {@code filter}.
   *
   * <p>IS_NULL and IS_NOT_NULL both produce the flattened docs stored under the bare key.
   * NOTE(review): presumably the caller inverts the result for IS_NULL - confirm against the call site.
   *
   * @param predicate leaf predicate to evaluate
   * @return whatever {@code filter} returns for the predicate's posting list (possibly {@code null}) and the
   *         key-level bitmap; or the key-level bitmap itself when it is non-null and empty
   * @throws IllegalArgumentException if the left-hand side is not an identifier
   * @throws IllegalStateException if the predicate type is not supported by json_match
   */
  private ImmutableRoaringBitmap getMatchingFlattenedDocIds(Predicate predicate) {
    ExpressionContext lhs = predicate.getLhs();
    Preconditions.checkArgument(lhs.getType() == ExpressionContext.Type.IDENTIFIER,
        "Left-hand side of the predicate must be an identifier, got: %s (%s). Put double quotes around the identifier"
            + " if needed.", lhs, lhs.getType());
    String key = lhs.getIdentifier();
    // Support 2 formats:
    // - JSONPath format (e.g. "$.a[1].b"='abc', "$[0]"=1, "$"='abc')
    // - Legacy format (e.g. "a[1].b"='abc')
    if (_version == BaseJsonIndexCreator.VERSION_2) {
      if (key.startsWith("$")) {
        key = key.substring(1);
      } else {
        key = JsonUtils.KEY_SEPARATOR + key;
      }
    } else {
      // For V1 backward-compatibility
      if (key.startsWith("$.")) {
        key = key.substring(2);
      }
    }
    Pair<String, ImmutableRoaringBitmap> pair = getKeyAndFlattenedDocIds(key);
    key = pair.getLeft();
    ImmutableRoaringBitmap matchingDocIds = pair.getRight();
    // Nothing can match once the key-level bitmap is known to be empty, so skip the predicate evaluation
    if (matchingDocIds != null && matchingDocIds.isEmpty()) {
      return matchingDocIds;
    }

    ImmutableRoaringBitmap result;
    switch (predicate.getType()) {
      case EQ:
        result = getDocIdsForDictEntry(key + JsonIndexCreator.KEY_VALUE_SEPARATOR + ((EqPredicate) predicate).getValue());
        break;
      case NOT_EQ:
        result = getNotEqFlattenedDocIds(key, ((NotEqPredicate) predicate).getValue());
        break;
      case IN:
        result = getInFlattenedDocIds(key, ((InPredicate) predicate).getValues());
        break;
      case NOT_IN:
        result = getNotInFlattenedDocIds(key, ((NotInPredicate) predicate).getValues());
        break;
      case IS_NOT_NULL:
      case IS_NULL:
        // Both cases read the posting list stored under the bare key
        result = getDocIdsForDictEntry(key);
        break;
      case REGEXP_LIKE:
        result = getRegexpLikeFlattenedDocIds(key, ((RegexpLikePredicate) predicate).getPattern());
        break;
      case RANGE:
        result = getRangeFlattenedDocIds(key, (RangePredicate) predicate);
        break;
      default:
        throw new IllegalStateException("Unsupported json_match predicate type: " + predicate);
    }
    return filter(result, matchingDocIds);
  }

  /**
   * Returns the posting list stored under the exact dictionary entry, or {@code null} when the dictionary does not
   * contain the entry.
   */
  private ImmutableRoaringBitmap getDocIdsForDictEntry(String dictEntry) {
    int dictId = _dictionary.indexOf(dictEntry);
    return dictId >= 0 ? _invertedIndex.getDocIds(dictId) : null;
  }

  /**
   * Accumulates {@code docIds} into a union that is still {@code null} while nothing has matched yet.
   */
  private ImmutableRoaringBitmap unionNullable(ImmutableRoaringBitmap accumulated, ImmutableRoaringBitmap docIds) {
    return accumulated == null ? docIds : or(accumulated, docIds);
  }

  /**
   * Evaluates {@code key != notEqualValue}: the posting list of the bare key minus the posting list of the excluded
   * key-value pair. Returns {@code null} when the bare key is absent or its posting list is empty.
   */
  private ImmutableRoaringBitmap getNotEqFlattenedDocIds(String key, String notEqualValue) {
    // each array is un-nested and so flattened json document contains only one value
    // that means for each key-value pair the set of flattened document ids is disjoint

    // read bitmap with all values for this key instead of OR-ing many per-value bitmaps
    ImmutableRoaringBitmap allValuesDocIds = getDocIdsForDictEntry(key);
    if (allValuesDocIds == null || allValuesDocIds.isEmpty()) {
      return null;
    }

    ImmutableRoaringBitmap notEqDocIds = getDocIdsForDictEntry(key + JsonIndexCreator.KEY_VALUE_SEPARATOR + notEqualValue);
    if (notEqDocIds == null || notEqDocIds.isEmpty()) {
      // there's no value to remove, use found bitmap as is (avoids copying it)
      return allValuesDocIds;
    }

    // remove doc ids for unwanted value
    MutableRoaringBitmap mutableBitmap = allValuesDocIds.toMutableRoaringBitmap();
    mutableBitmap.andNot(notEqDocIds);
    return mutableBitmap;
  }

  /**
   * Evaluates {@code key IN (values)}: the union of the posting lists of every key-value pair present in the
   * dictionary. Returns {@code null} when none of the pairs is present.
   */
  private ImmutableRoaringBitmap getInFlattenedDocIds(String key, List<String> values) {
    ImmutableRoaringBitmap result = null;
    for (String value : values) {
      ImmutableRoaringBitmap docIds = getDocIdsForDictEntry(key + JsonIndexCreator.KEY_VALUE_SEPARATOR + value);
      if (docIds != null) {
        result = unionNullable(result, docIds);
      }
    }
    return result;
  }

  /**
   * Evaluates {@code key NOT IN (notInValues)} using whichever strategy touches fewer bitmaps: subtracting the
   * excluded values from the bare key's posting list, or OR-ing the posting lists of all remaining values.
   * Returns {@code null} when nothing matches.
   */
  private ImmutableRoaringBitmap getNotInFlattenedDocIds(String key, List<String> notInValues) {
    int[] dictIds = getDictIdRangeForKey(key);
    int valueCount = dictIds[1] - dictIds[0];

    if (notInValues.size() < valueCount / 2) {
      // if there is less notIn values than In values
      // read bitmap for all values and then remove values from bitmaps associated with notIn values
      ImmutableRoaringBitmap allValuesDocIds = getDocIdsForDictEntry(key);
      if (allValuesDocIds == null || allValuesDocIds.isEmpty()) {
        return null;
      }

      ImmutableRoaringBitmap result = allValuesDocIds;
      for (String notInValue : notInValues) {
        ImmutableRoaringBitmap excludedDocIds =
            getDocIdsForDictEntry(key + JsonIndexCreator.KEY_VALUE_SEPARATOR + notInValue);
        if (excludedDocIds != null) {
          // remove doc ids for unwanted value
          MutableRoaringBitmap mutableBitmap = toMutable(result);
          mutableBitmap.andNot(excludedDocIds);
          result = mutableBitmap;
        }
      }
      return result;
    }

    // Empty dictionary range: the key has no values, so there is nothing to OR together
    if (dictIds[0] >= dictIds[1]) {
      return null;
    }

    // if there is more In values than notIn then OR bitmaps for all values except notIn values
    // resolve dict ids for string values to avoid comparing strings
    IntOpenHashSet notInDictIds = new IntOpenHashSet();
    for (String notInValue : notInValues) {
      int dictId = _dictionary.indexOf(key + JsonIndexCreator.KEY_VALUE_SEPARATOR + notInValue);
      if (dictId >= 0) {
        notInDictIds.add(dictId);
      }
    }

    ImmutableRoaringBitmap result = null;
    for (int dictId = dictIds[0]; dictId < dictIds[1]; dictId++) {
      if (!notInDictIds.contains(dictId)) {
        result = unionNullable(result, _invertedIndex.getDocIds(dictId));
      }
    }
    return result;
  }

  /**
   * Evaluates {@code REGEXP_LIKE(key, pattern)}: the union of the posting lists of every value in the key's
   * dictionary range that the pattern matches in full ({@code Matcher.matches()}). Returns {@code null} when no
   * value matches.
   */
  private ImmutableRoaringBitmap getRegexpLikeFlattenedDocIds(String key, Pattern pattern) {
    Matcher matcher = pattern.matcher("");
    int[] dictIds = getDictIdRangeForKey(key);
    if (dictIds[0] >= dictIds[1]) {
      return null;
    }

    byte[] dictBuffer = _dictionary.getBuffer();
    // Dictionary entries are "<key><separator><value>"; the value starts right after the one-char separator
    int valueStart = key.length() + 1;
    // Reused across iterations so that extracting the value does not allocate a substring per entry
    StringBuilder value = new StringBuilder();
    ImmutableRoaringBitmap result = null;

    for (int dictId = dictIds[0]; dictId < dictIds[1]; dictId++) {
      String stringValue = _dictionary.getStringValue(dictId, dictBuffer);
      value.setLength(0);
      value.append(stringValue, valueStart, stringValue.length());

      if (matcher.reset(value).matches()) {
        result = unionNullable(result, _invertedIndex.getDocIds(dictId));
      }
    }
    return result;
  }

  /**
   * Evaluates a range predicate: the union of the posting lists of every value in the key's dictionary range that
   * lies within the bounds. Values and bounds are compared as DOUBLE when the predicate's range data type is
   * numeric and as STRING otherwise. Returns {@code null} when no value is in range.
   */
  private ImmutableRoaringBitmap getRangeFlattenedDocIds(String key, RangePredicate rangePredicate) {
    // Simplify to only support numeric and string types
    FieldSpec.DataType rangeDataType =
        rangePredicate.getRangeDataType().isNumeric() ? FieldSpec.DataType.DOUBLE : FieldSpec.DataType.STRING;

    boolean lowerUnbounded = rangePredicate.getLowerBound().equals(RangePredicate.UNBOUNDED);
    boolean upperUnbounded = rangePredicate.getUpperBound().equals(RangePredicate.UNBOUNDED);
    boolean lowerInclusive = lowerUnbounded || rangePredicate.isLowerInclusive();
    boolean upperInclusive = upperUnbounded || rangePredicate.isUpperInclusive();
    Object lowerBound = lowerUnbounded ? null : rangeDataType.convert(rangePredicate.getLowerBound());
    Object upperBound = upperUnbounded ? null : rangeDataType.convert(rangePredicate.getUpperBound());

    int[] dictIds = getDictIdRangeForKey(key);
    if (dictIds[0] >= dictIds[1]) {
      return null;
    }

    byte[] dictBuffer = _dictionary.getBuffer();
    // Dictionary entries are "<key><separator><value>"; the value starts right after the one-char separator
    int valueStart = key.length() + 1;
    ImmutableRoaringBitmap result = null;

    for (int dictId = dictIds[0]; dictId < dictIds[1]; dictId++) {
      String value = _dictionary.getStringValue(dictId, dictBuffer).substring(valueStart);
      Object valueObj = rangeDataType.convert(value);

      boolean lowerCompareResult = lowerUnbounded;
      if (!lowerCompareResult) {
        int lowerCmp = rangeDataType.compare(valueObj, lowerBound);
        lowerCompareResult = lowerInclusive ? lowerCmp >= 0 : lowerCmp > 0;
      }
      boolean upperCompareResult = upperUnbounded;
      if (!upperCompareResult) {
        int upperCmp = rangeDataType.compare(valueObj, upperBound);
        upperCompareResult = upperInclusive ? upperCmp <= 0 : upperCmp < 0;
      }

      if (lowerCompareResult && upperCompareResult) {
        result = unionNullable(result, _invertedIndex.getDocIds(dictId));
      }
    }
    return result;
  }