in pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java [266:502]
/**
 * Resolves a single json_match predicate into a bitmap of flattened doc ids.
 *
 * <p>The predicate's left-hand side must be an identifier. The identifier is accepted either in JSONPath form
 * (e.g. "$.a[1].b", "$[0]", "$") or in the legacy form (e.g. "a[1].b"), and is normalized according to the index
 * version before being handed to {@code getKeyAndFlattenedDocIds}. The key returned by that call is then used for
 * all dictionary look-ups, and the bitmap it returns is passed to {@code filter} together with the per-predicate
 * result.
 *
 * @param predicate predicate to evaluate; supported types are EQ, NOT_EQ, IN, NOT_IN, IS_NULL, IS_NOT_NULL,
 *                  REGEXP_LIKE and RANGE
 * @return the value produced by {@code filter}, or the (empty) bitmap from {@code getKeyAndFlattenedDocIds} when
 *         that bitmap is non-null and empty
 * @throws IllegalArgumentException if the left-hand side is not an identifier
 * @throws IllegalStateException if the predicate type is not supported
 */
private ImmutableRoaringBitmap getMatchingFlattenedDocIds(Predicate predicate) {
  ExpressionContext lhs = predicate.getLhs();
  Preconditions.checkArgument(lhs.getType() == ExpressionContext.Type.IDENTIFIER,
      "Left-hand side of the predicate must be an identifier, got: %s (%s). Put double quotes around the identifier"
          + " if needed.", lhs, lhs.getType());

  // Two identifier formats are accepted:
  // - JSONPath format (e.g. "$.a[1].b"='abc', "$[0]"=1, "$"='abc')
  // - Legacy format (e.g. "a[1].b"='abc')
  String key = lhs.getIdentifier();
  if (_version == BaseJsonIndexCreator.VERSION_2) {
    // V2: drop the leading '$' of a JSONPath, otherwise prepend the key separator to the legacy path
    key = key.startsWith("$") ? key.substring(1) : JsonUtils.KEY_SEPARATOR + key;
  } else if (key.startsWith("$.")) {
    // V1 backward-compatibility: drop the "$." prefix
    key = key.substring(2);
  }

  Pair<String, ImmutableRoaringBitmap> keyAndDocIds = getKeyAndFlattenedDocIds(key);
  key = keyAndDocIds.getLeft();
  ImmutableRoaringBitmap matchingDocIds = keyAndDocIds.getRight();
  if (matchingDocIds != null && matchingDocIds.isEmpty()) {
    // Nothing can match, skip the predicate evaluation altogether
    return matchingDocIds;
  }

  Predicate.Type predicateType = predicate.getType();
  switch (predicateType) {
    case EQ: {
      String value = ((EqPredicate) predicate).getValue();
      int dictId = _dictionary.indexOf(key + JsonIndexCreator.KEY_VALUE_SEPARATOR + value);
      ImmutableRoaringBitmap docIds = dictId >= 0 ? _invertedIndex.getDocIds(dictId) : null;
      return filter(docIds, matchingDocIds);
    }

    case NOT_EQ: {
      // Arrays are un-nested, so a flattened document holds a single value per key; the per-value bitmaps of one
      // key are therefore disjoint and the excluded value can simply be subtracted from the bitmap of the key.
      String excludedValue = ((NotEqPredicate) predicate).getValue();
      ImmutableRoaringBitmap docIds = null;
      // Look up the bitmap stored under the bare key rather than OR-ing many per-value bitmaps
      int keyDictId = _dictionary.indexOf(key);
      if (keyDictId >= 0) {
        ImmutableRoaringBitmap keyDocIds = _invertedIndex.getDocIds(keyDictId);
        if (!keyDocIds.isEmpty()) {
          // Default to the whole key bitmap; it is narrowed only when the excluded value has doc ids
          docIds = keyDocIds;
          int excludedDictId = _dictionary.indexOf(key + JsonIndexCreator.KEY_VALUE_SEPARATOR + excludedValue);
          if (excludedDictId >= 0) {
            ImmutableRoaringBitmap excludedDocIds = _invertedIndex.getDocIds(excludedDictId);
            if (!excludedDocIds.isEmpty()) {
              MutableRoaringBitmap remaining = keyDocIds.toMutableRoaringBitmap();
              remaining.andNot(excludedDocIds);
              docIds = remaining;
            }
          }
        }
      }
      return filter(docIds, matchingDocIds);
    }

    case IN: {
      String keyPrefix = key + JsonIndexCreator.KEY_VALUE_SEPARATOR;
      ImmutableRoaringBitmap docIds = null;
      for (String value : ((InPredicate) predicate).getValues()) {
        int dictId = _dictionary.indexOf(keyPrefix + value);
        if (dictId < 0) {
          continue;
        }
        ImmutableRoaringBitmap valueDocIds = _invertedIndex.getDocIds(dictId);
        docIds = docIds == null ? valueDocIds : or(docIds, valueDocIds);
      }
      return filter(docIds, matchingDocIds);
    }

    case NOT_IN: {
      List<String> excludedValues = ((NotInPredicate) predicate).getValues();
      int[] dictIdRange = getDictIdRangeForKey(key);
      int firstDictId = dictIdRange[0];
      int endDictId = dictIdRange[1];
      String keyPrefix = key + JsonIndexCreator.KEY_VALUE_SEPARATOR;
      ImmutableRoaringBitmap docIds = null;
      if (excludedValues.size() < (endDictId - firstDictId) / 2) {
        // Fewer excluded values than remaining ones: start from the bitmap stored under the bare key and subtract
        // the bitmap of every excluded value
        int keyDictId = _dictionary.indexOf(key);
        if (keyDictId >= 0) {
          ImmutableRoaringBitmap keyDocIds = _invertedIndex.getDocIds(keyDictId);
          if (!keyDocIds.isEmpty()) {
            docIds = keyDocIds;
            for (String excludedValue : excludedValues) {
              int excludedDictId = _dictionary.indexOf(keyPrefix + excludedValue);
              if (excludedDictId < 0) {
                continue;
              }
              ImmutableRoaringBitmap excludedDocIds = _invertedIndex.getDocIds(excludedDictId);
              MutableRoaringBitmap remaining = toMutable(docIds);
              remaining.andNot(excludedDocIds);
              docIds = remaining;
            }
          }
        }
      } else if (firstDictId < endDictId) {
        // Otherwise OR the bitmaps of every value in the key's dict id range except the excluded ones.
        // Excluded values are resolved to dict ids up front to avoid comparing strings in the loop.
        IntOpenHashSet excludedDictIds = new IntOpenHashSet();
        for (String excludedValue : excludedValues) {
          int excludedDictId = _dictionary.indexOf(keyPrefix + excludedValue);
          if (excludedDictId >= 0) {
            excludedDictIds.add(excludedDictId);
          }
        }
        for (int dictId = firstDictId; dictId < endDictId; dictId++) {
          if (!excludedDictIds.contains(dictId)) {
            ImmutableRoaringBitmap valueDocIds = _invertedIndex.getDocIds(dictId);
            docIds = docIds == null ? valueDocIds : or(docIds, valueDocIds);
          }
        }
      }
      return filter(docIds, matchingDocIds);
    }

    case IS_NOT_NULL:
    case IS_NULL: {
      // Both types resolve to the bitmap stored under the bare key.
      // NOTE(review): presumably the caller inverts the result for IS_NULL - confirm against the call site.
      int keyDictId = _dictionary.indexOf(key);
      ImmutableRoaringBitmap docIds = keyDictId >= 0 ? _invertedIndex.getDocIds(keyDictId) : null;
      return filter(docIds, matchingDocIds);
    }

    case REGEXP_LIKE: {
      // A single matcher is reset for every candidate value instead of creating one per value
      Matcher matcher = ((RegexpLikePredicate) predicate).getPattern().matcher("");
      int[] dictIdRange = getDictIdRangeForKey(key);
      int firstDictId = dictIdRange[0];
      int endDictId = dictIdRange[1];
      ImmutableRoaringBitmap docIds = null;
      // The dictionary buffer is only requested when there is at least one entry to read
      byte[] dictBuffer = firstDictId < endDictId ? _dictionary.getBuffer() : null;
      // Reusable holder for the value part of each dictionary entry
      StringBuilder valueBuffer = new StringBuilder();
      for (int dictId = firstDictId; dictId < endDictId; dictId++) {
        String entry = _dictionary.getStringValue(dictId, dictBuffer);
        valueBuffer.setLength(0);
        // Skip the key and the one-character separator that precede the value
        valueBuffer.append(entry, key.length() + 1, entry.length());
        if (!matcher.reset(valueBuffer).matches()) {
          continue;
        }
        ImmutableRoaringBitmap valueDocIds = _invertedIndex.getDocIds(dictId);
        docIds = docIds == null ? valueDocIds : or(docIds, valueDocIds);
      }
      return filter(docIds, matchingDocIds);
    }

    case RANGE: {
      RangePredicate rangePredicate = (RangePredicate) predicate;
      // Only two comparison modes are used: every numeric type is compared as DOUBLE, everything else as STRING
      FieldSpec.DataType rangeDataType =
          rangePredicate.getRangeDataType().isNumeric() ? FieldSpec.DataType.DOUBLE : FieldSpec.DataType.STRING;

      boolean lowerUnbounded = rangePredicate.getLowerBound().equals(RangePredicate.UNBOUNDED);
      boolean upperUnbounded = rangePredicate.getUpperBound().equals(RangePredicate.UNBOUNDED);
      boolean lowerInclusive = lowerUnbounded || rangePredicate.isLowerInclusive();
      boolean upperInclusive = upperUnbounded || rangePredicate.isUpperInclusive();
      Object lowerBound = lowerUnbounded ? null : rangeDataType.convert(rangePredicate.getLowerBound());
      Object upperBound = upperUnbounded ? null : rangeDataType.convert(rangePredicate.getUpperBound());

      int[] dictIdRange = getDictIdRangeForKey(key);
      int firstDictId = dictIdRange[0];
      int endDictId = dictIdRange[1];
      ImmutableRoaringBitmap docIds = null;
      // The dictionary buffer is only requested when there is at least one entry to read
      byte[] dictBuffer = firstDictId < endDictId ? _dictionary.getBuffer() : null;
      for (int dictId = firstDictId; dictId < endDictId; dictId++) {
        // Strip the key and the one-character separator to get the raw value
        String value = _dictionary.getStringValue(dictId, dictBuffer).substring(key.length() + 1);
        Object valueObj = rangeDataType.convert(value);

        boolean withinLower = true;
        if (!lowerUnbounded) {
          int lowerComparison = rangeDataType.compare(valueObj, lowerBound);
          withinLower = lowerInclusive ? lowerComparison >= 0 : lowerComparison > 0;
        }
        boolean withinUpper = true;
        if (!upperUnbounded) {
          int upperComparison = rangeDataType.compare(valueObj, upperBound);
          withinUpper = upperInclusive ? upperComparison <= 0 : upperComparison < 0;
        }

        if (withinLower && withinUpper) {
          ImmutableRoaringBitmap valueDocIds = _invertedIndex.getDocIds(dictId);
          docIds = docIds == null ? valueDocIds : or(docIds, valueDocIds);
        }
      }
      return filter(docIds, matchingDocIds);
    }

    default:
      throw new IllegalStateException("Unsupported json_match predicate type: " + predicate);
  }
}