in processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java [253:522]
/**
 * Creates a {@link DimensionSelector} that reads this dimension's dictionary-encoded values from whatever row
 * {@code currEntry} currently holds.
 *
 * The returned selector reports a fixed cardinality, captured from {@code getCardinality()} at the time this method
 * is called. Dictionary ids at or beyond that value are hidden from callers: {@code lookupId} reports them as
 * absent and {@code lookupName} rejects them with an exception.
 *
 * @param spec      dimension spec; only its extraction function (possibly null) is used here
 * @param currEntry holder whose current row is re-read on every selector or matcher call
 * @param desc      descriptor supplying the position of this dimension within a row's dims array
 *
 * @return a selector that also serves as its own {@link IdLookup} when no extraction function is configured
 */
public DimensionSelector makeDimensionSelector(
    final DimensionSpec spec,
    final IncrementalIndexRowHolder currEntry,
    final IncrementalIndex.DimensionDesc desc
)
{
  final ExtractionFn extractionFn = spec.getExtractionFn();
  final int dimIndex = desc.getIndex();

  // maxId is used in concert with getLastRowIndex() in IncrementalIndex to ensure that callers do not encounter
  // rows that contain IDs over the initially-reported cardinality. The main idea is that IncrementalIndex establishes
  // a watermark at the time a cursor is created, and doesn't allow the cursor to walk past that watermark.
  //
  // Additionally, this selector explicitly blocks knowledge of IDs past maxId that may occur from other causes
  // (for example: nulls getting generated for empty arrays, or calls to lookupId).
  final int maxId = getCardinality();

  class IndexerDimensionSelector implements DimensionSelector, IdLookup
  {
    // Reused for every getRow() call; callers receive the same instance re-pointed at the current row's ids.
    private final ArrayBasedIndexedInts indexedInts = new ArrayBasedIndexedInts();

    // Lazily-created single-element array holding the id of null, used for single-valued rows with no value.
    @Nullable
    @MonotonicNonNull
    private int[] nullIdIntArray;

    /**
     * Returns the dictionary ids of the current row for this dimension. A row with no ids yields an empty row when
     * the dimension has multiple values, and a one-element row containing the id of null otherwise.
     */
    @Override
    public IndexedInts getRow()
    {
      final Object[] dims = currEntry.get().getDims();

      @Nullable
      int[] indices;
      if (dimIndex < dims.length) {
        indices = (int[]) dims[dimIndex];
      } else {
        indices = null;
      }

      int[] row = null;
      int rowSize = 0;

      // Usually happens because currEntry's rowIndex is smaller than the rowIndex of the row in which this
      // dimension first appears.
      if (indices == null || indices.length == 0) {
        if (hasMultipleValues) {
          row = IntArrays.EMPTY_ARRAY;
        } else {
          final int nullId = getEncodedValue(null, false);
          DruidException.conditionalDefensive(
              nullId >= 0 && nullId < maxId,
              "Null value not present in dictionary, how did this happen?"
          );
          if (nullIdIntArray == null) {
            nullIdIntArray = new int[]{nullId};
          }
          row = nullIdIntArray;
          rowSize = 1;
        }
      } else {
        row = indices;
        rowSize = indices.length;
      }

      indexedInts.setValues(row, rowSize);
      return indexedInts;
    }

    /**
     * Builds a matcher for rows containing {@code value}. With an extraction function configured this delegates to
     * the predicate-based matcher; otherwise it compares dictionary ids directly.
     */
    @Override
    public ValueMatcher makeValueMatcher(@Nullable final String value)
    {
      if (extractionFn != null) {
        // Employ caching BitSet optimization
        return makeValueMatcher(StringPredicateDruidPredicateFactory.equalTo(value));
      }

      final int valueId = lookupId(value);
      final int nullValueId = lookupId(null);

      if (valueId >= 0 || value == null) {
        return new ValueMatcher()
        {
          @Override
          public boolean matches(boolean includeUnknown)
          {
            Object[] dims = currEntry.get().getDims();
            if (dimIndex >= dims.length) {
              // Row has no slot for this dimension: treated as null.
              return includeUnknown || value == null;
            }

            int[] dimsInt = (int[]) dims[dimIndex];
            if (dimsInt == null || dimsInt.length == 0) {
              return includeUnknown || value == null;
            }

            for (int id : dimsInt) {
              if (id == valueId) {
                return true;
              }
              if (includeUnknown && (id == nullValueId)) {
                return true;
              }
            }
            return false;
          }

          @Override
          public void inspectRuntimeShape(RuntimeShapeInspector inspector)
          {
            // nothing to inspect
          }
        };
      } else {
        // value is non-null and lookupId did not return a usable id for it, so no id comparison against value is
        // attempted. Only rows that are empty or contain the null id can match, and only when includeUnknown is set.
        return new ValueMatcher()
        {
          @Override
          public boolean matches(boolean includeUnknown)
          {
            if (includeUnknown) {
              IndexedInts row = getRow();
              final int size = row.size();
              if (size == 0) {
                return true;
              }
              for (int i = 0; i < size; i++) {
                if (row.get(i) == nullValueId) {
                  return true;
                }
              }
            }
            return false;
          }

          @Override
          public void inspectRuntimeShape(RuntimeShapeInspector inspector)
          {
            // nothing to inspect
          }
        };
      }
    }

    /**
     * Builds a matcher that applies the factory's string predicate to the values of the current row, caching the
     * outcome per dictionary id so that each id is evaluated by the predicate at most once.
     */
    @Override
    public ValueMatcher makeValueMatcher(final DruidPredicateFactory predicateFactory)
    {
      // The predicate result for an id can differ depending on includeUnknown, so the answer for each mode is cached
      // separately. Caching a single boolean per id would leak the answer computed under one mode into calls made
      // with the other mode.
      final BitSet checkedIds = new BitSet(maxId);
      final BitSet matchingIds = new BitSet(maxId);
      final BitSet matchingIdsIncludingUnknown = new BitSet(maxId);
      final DruidObjectPredicate<String> predicate = predicateFactory.makeStringPredicate();

      // Lazy matcher; only check an id if matches() is called.
      return new ValueMatcher()
      {
        @Override
        public boolean matches(boolean includeUnknown)
        {
          Object[] dims = currEntry.get().getDims();
          if (dimIndex >= dims.length) {
            return predicate.apply(null).matches(includeUnknown);
          }

          int[] dimsInt = (int[]) dims[dimIndex];
          if (dimsInt == null || dimsInt.length == 0) {
            return predicate.apply(null).matches(includeUnknown);
          }

          final BitSet cachedMatches = includeUnknown ? matchingIdsIncludingUnknown : matchingIds;

          for (int id : dimsInt) {
            if (!checkedIds.get(id)) {
              // First time this id is seen: evaluate once and record the answer for both modes.
              final DruidPredicateMatch match = predicate.apply(lookupName(id));
              if (match.matches(false)) {
                matchingIds.set(id);
              }
              if (match.matches(true)) {
                matchingIdsIncludingUnknown.set(id);
              }
              checkedIds.set(id);
            }
            if (cachedMatches.get(id)) {
              return true;
            }
          }
          return false;
        }

        @Override
        public void inspectRuntimeShape(RuntimeShapeInspector inspector)
        {
          // nothing to inspect
        }
      };
    }

    /**
     * Returns the cardinality captured when this selector was created, not the dictionary's current size.
     */
    @Override
    public int getValueCardinality()
    {
      return maxId;
    }

    /**
     * Returns the value for {@code id}, passed through the extraction function when one is configured.
     *
     * @throws ISE if {@code id} is at or beyond the cardinality captured at selector creation
     */
    @Override
    public String lookupName(int id)
    {
      if (id >= maxId) {
        // Sanity check; IDs beyond maxId should not be known to callers. (See comment on maxId.)
        throw new ISE("id[%d] >= maxId[%d]", id, maxId);
      }
      final String strValue = getActualValue(id, false);
      return extractionFn == null ? strValue : extractionFn.apply(strValue);
    }

    @Override
    public boolean nameLookupPossibleInAdvance()
    {
      return true;
    }

    /**
     * Returns this selector as an {@link IdLookup}, or null when an extraction function is configured (in which
     * case {@link #lookupId} is unsupported).
     */
    @Nullable
    @Override
    public IdLookup idLookup()
    {
      return extractionFn == null ? this : null;
    }

    /**
     * Returns the dictionary id for {@code name}, or {@code DimensionDictionary.ABSENT_VALUE_ID} when the encoded id
     * is at or beyond the cardinality captured at selector creation.
     *
     * @throws UnsupportedOperationException if an extraction function is configured
     */
    @Override
    public int lookupId(@Nullable String name)
    {
      if (extractionFn != null) {
        throw new UnsupportedOperationException(
            "cannot perform lookup when applying an extraction function"
        );
      }

      final int id = getEncodedValue(name, false);
      if (id < maxId) {
        return id;
      } else {
        // Can happen if a value was added to our dimLookup after this selector was created. Act like it
        // doesn't exist.
        return DimensionDictionary.ABSENT_VALUE_ID;
      }
    }

    /**
     * Returns the current row's values for this dimension in decoded form, or null when there is no current row or
     * the row has no slot for this dimension.
     */
    @SuppressWarnings("deprecation")
    @Nullable
    @Override
    public Object getObject()
    {
      IncrementalIndexRow key = currEntry.get();
      if (key == null) {
        return null;
      }

      Object[] dims = key.getDims();
      if (dimIndex >= dims.length) {
        return null;
      }

      return convertUnsortedEncodedKeyComponentToActualList((int[]) dims[dimIndex]);
    }

    @SuppressWarnings("deprecation")
    @Override
    public Class classOfObject()
    {
      return Object.class;
    }

    @Override
    public void inspectRuntimeShape(RuntimeShapeInspector inspector)
    {
      // nothing to inspect
    }
  }

  return new IndexerDimensionSelector();
}