in phoenix-core-client/src/main/java/org/apache/phoenix/compile/WhereOptimizer.java [128:422]
public static Expression pushKeyExpressionsToScan(StatementContext context, Set<Hint> hints,
Expression whereClause, Set<Expression> extractNodes, Optional<byte[]> minOffset) throws SQLException {
PName tenantId = context.getConnection().getTenantId();
byte[] tenantIdBytes = null;
PTable table = context.getCurrentTable().getTable();
Integer nBuckets = table.getBucketNum();
boolean isSalted = nBuckets != null;
RowKeySchema schema = table.getRowKeySchema();
boolean isMultiTenant = tenantId != null && table.isMultiTenant();
boolean isSharedIndex = table.getViewIndexId() != null;
ImmutableBytesWritable ptr = context.getTempPtr();
int maxInListSkipScanSize = context.getConnection().getQueryServices().getConfiguration()
.getInt(QueryServices.MAX_IN_LIST_SKIP_SCAN_SIZE,
QueryServicesOptions.DEFAULT_MAX_IN_LIST_SKIP_SCAN_SIZE);
if (isMultiTenant) {
tenantIdBytes = ScanUtil.getTenantIdBytes(schema, isSalted, tenantId, isSharedIndex);
}
if (whereClause == null && (tenantId == null || !table.isMultiTenant()) && table.getViewIndexId() == null && !minOffset.isPresent()) {
context.setScanRanges(ScanRanges.EVERYTHING);
return whereClause;
}
if (LiteralExpression.isBooleanFalseOrNull(whereClause)) {
context.setScanRanges(ScanRanges.NOTHING);
return null;
}
KeyExpressionVisitor visitor = new KeyExpressionVisitor(context, table);
KeyExpressionVisitor.KeySlots keySlots = null;
if (whereClause != null) {
// TODO:: When we only have one where clause, the keySlots returns as a single slot object,
// instead of an array of slots for the corresponding column. Change the behavior so it
// becomes consistent.
keySlots = whereClause.accept(visitor);
if (keySlots == null && (tenantId == null || !table.isMultiTenant()) && table.getViewIndexId() == null && !minOffset.isPresent()) {
// FIXME this overwrites salting info in the scanRange
context.setScanRanges(ScanRanges.EVERYTHING);
return whereClause;
}
// If a parameter is bound to null (as will be the case for calculating ResultSetMetaData and
// ParameterMetaData), this will be the case. It can also happen for an equality comparison
// for unequal lengths.
if (keySlots == KeyExpressionVisitor.EMPTY_KEY_SLOTS) {
context.setScanRanges(ScanRanges.NOTHING);
return null;
}
}
if (keySlots == null) {
keySlots = KeyExpressionVisitor.EMPTY_KEY_SLOTS;
}
if (extractNodes == null) {
extractNodes = new HashSet<Expression>(table.getPKColumns().size());
}
int pkPos = 0;
int nPKColumns = table.getPKColumns().size();
int[] slotSpanArray = new int[nPKColumns];
List<List<KeyRange>> cnf = Lists.newArrayListWithExpectedSize(schema.getMaxFields());
boolean hasViewIndex = table.getViewIndexId() != null;
Iterator<KeyExpressionVisitor.KeySlot> iterator = keySlots.getSlots().iterator();
// Add placeholder for salt byte ranges
if (isSalted) {
cnf.add(SALT_PLACEHOLDER);
// Increment the pkPos, as the salt column is in the row schema
// Do not increment the iterator, though, as there will never be
// an expression in the keySlots for the salt column
pkPos++;
}
// Add unique index ID for shared indexes on views. This ensures
// that different indexes don't interleave.
if (hasViewIndex) {
byte[] viewIndexBytes = table.getviewIndexIdType().toBytes(table.getViewIndexId());
KeyRange indexIdKeyRange = KeyRange.getKeyRange(viewIndexBytes);
cnf.add(Collections.singletonList(indexIdKeyRange));
pkPos++;
}
// Add tenant data isolation for tenant-specific tables
if (isMultiTenant) {
KeyRange tenantIdKeyRange = KeyRange.getKeyRange(tenantIdBytes);
cnf.add(Collections.singletonList(tenantIdKeyRange));
pkPos++;
}
boolean forcedSkipScan = hints.contains(Hint.SKIP_SCAN);
boolean forcedRangeScan = hints.contains(Hint.RANGE_SCAN);
boolean hasUnboundedRange = false;
boolean hasMultiRanges = false;
boolean hasRangeKey = false;
boolean useSkipScan = false;
boolean checkMaxSkipScanCardinality = false;
BigInteger inListSkipScanCardinality = BigInteger.ONE; // using BigInteger to avoid overflow issues
// Concat byte arrays of literals to form scan start key
while (iterator.hasNext()) {
KeyExpressionVisitor.KeySlot slot = iterator.next();
// If the position of the pk columns in the query skips any part of the row k
// then we have to handle in the next phase through a key filter.
// If the slot is null this means we have no entry for this pk position.
if (slot == null || slot.getKeyRanges().isEmpty()) {
continue;
}
if(slot.getPKPosition() < pkPos) {
continue;
}
if (slot.getPKPosition() != pkPos) {
hasUnboundedRange = hasRangeKey = true;
for (int i= pkPos; i < slot.getPKPosition(); i++) {
cnf.add(Collections.singletonList(KeyRange.EVERYTHING_RANGE));
}
}
KeyPart keyPart = slot.getKeyPart();
List<KeyRange> keyRanges = slot.getKeyRanges();
SortOrder prevSortOrder = null;
int slotOffset = 0;
int clipLeftSpan = 0;
boolean onlySplittedRVCLeftValid = false;
boolean stopExtracting = false;
// Iterate through all spans of this slot
boolean areAllSingleKey = KeyRange.areAllSingleKey(keyRanges);
boolean isInList = false;
int cnfStartPos = cnf.size();
// TODO:
// Using keyPart.getExtractNodes() to determine whether the keyPart has a IN List
// is not guaranteed, since the IN LIST slot may not have any extracted nodes.
if (keyPart.getExtractNodes() != null && keyPart.getExtractNodes().size() > 0
&& keyPart.getExtractNodes().iterator().next() instanceof InListExpression){
isInList = true;
}
while (true) {
SortOrder sortOrder =
schema.getField(slot.getPKPosition() + slotOffset).getSortOrder();
if (prevSortOrder == null) {
prevSortOrder = sortOrder;
} else if (prevSortOrder != sortOrder || (prevSortOrder == SortOrder.DESC && isInList)) {
//Consider the Universe of keys to be [0,7]+ on the leading column A
// and [0,7]+ on trailing column B, with a padbyte of 0 for ASC and 7 for DESC
//if our key range for ASC keys is leading [2,*] and trailing [3,*],
// → [x203 - x777]
//for this particular plan the leading key is descending (ie index desc)
// consider the data
// (3,2) ORDER BY A,B→ x302 → ORDER BY A DESC,B → x472
// (3,3) ORDER BY A,B→ x303 → ORDER BY A DESC,B → x473
// (3,4) ORDER BY A,B→ x304 → ORDER BY A DESC,B → x474
// (2,3) ORDER BY A,B→ x203 → ORDER BY A DESC,B → x573
// (2,7) ORDER BY A,B→ x207 → ORDER BY A DESC,B → x577
// And the logical expression (A,B) > (2,3)
// In the DESC A order the selected values are not contiguous,
// (2,7),(3,2),(3,3),(3,4)
// In the normal ASC order by the values are all contiguous
// Therefore the key cannot be extracted out and a full filter must be applied
// In addition, the boundary of the scan is tricky as the values are not bound
// by (2,3) it is instead bound by (2,7), this should map to, [x000,x577]
// FUTURE: May be able to perform a type of skip scan for this case.
// If the sort order changes, we must clip the portion with the same sort order
// and invert the key ranges and swap the upper and lower bounds.
List<KeyRange> leftRanges = clipLeft(schema, slot.getPKPosition()
+ slotOffset - clipLeftSpan, clipLeftSpan, keyRanges, ptr);
keyRanges =
clipRight(schema, slot.getPKPosition() + slotOffset - 1, keyRanges,
leftRanges, ptr);
leftRanges = KeyRange.coalesce(leftRanges);
keyRanges = KeyRange.coalesce(keyRanges);
if (prevSortOrder == SortOrder.DESC) {
leftRanges = invertKeyRanges(leftRanges);
}
slotSpanArray[cnf.size()] = clipLeftSpan-1;
cnf.add(leftRanges);
pkPos = slot.getPKPosition() + slotOffset;
clipLeftSpan = 0;
prevSortOrder = sortOrder;
// If we had an IN clause with mixed sort ordering then we need to check the possibility of
// skip scan key generation explosion.
checkMaxSkipScanCardinality |= isInList;
// since we have to clip the portion with the same sort order, we can no longer
// extract the nodes from the where clause
// for eg. for the schema A VARCHAR DESC, B VARCHAR ASC and query
// WHERE (A,B) < ('a','b')
// the range (* - a\xFFb) is converted to [~a-*)(*-b)
// so we still need to filter on A,B
stopExtracting = true;
if(!areAllSingleKey) {
//for cnf, we only add [~a-*) to it, (*-b) is skipped.
//but for all single key, we can continue.
onlySplittedRVCLeftValid = true;
break;
}
}
clipLeftSpan++;
slotOffset++;
if (slotOffset >= slot.getPKSpan()) {
break;
}
}
if(onlySplittedRVCLeftValid) {
keyRanges = cnf.get(cnf.size()-1);
} else {
if (schema.getField(
slot.getPKPosition() + slotOffset - 1).getSortOrder() == SortOrder.DESC) {
keyRanges = invertKeyRanges(keyRanges);
}
pkPos = slot.getPKPosition() + slotOffset;
slotSpanArray[cnf.size()] = clipLeftSpan-1;
cnf.add(keyRanges);
}
// Do not use the skipScanFilter when there is a large IN clause (for e.g > 50k elements)
// Since the generation of point keys for skip scan filter will blow up the memory usage.
// See ScanRanges.getPointKeys(...) where using the various slot key ranges
// to generate point keys will lead to combinatorial explosion.
// The following check will ensure the cardinality of generated point keys
// is below the configured max (maxInListSkipScanSize).
// We shall force a range scan if the configured max is exceeded.
// cnfStartPos => is the start slot of this IN list
if (checkMaxSkipScanCardinality) {
for (int i = cnfStartPos; i < cnf.size(); i++) {
// using int can result in overflow
inListSkipScanCardinality =
inListSkipScanCardinality.multiply(BigInteger.valueOf(cnf.get(i).size()));
}
// If the maxInListSkipScanSize <= 0 then the feature (to force range scan) is turned off
if (maxInListSkipScanSize > 0) {
forcedRangeScan =
inListSkipScanCardinality.compareTo(BigInteger.valueOf(maxInListSkipScanSize)) == 1 ? true : false;
}
// Reset the check flag for the next IN list clause
checkMaxSkipScanCardinality = false;
}
// TODO: when stats are available, we may want to use a skip scan if the
// cardinality of this slot is low.
/**
* We use skip scan when:
* 1.previous slot has unbound and force skip scan and
* 2.not force Range Scan and
* 3.previous rowkey slot has range or current rowkey slot have multiple ranges.
*
* Once we can not use skip scan and we have a non-contiguous range, we can not remove
* the whereExpressions of current rowkey slot from the current {@link SelectStatement#where},
* because the {@link Scan#startRow} and {@link Scan#endRow} could not exactly represent
* currentRowKeySlotRanges.
* So we should stop extracting whereExpressions of current rowkey slot once we encounter:
* 1. we now use range scan and
* 2. previous rowkey slot has unbound or
* previous rowkey slot has range or
* current rowkey slot have multiple ranges.
*/
hasMultiRanges |= keyRanges.size() > 1;
useSkipScan |=
(!hasUnboundedRange || forcedSkipScan) &&
!forcedRangeScan &&
(hasRangeKey || hasMultiRanges);
stopExtracting |=
!useSkipScan &&
(hasUnboundedRange || hasRangeKey || hasMultiRanges);
for (int i = 0; (!hasUnboundedRange || !hasRangeKey) && i < keyRanges.size(); i++) {
KeyRange range = keyRanges.get(i);
if (range.isUnbound()) {
hasUnboundedRange = hasRangeKey = true;
} else if (!range.isSingleKey()) {
hasRangeKey = true;
}
}
// Will be null in cases for which only part of the expression was factored out here
// to set the start/end key. An example would be <column> LIKE 'foo%bar' where we can
// set the start key to 'foo' but still need to match the regex at filter time.
// Don't extract expressions if we're forcing a range scan and we've already come
// across a multi-range for a prior slot. The reason is that we have an inexact range after
// that, so must filter on the remaining conditions (see issue #467).
if (!stopExtracting) {
Set<Expression> nodesToExtract = keyPart.getExtractNodes();
extractNodes.addAll(nodesToExtract);
}
}
// If we have fully qualified point keys with multi-column spans (i.e. RVC),
// we can still use our skip scan. The ScanRanges.create() call will explode
// out the keys.
slotSpanArray = Arrays.copyOf(slotSpanArray, cnf.size());
ScanRanges scanRanges = ScanRanges.create(schema, cnf, slotSpanArray, nBuckets, useSkipScan, table.getRowTimestampColPos(), minOffset);
context.setScanRanges(scanRanges);
if (whereClause == null) {
return null;
} else {
return whereClause.accept(new RemoveExtractedNodesVisitor(extractNodes));
}
}