public static Expression pushKeyExpressionsToScan()

in phoenix-core-client/src/main/java/org/apache/phoenix/compile/WhereOptimizer.java [128:422]
187 lines of code
58 McCabe index (conditional complexity)

    public static Expression pushKeyExpressionsToScan(StatementContext context, Set<Hint> hints,
            Expression whereClause, Set<Expression> extractNodes, Optional<byte[]> minOffset) throws SQLException {
        PName tenantId = context.getConnection().getTenantId();
        byte[] tenantIdBytes = null;
        PTable table = context.getCurrentTable().getTable();
        Integer nBuckets = table.getBucketNum();
        boolean isSalted = nBuckets != null;
        RowKeySchema schema = table.getRowKeySchema();
        boolean isMultiTenant = tenantId != null && table.isMultiTenant();
        boolean isSharedIndex = table.getViewIndexId() != null;
        ImmutableBytesWritable ptr = context.getTempPtr();
        int maxInListSkipScanSize = context.getConnection().getQueryServices().getConfiguration()
                .getInt(QueryServices.MAX_IN_LIST_SKIP_SCAN_SIZE,
                        QueryServicesOptions.DEFAULT_MAX_IN_LIST_SKIP_SCAN_SIZE);

        if (isMultiTenant) {
            tenantIdBytes = ScanUtil.getTenantIdBytes(schema, isSalted, tenantId, isSharedIndex);
        }

        if (whereClause == null && (tenantId == null || !table.isMultiTenant()) && table.getViewIndexId() == null && !minOffset.isPresent()) {
            context.setScanRanges(ScanRanges.EVERYTHING);
            return whereClause;
        }
        if (LiteralExpression.isBooleanFalseOrNull(whereClause)) {
            context.setScanRanges(ScanRanges.NOTHING);
            return null;
        }
        KeyExpressionVisitor visitor = new KeyExpressionVisitor(context, table);
        KeyExpressionVisitor.KeySlots keySlots = null;
        if (whereClause != null) {
            // TODO:: When we only have one where clause, the keySlots returns as a single slot object,
            // instead of an array of slots for the corresponding column. Change the behavior so it
            // becomes consistent.
            keySlots = whereClause.accept(visitor);

            if (keySlots == null && (tenantId == null || !table.isMultiTenant()) && table.getViewIndexId() == null && !minOffset.isPresent()) {
                // FIXME this overwrites salting info in the scanRange
                context.setScanRanges(ScanRanges.EVERYTHING);
                return whereClause;
            }
            // If a parameter is bound to null (as will be the case for calculating ResultSetMetaData and
            // ParameterMetaData), this will be the case. It can also happen for an equality comparison
            // for unequal lengths.
            if (keySlots == KeyExpressionVisitor.EMPTY_KEY_SLOTS) {
                context.setScanRanges(ScanRanges.NOTHING);
                return null;
            }
        }
        if (keySlots == null) {
            keySlots = KeyExpressionVisitor.EMPTY_KEY_SLOTS;
        }

        if (extractNodes == null) {
            extractNodes = new HashSet<Expression>(table.getPKColumns().size());
        }

        int pkPos = 0;
        int nPKColumns = table.getPKColumns().size();
        int[] slotSpanArray = new int[nPKColumns];
        List<List<KeyRange>> cnf = Lists.newArrayListWithExpectedSize(schema.getMaxFields());
        boolean hasViewIndex = table.getViewIndexId() != null;
        Iterator<KeyExpressionVisitor.KeySlot> iterator = keySlots.getSlots().iterator();
        // Add placeholder for salt byte ranges
        if (isSalted) {
            cnf.add(SALT_PLACEHOLDER);
            // Increment the pkPos, as the salt column is in the row schema
            // Do not increment the iterator, though, as there will never be
            // an expression in the keySlots for the salt column
            pkPos++;
        }

        // Add unique index ID for shared indexes on views. This ensures
        // that different indexes don't interleave.
        if (hasViewIndex) {
            byte[] viewIndexBytes = table.getviewIndexIdType().toBytes(table.getViewIndexId());
            KeyRange indexIdKeyRange = KeyRange.getKeyRange(viewIndexBytes);
            cnf.add(Collections.singletonList(indexIdKeyRange));
            pkPos++;
        }

        // Add tenant data isolation for tenant-specific tables
        if (isMultiTenant) {
            KeyRange tenantIdKeyRange = KeyRange.getKeyRange(tenantIdBytes);
            cnf.add(Collections.singletonList(tenantIdKeyRange));
            pkPos++;
        }

        boolean forcedSkipScan = hints.contains(Hint.SKIP_SCAN);
        boolean forcedRangeScan = hints.contains(Hint.RANGE_SCAN);
        boolean hasUnboundedRange = false;
        boolean hasMultiRanges = false;
        boolean hasRangeKey = false;
        boolean useSkipScan = false;
        boolean checkMaxSkipScanCardinality = false;
        BigInteger inListSkipScanCardinality = BigInteger.ONE; // using BigInteger to avoid overflow issues


        // Concat byte arrays of literals to form scan start key
        while (iterator.hasNext()) {
            KeyExpressionVisitor.KeySlot slot = iterator.next();
            // If the position of the pk columns in the query skips any part of the row k
            // then we have to handle in the next phase through a key filter.
            // If the slot is null this means we have no entry for this pk position.
            if (slot == null || slot.getKeyRanges().isEmpty())  {
                continue;
            }
            if(slot.getPKPosition() < pkPos) {
                continue;
            }
            if (slot.getPKPosition() != pkPos) {
                hasUnboundedRange = hasRangeKey = true;
                for (int i= pkPos; i < slot.getPKPosition(); i++) {
                    cnf.add(Collections.singletonList(KeyRange.EVERYTHING_RANGE));
                }
            }
            KeyPart keyPart = slot.getKeyPart();
            List<KeyRange> keyRanges = slot.getKeyRanges();
            SortOrder prevSortOrder = null;
            int slotOffset = 0;
            int clipLeftSpan = 0;
            boolean onlySplittedRVCLeftValid = false;
            boolean stopExtracting = false;
            // Iterate through all spans of this slot
            boolean areAllSingleKey = KeyRange.areAllSingleKey(keyRanges);
            boolean isInList = false;
            int cnfStartPos = cnf.size();

            // TODO:
            //  Using keyPart.getExtractNodes() to determine whether the keyPart has a IN List
            //  is not guaranteed, since the IN LIST slot may not have any extracted nodes.
            if (keyPart.getExtractNodes() != null && keyPart.getExtractNodes().size() > 0
                    && keyPart.getExtractNodes().iterator().next() instanceof InListExpression){
                isInList = true;
            }
            while (true) {
                SortOrder sortOrder =
                        schema.getField(slot.getPKPosition() + slotOffset).getSortOrder();
                if (prevSortOrder == null)  {
                    prevSortOrder = sortOrder;
                } else if (prevSortOrder != sortOrder || (prevSortOrder == SortOrder.DESC && isInList)) {
                    //Consider the Universe of keys to be [0,7]+ on the leading column A
                    // and [0,7]+ on trailing column B, with a padbyte of 0 for ASC and 7 for DESC
                    //if our key range for ASC keys is leading [2,*] and trailing [3,*],
                    //   → [x203 - x777]
                    //for this particular plan the leading key is descending (ie index desc)
                    // consider the data
                    // (3,2) ORDER BY A,B→ x302 → ORDER BY A DESC,B → x472
                    // (3,3) ORDER BY A,B→ x303 → ORDER BY A DESC,B → x473
                    // (3,4) ORDER BY A,B→ x304 → ORDER BY A DESC,B → x474
                    // (2,3) ORDER BY A,B→ x203 → ORDER BY A DESC,B → x573
                    // (2,7) ORDER BY A,B→ x207 → ORDER BY A DESC,B → x577
                    // And the logical expression (A,B) > (2,3)
                    // In the DESC A order the selected values are not contiguous,
                    // (2,7),(3,2),(3,3),(3,4)
                    // In the normal ASC order by the values are all contiguous
                    // Therefore the key cannot be extracted out and a full filter must be applied
                    // In addition, the boundary of the scan is tricky as the values are not bound
                    // by (2,3) it is instead bound by (2,7), this should map to, [x000,x577]
                    // FUTURE: May be able to perform a type of skip scan for this case.

                    // If the sort order changes, we must clip the portion with the same sort order
                    // and invert the key ranges and swap the upper and lower bounds.
                    List<KeyRange> leftRanges = clipLeft(schema, slot.getPKPosition()
                            + slotOffset - clipLeftSpan, clipLeftSpan, keyRanges, ptr);
                    keyRanges =
                            clipRight(schema, slot.getPKPosition() + slotOffset - 1, keyRanges,
                                    leftRanges, ptr);
                    leftRanges = KeyRange.coalesce(leftRanges);
                    keyRanges = KeyRange.coalesce(keyRanges);
                    if (prevSortOrder == SortOrder.DESC) {
                        leftRanges = invertKeyRanges(leftRanges);
                    }
                    slotSpanArray[cnf.size()] = clipLeftSpan-1;
                    cnf.add(leftRanges);
                    pkPos = slot.getPKPosition() + slotOffset;
                    clipLeftSpan = 0;
                    prevSortOrder = sortOrder;
                    // If we had an IN clause with mixed sort ordering then we need to check the possibility of
                    // skip scan key generation explosion.
                    checkMaxSkipScanCardinality |= isInList;
                    // since we have to clip the portion with the same sort order, we can no longer
                    // extract the nodes from the where clause
                    // for eg. for the schema A VARCHAR DESC, B VARCHAR ASC and query
                    //   WHERE (A,B) < ('a','b')
                    // the range (* - a\xFFb) is converted to [~a-*)(*-b)
                    // so we still need to filter on A,B
                    stopExtracting = true;
                    if(!areAllSingleKey) {
                        //for cnf, we only add [~a-*) to it, (*-b) is skipped.
                        //but for all single key, we can continue.
                        onlySplittedRVCLeftValid = true;
                        break;
                    }
                }
                clipLeftSpan++;
                slotOffset++;
                if (slotOffset >= slot.getPKSpan()) {
                    break;
                }
            }

            if(onlySplittedRVCLeftValid) {
                keyRanges = cnf.get(cnf.size()-1);
            } else {
                if (schema.getField(
                       slot.getPKPosition() + slotOffset - 1).getSortOrder() == SortOrder.DESC) {
                   keyRanges = invertKeyRanges(keyRanges);
                }
                pkPos = slot.getPKPosition() + slotOffset;
                slotSpanArray[cnf.size()] = clipLeftSpan-1;
                cnf.add(keyRanges);
            }

            // Do not use the skipScanFilter when there is a large IN clause (for e.g > 50k elements)
            // Since the generation of point keys for skip scan filter will blow up the memory usage.
            // See ScanRanges.getPointKeys(...) where using the various slot key ranges
            // to generate point keys will lead to combinatorial explosion.
            // The following check will ensure the cardinality of generated point keys
            // is below the configured max (maxInListSkipScanSize).
            // We shall force a range scan if the configured max is exceeded.
            // cnfStartPos => is the start slot of this IN list
            if (checkMaxSkipScanCardinality) {
                for (int i = cnfStartPos; i < cnf.size(); i++) {
                    // using int can result in overflow
                    inListSkipScanCardinality =
                        inListSkipScanCardinality.multiply(BigInteger.valueOf(cnf.get(i).size()));
                }
                // If the maxInListSkipScanSize <= 0 then the feature (to force range scan) is turned off
                if (maxInListSkipScanSize > 0) {
                    forcedRangeScan =
                        inListSkipScanCardinality.compareTo(BigInteger.valueOf(maxInListSkipScanSize)) == 1 ? true : false;
                }
                // Reset the check flag for the next IN list clause
                checkMaxSkipScanCardinality = false;
            }

            // TODO: when stats are available, we may want to use a skip scan if the
            // cardinality of this slot is low.
            /**
             * We use skip scan when:
             * 1.previous slot has unbound and force skip scan and
             * 2.not force Range Scan and
             * 3.previous rowkey slot has range or current rowkey slot have multiple ranges.
             *
             * Once we can not use skip scan and we have a non-contiguous range, we can not remove
             * the whereExpressions of current rowkey slot from the current {@link SelectStatement#where},
             * because the {@link Scan#startRow} and {@link Scan#endRow} could not exactly represent
             * currentRowKeySlotRanges.
             * So we should stop extracting whereExpressions of current rowkey slot once we encounter:
             * 1. we now use range scan and
             * 2. previous rowkey slot has unbound or
             *    previous rowkey slot has range or
             *    current rowkey slot have multiple ranges.
             */
            hasMultiRanges |= keyRanges.size() > 1;
            useSkipScan |=
                    (!hasUnboundedRange || forcedSkipScan) &&
                    !forcedRangeScan &&
                    (hasRangeKey || hasMultiRanges);

            stopExtracting |=
                     !useSkipScan &&
                     (hasUnboundedRange || hasRangeKey || hasMultiRanges);

            for (int i = 0; (!hasUnboundedRange || !hasRangeKey) && i < keyRanges.size(); i++) {
                KeyRange range  = keyRanges.get(i);
                if (range.isUnbound()) {
                    hasUnboundedRange = hasRangeKey = true;
                } else if (!range.isSingleKey()) {
                    hasRangeKey = true;
                }
            }
            // Will be null in cases for which only part of the expression was factored out here
            // to set the start/end key. An example would be <column> LIKE 'foo%bar' where we can
            // set the start key to 'foo' but still need to match the regex at filter time.
            // Don't extract expressions if we're forcing a range scan and we've already come
            // across a multi-range for a prior slot. The reason is that we have an inexact range after
            // that, so must filter on the remaining conditions (see issue #467).
            if (!stopExtracting) {
                Set<Expression> nodesToExtract = keyPart.getExtractNodes();
                extractNodes.addAll(nodesToExtract);
            }
        }
        // If we have fully qualified point keys with multi-column spans (i.e. RVC),
        // we can still use our skip scan. The ScanRanges.create() call will explode
        // out the keys.
        slotSpanArray = Arrays.copyOf(slotSpanArray, cnf.size());
        ScanRanges scanRanges = ScanRanges.create(schema, cnf, slotSpanArray, nBuckets, useSkipScan, table.getRowTimestampColPos(), minOffset);
        context.setScanRanges(scanRanges);
        if (whereClause == null) {
            return null;
        } else {
            return whereClause.accept(new RemoveExtractedNodesVisitor(extractNodes));
        }
    }