protected boolean checkAndApplyJoinTransformation()

in asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceJoinAccessMethodRule.java [262:486]
137 lines of code
37 McCabe index (conditional complexity)

    /**
     * Recursively walks the plan rooted at {@code opRef} and tries to rewrite one join using an index chosen
     * from the join's right subtree.
     * <p>
     * The inputs of the given operator are visited first, so joins that are deeper in the plan are attempted
     * before the current operator; the first successful attempt ends the traversal. The current operator is
     * then handled as a join if it is an inner join itself, or if one of its inputs is a left-outer-join
     * (in which case that input is the join being transformed and the current operator is its parent).
     * <p>
     * This method reads and writes instance state: every visited operator is appended to {@code afterJoinRefs}
     * before its inputs are visited (so, while a join is being processed, the list still holds the operators
     * above it on the current recursion path), and {@code joinRef} / {@code joinOp} are set to the join under
     * consideration and reset to {@code null} when the attempt falls through without succeeding.
     *
     * @param opRef
     *            reference to the operator to examine
     * @param context
     *            optimization context; also used for the "don't apply" set and for recomputing type environments
     * @param checkApplicableOnly
     *            if {@code true}, the method returns {@code true} as soon as an index has been chosen and the
     *            sanity check has passed, without calling {@code applyJoinPlanTransformation}; in this mode the
     *            caller-supplied {@code analyzedAMs} is used instead of a fresh map
     * @param chosenIndexes
     *            passed through to {@code chooseBestIndex} and {@code rewriteLocallyAndTransform}
     * @param analyzedAMs
     *            per-access-method analysis contexts; replaced by a new empty map for the current join when
     *            {@code checkApplicableOnly} is {@code false} and the join is not in the "don't apply" set
     * @return {@code true} if a join in this subtree was transformed (or, with {@code checkApplicableOnly},
     *         was found to be transformable); {@code false} otherwise
     * @throws AlgebricksException
     *             if one of the analysis or rewrite steps invoked here fails
     */
    protected boolean checkAndApplyJoinTransformation(Mutable<ILogicalOperator> opRef, IOptimizationContext context,
            boolean checkApplicableOnly, List<Pair<IAccessMethod, Index>> chosenIndexes,
            Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs) throws AlgebricksException {
        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
        boolean joinFoundAndOptimizationApplied;

        // Adds the current operator to the list of operators that sit above (i.e. are applied after) a join,
        // in case there is a descendant join operator that can be transformed first.
        afterJoinRefs.add(opRef);

        // Recursively checks the plan and tries to optimize it. The children of the given operator are checked
        // first to make sure that a join deeper in the path is optimized first.
        for (Mutable<ILogicalOperator> inputOpRef : op.getInputs()) {
            joinFoundAndOptimizationApplied = checkAndApplyJoinTransformation(inputOpRef, context, checkApplicableOnly,
                    chosenIndexes, analyzedAMs);
            if (joinFoundAndOptimizationApplied) {
                return true;
            }
        }

        // At this point, the transformation attempts for all joins below this operator have failed.
        // Checks the current operator pattern to see whether it is a JOIN or not.
        boolean isInnerJoin = false;
        boolean isLeftOuterJoin = false;
        int leftOuterJoinChildIdx;

        // Local copies of the join found at this operator; the joinRef/joinOp fields are re-assigned from
        // these right before the transformation attempt below.
        Mutable<ILogicalOperator> joinRefFromThisOp = null;
        AbstractBinaryJoinOperator joinOpFromThisOp = null;
        // The operator reference that needs to be removed from the afterJoinRefs list.
        // By default this is the current operator itself.
        Mutable<ILogicalOperator> opRefRemove = opRef;
        if (isInnerJoin(op)) {
            // Sets the inner join operator.
            isInnerJoin = true;
            joinRef = opRef;
            joinOp = (InnerJoinOperator) op;
            joinRefFromThisOp = opRef;
            joinOpFromThisOp = (InnerJoinOperator) op;
        } else if ((leftOuterJoinChildIdx = findLeftOuterJoinChild(op)) >= 0) {
            // Sets the left-outer-join operator.
            // A child of the current operator is LEFTOUTERJOIN.
            isLeftOuterJoin = true;
            joinRef = op.getInputs().get(leftOuterJoinChildIdx);
            joinOp = (LeftOuterJoinOperator) joinRef.getValue();
            joinRefFromThisOp = op.getInputs().get(leftOuterJoinChildIdx);
            joinOpFromThisOp = (LeftOuterJoinOperator) joinRefFromThisOp.getValue();
            // Left outer join's parent operator should not be removed at this point since the given left-outer-join
            // can be transformed. The join child's reference is removed instead, so the current operator (the
            // parent) stays in afterJoinRefs during the attempt.
            opRefRemove = op.getInputs().get(leftOuterJoinChildIdx);
        }
        afterJoinRefs.remove(opRefRemove);

        // For a JOIN case, tries to transform the given plan.
        if (isInnerJoin || isLeftOuterJoin) {

            // Restores the information from this operator since it might have been set to null
            // if there are other join operators in the earlier path.
            joinRef = joinRefFromThisOp;
            joinOp = joinOpFromThisOp;

            // Once this becomes false, all remaining analysis steps are skipped and control falls through
            // to the field reset at the end of this branch.
            boolean continueCheck = true;

            // Already checked? If not, this operator may be optimized.
            if (context.checkIfInDontApplySet(this, joinOp)) {
                continueCheck = false;
            }

            // For each access method, this contains the information about
            // whether an available index can be applicable or not.
            // A fresh map is used per join unless we are in check-only mode, in which case the
            // caller-supplied map is kept.
            if (!checkApplicableOnly && continueCheck) {
                analyzedAMs = new HashMap<>();
            }

            if (continueCheck && context.getPhysicalOptimizationConfig().isArrayIndexEnabled()
                    && JoinFromSubplanRewrite.isApplicableForRewriteCursory(metadataProvider, joinOp)) {
                // If there exists a SUBPLAN in our plan, and we are conditioning on a variable, attempt to rewrite
                // this subplan to allow an array-index AM to be introduced. If successful, this rewrite will transform
                // into an index-nested-loop-join. This rewrite is to be used for pushing the UNNESTs and ASSIGNs from
                // the subplan into the index branch and giving the join a condition for this rule to optimize.
                // *No nodes* from this rewrite will be used beyond this point.
                // NOTE(review): the plan is rewired below whenever rewriteLocallyAndTransform returns true,
                // regardless of checkApplicableOnly - confirm how that helper behaves in check-only mode.
                joinFromSubplanRewrite.findAfterSubplanSelectOperator(afterJoinRefs);
                if (rewriteLocallyAndTransform(joinRef, context, joinFromSubplanRewrite, checkApplicableOnly,
                        chosenIndexes, analyzedAMs)) {
                    // Connect the after-join operators to the index subtree root before this rewrite. This also avoids
                    // performing the secondary index validation step twice.
                    // The last entry of afterJoinRefs gets the join's second input in place of the join itself.
                    ILogicalOperator lastAfterJoinOp = afterJoinRefs.get(afterJoinRefs.size() - 1).getValue();
                    OperatorManipulationUtil.substituteOpInInput(lastAfterJoinOp, joinOp, joinOp.getInputs().get(1));
                    context.computeAndSetTypeEnvironmentForOperator(lastAfterJoinOp);
                    return true;
                }
            }

            // Checks the condition of JOIN operator is a function call since only function call can be transformed
            // using available indexes. If so, initializes the subtree information that will be used later to decide
            // whether the given plan is truly optimizable or not.
            if (continueCheck && !checkJoinOpConditionAndInitSubTree(context)) {
                continueCheck = false;
            }

            // Analyzes the JOIN condition against each subtree that has a data source and updates analyzedAMs.
            // The two flags record, per subtree, the result of that analysis.
            boolean matchInLeftSubTree = false;
            boolean matchInRightSubTree = false;
            if (continueCheck) {
                if (leftSubTree.hasDataSource()) {
                    matchInLeftSubTree = analyzeSelectOrJoinOpConditionAndUpdateAnalyzedAM(joinCond,
                            leftSubTree.getAssignsAndUnnests(), analyzedAMs, context, typeEnvironment);
                }
                if (rightSubTree.hasDataSource()) {
                    matchInRightSubTree = analyzeSelectOrJoinOpConditionAndUpdateAnalyzedAM(joinCond,
                            rightSubTree.getAssignsAndUnnests(), analyzedAMs, context, typeEnvironment);
                }
            }

            // Finds the dataset from the data-source and the record type of the dataset from the metadata.
            // This will be used to find an applicable index on the dataset.
            // Metadata is only resolved when the condition matched in the right subtree; the left subtree's
            // metadata is resolved only if the condition matched there as well.
            boolean checkLeftSubTreeMetadata = false;
            boolean checkRightSubTreeMetadata = false;
            if (continueCheck && matchInRightSubTree) {
                // Set dataset and type metadata.
                if (matchInLeftSubTree) {
                    checkLeftSubTreeMetadata = leftSubTree.setDatasetAndTypeMetadata(metadataProvider);
                }
                checkRightSubTreeMetadata = rightSubTree.setDatasetAndTypeMetadata(metadataProvider);
            }

            if (continueCheck && checkRightSubTreeMetadata) {
                // Map variables to the applicable indexes and find the field name and type.
                // Then find the applicable indexes for the variables used in the JOIN condition.
                // NOTE(review): the left subtree call passes an extra flag that is true when the left subtree's
                // metadata was not resolved - confirm its meaning against fillSubTreeIndexExprs.
                fillSubTreeIndexExprs(leftSubTree, analyzedAMs, context, true, !checkLeftSubTreeMetadata);
                fillSubTreeIndexExprs(rightSubTree, analyzedAMs, context, false);

                // Prunes the access methods based on the function expression and access methods.
                pruneIndexCandidates(analyzedAMs, context, typeEnvironment, checkApplicableOnly);

                // If the right subtree (inner branch) has indexes, one of those indexes will be used.
                // Removes the indexes from the outer branch in the optimizer's consideration list for this rule.
                pruneIndexCandidatesFromOuterBranch(analyzedAMs);

                // We are going to use indexes from the inner branch.
                // If no index is available, then we stop here and remember this join in the "don't apply" set
                // so that it is skipped by the check at the top of this branch next time.
                Pair<IAccessMethod, Index> chosenIndex = chooseBestIndex(analyzedAMs, chosenIndexes);
                if (chosenIndex == null) {
                    context.addToDontApplySet(this, joinOp);
                    continueCheck = false;
                }

                if (continueCheck) {
                    // Finds the field name of each variable in the sub-tree such as variables for order by.
                    // This step is required when checking index-only plan.
                    if (checkLeftSubTreeMetadata) {
                        fillFieldNamesInTheSubTree(leftSubTree, context);
                    }
                    // Always true here because of the enclosing condition.
                    if (checkRightSubTreeMetadata) {
                        fillFieldNamesInTheSubTree(rightSubTree, context);
                    }

                    // Applies the plan transformation using chosen index.
                    AccessMethodAnalysisContext analysisCtx = analyzedAMs.get(chosenIndex.first);

                    // Only a left outer join carries a missing value; it is null for an inner join.
                    IAlgebricksConstantValue leftOuterMissingValue =
                            isLeftOuterJoin ? ((LeftOuterJoinOperator) joinOp).getMissingValue() : null;

                    // For a left outer join with a special GroupBy, prepare objects to reset LOJ's
                    // nullPlaceHolderVariable in that GroupBy's nested plan.
                    // See AccessMethodUtils#removeUnjoinedDuplicatesInLOJ() for a definition of a special GroupBy
                    // and extra output processing steps needed when it's not available.
                    // Here "op" is the parent of the left outer join, so the GroupBy examined is that parent.
                    boolean isLeftOuterJoinWithSpecialGroupBy;
                    if (isLeftOuterJoin && op.getOperatorTag() == LogicalOperatorTag.GROUP) {
                        GroupByOperator groupByOp = (GroupByOperator) opRef.getValue();
                        FunctionIdentifier isMissingNullFuncId = Objects
                                .requireNonNull(OperatorPropertiesUtil.getIsMissingNullFunction(leftOuterMissingValue));
                        ScalarFunctionCallExpression isMissingNullFuncExpr = AccessMethodUtils
                                .findLOJIsMissingNullFuncInGroupBy(groupByOp, rightSubTree, isMissingNullFuncId);
                        // TODO:(dmitry) do we need additional checks to ensure that this is a special GroupBy,
                        // i.e. that this GroupBy will eliminate unjoined duplicates?
                        isLeftOuterJoinWithSpecialGroupBy = isMissingNullFuncExpr != null;
                        if (isLeftOuterJoinWithSpecialGroupBy) {
                            analysisCtx.setLOJSpecialGroupByOpRef(opRef);
                            analysisCtx.setLOJIsMissingNullFuncInSpecialGroupBy(isMissingNullFuncExpr);
                        }
                    } else {
                        isLeftOuterJoinWithSpecialGroupBy = false;
                    }

                    Dataset indexDataset = analysisCtx.getDatasetFromIndexDatasetMap(chosenIndex.second);

                    // We assume that the left subtree is the outer branch and the right subtree
                    // is the inner branch. This assumption holds true since we only use an index
                    // from the right subtree. The following is just a sanity check: it gives up only when the
                    // right subtree has no data source scan and the chosen index's dataset name differs from
                    // the right subtree's dataset name.
                    // NOTE(review): this return, like the other early returns in this branch, skips the
                    // joinRef/joinOp reset and the afterJoinRefs cleanup below - confirm that is intended.
                    if (!rightSubTree.hasDataSourceScan()
                            && !indexDataset.getDatasetName().equals(rightSubTree.getDataset().getDatasetName())) {
                        return false;
                    }

                    // In check-only mode, an applicable index has been found; report it without rewriting.
                    if (checkApplicableOnly) {
                        return true;
                    }

                    // Finally, tries to apply plan transformation using the chosen index.
                    boolean res = chosenIndex.first.applyJoinPlanTransformation(afterJoinRefs, joinRef, leftSubTree,
                            rightSubTree, chosenIndex.second, analysisCtx, context, isLeftOuterJoin,
                            isLeftOuterJoinWithSpecialGroupBy, leftOuterMissingValue);

                    // If the plan transformation is successful, we don't need to traverse the plan
                    // any more, since if there are more JOIN operators, the next trigger on this plan
                    // will find them.
                    if (res) {
                        return res;
                    }
                }
            }

            // The attempt on this join did not succeed: clears the join fields so that they do not leak
            // into the processing of operators higher in the plan.
            joinRef = null;
            joinOp = null;
        }

        // Checked the given left-outer-join operator and it is not transformed.
        // So, the left-outer-join's parent operator should be removed from the afterJoinRefs list
        // since the current operator is that parent operator.
        if (isLeftOuterJoin) {
            afterJoinRefs.remove(opRef);
        }

        return false;
    }