in flink-connector-hive/src/main/java/org/apache/flink/table/planner/delegation/hive/HiveParserCalcitePlanner.java [2004:2446]
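// Generates the logical plan for the SELECT clause: resolves windowing
// expressions, TRANSFORM and UDTF selects, expands * and regex column
// references, converts the resulting Hive expressions to RexNodes, and builds
// the final Project (or script/UDTF/Aggregate) node.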
private RelNode genSelectLogicalPlan(
HiveParserQB qb,
RelNode srcRel,
RelNode starSrcRel,
Map<String, Integer> outerNameToPos,
HiveParserRowResolver outerRR)
throws SemanticException {
// 0. Generate a Select Node for Windowing
// Exclude the newly generated select columns from '*' (and similar) resolution.
HashSet<ColumnInfo> excludedColumns = new HashSet<>();
RelNode selForWindow = genSelectForWindowing(qb, srcRel, excludedColumns);
srcRel = (selForWindow == null) ? srcRel : selForWindow;
ArrayList<ExprNodeDesc> exprNodeDescs = new ArrayList<>();
HiveParserASTNode trfm = null;
// 1. Get Select Expression List
HiveParserQBParseInfo qbp = qb.getParseInfo();
String selClauseName = qbp.getClauseNames().iterator().next();
HiveParserASTNode selExprList = qbp.getSelForClause(selClauseName);
// make sure that if there is a subquery, it is a top-level expression
HiveParserSubQueryUtils.checkForTopLevelSubqueries(selExprList);
final boolean cubeRollupGrpSetPresent =
!qbp.getDestRollups().isEmpty()
|| !qbp.getDestGroupingSets().isEmpty()
|| !qbp.getDestCubes().isEmpty();
// 2. Query Hints
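// e.g. for "SELECT /*+ ... */ a FROM t" the hint occupies child 0 of the
// select-expression list, so the real select expressions start at posn 1.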
int posn = 0;
boolean hintPresent = selExprList.getChild(0).getType() == HiveASTParser.QUERY_HINT;
if (hintPresent) {
posn++;
}
// 3. Bail out if the select involves TRANSFORM
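// e.g. "SELECT TRANSFORM(a, b) USING 'my_script' AS (x, y) FROM t" pipes the
// input columns through an external script; its plan is built in step 8 below.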
boolean isInTransform =
selExprList.getChild(posn).getChild(0).getType() == HiveASTParser.TOK_TRANSFORM;
if (isInTransform) {
trfm = (HiveParserASTNode) selExprList.getChild(posn).getChild(0);
}
// 4. Row resolvers for input and output
HiveParserRowResolver outRR = new HiveParserRowResolver();
// SELECT * or SELECT TRANSFORM(*)
Integer pos = 0;
// TODO: will this also fix windowing? try
HiveParserRowResolver inputRR = relToRowResolver.get(srcRel), starRR = inputRR;
inputRR.setCheckForAmbiguity(true);
if (starSrcRel != null) {
starRR = relToRowResolver.get(starSrcRel);
}
// 5. Check if select involves UDTF
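// e.g. "SELECT explode(arr) AS c FROM t", where explode is resolved as a
// table function and the select is planned as a UDTF further below.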
String udtfTableAlias = null;
SqlOperator udtfOperator = null;
String genericUDTFName = null;
ArrayList<String> udtfColAliases = new ArrayList<>();
HiveParserASTNode expr = (HiveParserASTNode) selExprList.getChild(posn).getChild(0);
int exprType = expr.getType();
if (exprType == HiveASTParser.TOK_FUNCTION || exprType == HiveASTParser.TOK_FUNCTIONSTAR) {
String funcName =
HiveParserTypeCheckProcFactory.DefaultExprProcessor.getFunctionText(expr, true);
// we can't just try to get a table function here, because the operator table
// throws an exception if it's not a table function
SqlOperator sqlOperator =
HiveParserUtils.getAnySqlOperator(funcName, frameworkConfig.getOperatorTable());
if (HiveParserUtils.isUDTF(sqlOperator)) {
LOG.debug("Found UDTF " + funcName);
udtfOperator = sqlOperator;
genericUDTFName = funcName;
if (!HiveParserUtils.isNative(sqlOperator)) {
semanticAnalyzer.unparseTranslator.addIdentifierTranslation(
(HiveParserASTNode) expr.getChild(0));
}
if (exprType == HiveASTParser.TOK_FUNCTIONSTAR) {
semanticAnalyzer.genColListRegex(
".*",
null,
(HiveParserASTNode) expr.getChild(0),
exprNodeDescs,
null,
inputRR,
starRR,
pos,
outRR,
qb.getAliases(),
false);
}
}
}
if (udtfOperator != null) {
// Only support a single expression when it's a UDTF
if (selExprList.getChildCount() > 1) {
throw new SemanticException(
generateErrorMessage(
(HiveParserASTNode) selExprList.getChild(1),
ErrorMsg.UDTF_MULTIPLE_EXPR.getMsg()));
}
HiveParserASTNode selExpr = (HiveParserASTNode) selExprList.getChild(posn);
// Get the column / table aliases from the expression. Start from 1, as
// child 0 is the TOK_FUNCTION.
// Column names can also be inferred from the result of the UDTF.
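// e.g. "SELECT posexplode(arr) AS (p, v) FROM t" yields col aliases [p, v].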
for (int i = 1; i < selExpr.getChildCount(); i++) {
HiveParserASTNode selExprChild = (HiveParserASTNode) selExpr.getChild(i);
switch (selExprChild.getType()) {
case HiveASTParser.Identifier:
udtfColAliases.add(
unescapeIdentifier(selExprChild.getText().toLowerCase()));
semanticAnalyzer.unparseTranslator.addIdentifierTranslation(selExprChild);
break;
case HiveASTParser.TOK_TABALIAS:
assert (selExprChild.getChildCount() == 1);
udtfTableAlias = unescapeIdentifier(selExprChild.getChild(0).getText());
qb.addAlias(udtfTableAlias);
semanticAnalyzer.unparseTranslator.addIdentifierTranslation(
(HiveParserASTNode) selExprChild.getChild(0));
break;
default:
throw new SemanticException(
"Find invalid token type " + selExprChild.getType() + " in UDTF.");
}
}
LOG.debug("UDTF table alias is " + udtfTableAlias);
LOG.debug("UDTF col aliases are " + udtfColAliases);
}
// 6. Iterate over all expressions (after SELECT)
HiveParserASTNode exprList;
if (isInTransform) {
exprList = (HiveParserASTNode) trfm.getChild(0);
} else if (udtfOperator != null) {
exprList = expr;
} else {
exprList = selExprList;
}
// For UDTFs, skip the function name to get the expressions
int startPos = udtfOperator != null ? posn + 1 : posn;
if (isInTransform) {
startPos = 0;
}
// track the col aliases provided by the user
List<String> colAliases = new ArrayList<>();
for (int i = startPos; i < exprList.getChildCount(); ++i) {
colAliases.add(null);
// 6.1 child can be EXPR AS ALIAS, or EXPR.
HiveParserASTNode child = (HiveParserASTNode) exprList.getChild(i);
boolean hasAsClause = child.getChildCount() == 2 && !isInTransform;
boolean isWindowSpec =
child.getChildCount() == 3
&& child.getChild(2).getType() == HiveASTParser.TOK_WINDOWSPEC;
// 6.2 EXPR AS (ALIAS,...) parses, but is only allowed for UDTFs.
// This check is neither needed nor valid when there is a transform, because
// the ASTs are slightly different.
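// e.g. "SELECT a AS (x, y) FROM t" is rejected here, since the multi-alias
// form is reserved for UDTFs such as posexplode.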
if (!isWindowSpec
&& !isInTransform
&& udtfOperator == null
&& child.getChildCount() > 2) {
throw new SemanticException(
generateErrorMessage(
(HiveParserASTNode) child.getChild(2),
ErrorMsg.INVALID_AS.getMsg()));
}
String tabAlias;
String colAlias;
if (isInTransform || udtfOperator != null) {
tabAlias = null;
colAlias = semanticAnalyzer.getAutogenColAliasPrfxLbl() + i;
expr = child;
} else {
// 6.3 Get rid of TOK_SELEXPR
expr = (HiveParserASTNode) child.getChild(0);
String[] colRef =
HiveParserUtils.getColAlias(
child,
semanticAnalyzer.getAutogenColAliasPrfxLbl(),
inputRR,
semanticAnalyzer.autogenColAliasPrfxIncludeFuncName(),
i);
tabAlias = colRef[0];
colAlias = colRef[1];
if (hasAsClause) {
colAliases.set(colAliases.size() - 1, colAlias);
semanticAnalyzer.unparseTranslator.addIdentifierTranslation(
(HiveParserASTNode) child.getChild(1));
}
}
Map<HiveParserASTNode, RelNode> subQueryToRelNode = new HashMap<>();
boolean isSubQuery = genSubQueryRelNode(qb, expr, srcRel, false, subQueryToRelNode);
if (isSubQuery) {
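// e.g. "SELECT (SELECT max(b) FROM t2) FROM t1": the scalar subquery has
// been planned into subQueryToRelNode above and is folded into an
// ExprNodeDesc here.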
ExprNodeDesc subQueryDesc =
semanticAnalyzer.genExprNodeDesc(
expr,
relToRowResolver.get(srcRel),
outerRR,
subQueryToRelNode,
false);
exprNodeDescs.add(subQueryDesc);
ColumnInfo colInfo =
new ColumnInfo(
getColumnInternalName(pos),
subQueryDesc.getWritableObjectInspector(),
tabAlias,
false);
if (!outRR.putWithCheck(tabAlias, colAlias, null, colInfo)) {
throw new SemanticException(
"Cannot add column to RR: "
+ tabAlias
+ "."
+ colAlias
+ " => "
+ colInfo
+ " due to duplication, see previous warnings");
}
} else {
// 6.4 Build ExprNode corresponding to columns
if (expr.getType() == HiveASTParser.TOK_ALLCOLREF) {
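// e.g. "SELECT * FROM t" or "SELECT t.* FROM t": expand all columns of the
// (optionally qualified) source.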
pos =
semanticAnalyzer.genColListRegex(
".*",
expr.getChildCount() == 0
? null
: HiveParserBaseSemanticAnalyzer.getUnescapedName(
(HiveParserASTNode) expr.getChild(0))
.toLowerCase(),
expr,
exprNodeDescs,
excludedColumns,
inputRR,
starRR,
pos,
outRR,
qb.getAliases(),
false /* don't require uniqueness */);
} else if (expr.getType() == HiveASTParser.TOK_TABLE_OR_COL
&& !hasAsClause
&& !inputRR.getIsExprResolver()
&& HiveParserUtils.isRegex(
unescapeIdentifier(expr.getChild(0).getText()),
semanticAnalyzer.getConf())) {
// The expression is a regex COL. This can only happen without an AS clause.
// We don't allow this for the ExprResolver - the GROUP BY case.
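// e.g. with hive.support.quoted.identifiers=none, "SELECT `(a|b)` FROM t"
// expands to all columns whose names match the regex (a|b).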
pos =
semanticAnalyzer.genColListRegex(
unescapeIdentifier(expr.getChild(0).getText()),
null,
expr,
exprNodeDescs,
excludedColumns,
inputRR,
starRR,
pos,
outRR,
qb.getAliases(),
true);
} else if (expr.getType() == HiveASTParser.DOT
&& expr.getChild(0).getType() == HiveASTParser.TOK_TABLE_OR_COL
&& inputRR.hasTableAlias(
unescapeIdentifier(
expr.getChild(0).getChild(0).getText().toLowerCase()))
&& !hasAsClause
&& !inputRR.getIsExprResolver()
&& HiveParserUtils.isRegex(
unescapeIdentifier(expr.getChild(1).getText()),
semanticAnalyzer.getConf())) {
// The expression is TABLE.COL (col can be a regex). This can only happen
// without an AS clause.
// We don't allow this for the ExprResolver - the GROUP BY case.
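// e.g. "SELECT t.`col.*` FROM t" expands to all columns of t whose names
// match the regex col.*.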
pos =
semanticAnalyzer.genColListRegex(
unescapeIdentifier(expr.getChild(1).getText()),
unescapeIdentifier(
expr.getChild(0).getChild(0).getText().toLowerCase()),
expr,
exprNodeDescs,
excludedColumns,
inputRR,
starRR,
pos,
outRR,
qb.getAliases(),
false /* don't require uniqueness */);
} else if (HiveASTParseUtils.containsTokenOfType(expr, HiveASTParser.TOK_FUNCTIONDI)
&& !(srcRel instanceof Aggregate)) {
// Likely a malformed query, e.g. select hash(distinct c1) from t1;
throw new SemanticException("Distinct without an aggregation.");
} else {
// Case when this is an expression
HiveParserTypeCheckCtx typeCheckCtx =
new HiveParserTypeCheckCtx(
inputRR, true, true, frameworkConfig, cluster);
// We allow stateful functions in the SELECT list (but nowhere else)
typeCheckCtx.setAllowStatefulFunctions(true);
if (!qbp.getDestToGroupBy().isEmpty()) {
// Special handling of grouping function
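// e.g. "SELECT grouping(a) FROM t GROUP BY a, b WITH CUBE": grouping()
// calls are rewritten against the GROUP BY keys before type checking.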
expr =
rewriteGroupingFunctionAST(
getGroupByForClause(qbp, selClauseName),
expr,
!cubeRollupGrpSetPresent);
}
ExprNodeDesc exprDesc =
semanticAnalyzer.genExprNodeDesc(expr, inputRR, typeCheckCtx);
String recommended = semanticAnalyzer.recommendName(exprDesc, colAlias);
if (recommended != null && outRR.get(null, recommended) == null) {
colAlias = recommended;
}
exprNodeDescs.add(exprDesc);
ColumnInfo colInfo =
new ColumnInfo(
getColumnInternalName(pos),
exprDesc.getWritableObjectInspector(),
tabAlias,
false);
colInfo.setSkewedCol(
exprDesc instanceof ExprNodeColumnDesc
&& ((ExprNodeColumnDesc) exprDesc).isSkewedCol());
outRR.put(tabAlias, colAlias, colInfo);
if (exprDesc instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exprDesc;
String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn());
if (altMapping != null) {
// TODO: this can overwrite the mapping. Should this be allowed?
outRR.put(altMapping[0], altMapping[1], colInfo);
}
}
pos++;
}
}
}
// 7. Convert Hive projections to Calcite
List<RexNode> calciteColLst = new ArrayList<>();
HiveParserRexNodeConverter rexNodeConverter =
new HiveParserRexNodeConverter(
cluster,
srcRel.getRowType(),
outerNameToPos,
buildHiveColNameToInputPosMap(exprNodeDescs, inputRR),
relToRowResolver.get(srcRel),
outerRR,
0,
false,
subqueryId,
funcConverter);
for (ExprNodeDesc colExpr : exprNodeDescs) {
RexNode calciteCol = rexNodeConverter.convert(colExpr);
calciteCol = convertNullLiteral(calciteCol).accept(funcConverter);
calciteColLst.add(calciteCol);
}
// 8. Build Calcite Rel
RelNode res;
if (isInTransform) {
HiveParserScriptTransformHelper transformHelper =
new HiveParserScriptTransformHelper(
cluster, relToRowResolver, relToHiveColNameCalcitePosMap, hiveConf);
res = transformHelper.genScriptPlan(trfm, qb, calciteColLst, srcRel);
} else if (udtfOperator != null) {
// The basic idea for CBO support of UDTFs is to treat the UDTF as a special project.
res =
genUDTFPlan(
udtfOperator,
genericUDTFName,
udtfTableAlias,
udtfColAliases,
qb,
calciteColLst,
outRR.getColumnInfos(),
srcRel,
true,
false);
} else {
// If it's a subquery and the project is an identity, we skip creating this project.
// This works around an issue with Calcite's SubQueryRemoveRule: the rule checks
// column uniqueness by calling RelMetadataQuery::areColumnsUnique with an empty
// column set, which always returns null for a Project and thus introduces an
// unnecessary Aggregate node.
if (HiveParserUtils.isIdentityProject(srcRel, calciteColLst, colAliases)
&& outerRR != null) {
res = srcRel;
} else {
res = genSelectRelNode(calciteColLst, outRR, srcRel);
}
}
// 9. Handle SELECT DISTINCT as a GROUP BY if windowing functions exist
if (selForWindow != null
&& selExprList.getToken().getType() == HiveASTParser.TOK_SELECTDI) {
ImmutableBitSet groupSet =
ImmutableBitSet.range(res.getRowType().getFieldList().size());
res =
LogicalAggregate.create(
res,
ImmutableList.of(),
groupSet,
Collections.emptyList(),
Collections.emptyList());
HiveParserRowResolver groupByOutputRowResolver = new HiveParserRowResolver();
for (int i = 0; i < outRR.getColumnInfos().size(); i++) {
ColumnInfo colInfo = outRR.getColumnInfos().get(i);
ColumnInfo newColInfo =
new ColumnInfo(
colInfo.getInternalName(),
colInfo.getType(),
colInfo.getTabAlias(),
colInfo.getIsVirtualCol());
groupByOutputRowResolver.put(colInfo.getTabAlias(), colInfo.getAlias(), newColInfo);
}
relToHiveColNameCalcitePosMap.put(
res, buildHiveToCalciteColumnMap(groupByOutputRowResolver));
relToRowResolver.put(res, groupByOutputRowResolver);
}
inputRR.setCheckForAmbiguity(false);
if (selForWindow != null && res instanceof Project) {
// if windowing expressions exist, trim the project node with the window
res =
HiveParserProjectWindowTrimmer.trimProjectWindow(
(Project) res,
(Project) selForWindow,
relToRowResolver,
relToHiveColNameCalcitePosMap);
}
return res;
}