private void rewriteSetOperationStatement()

in fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java [184:381]


  /**
   * Rewrites 'stmt' so that its EXCEPT and INTERSECT operators are expressed as joins.
   *
   * If 'stmt' is a UnionStmt, only its operands are rewritten and the method returns
   * without setting 'stmt.rewrittenStmt_'. Otherwise the operands are processed from
   * left to right: a run of EXCEPT/INTERSECT operands becomes a chain of joined inline
   * views inside a generated SelectStmt, and a run of UNION operands is collected into
   * a UnionStmt that is wrapped by a generated SelectStmt. The limit and order by
   * elements of 'stmt' are carried over to the outermost generated SelectStmt, which
   * is analyzed with 'analyzer' and stored in 'stmt.rewrittenStmt_'.
   *
   * Throws an AnalysisException if an operand carries a set operator other than
   * EXCEPT, INTERSECT or UNION.
   */
  private void rewriteSetOperationStatement(SetOperationStmt stmt, Analyzer analyzer)
      throws AnalysisException {
    // Early out for UnionStmt as we don't rewrite the union operator
    if (stmt instanceof UnionStmt) {
      for (SetOperand operand : stmt.getOperands()) {
        rewriteQueryStatement(operand.getQueryStmt(), operand.getAnalyzer());
        // A nested non-union set operation that produced a rewritten statement is
        // replaced by that rewritten statement in the operand.
        if (operand.getQueryStmt() instanceof SetOperationStmt
            && !(operand.getQueryStmt() instanceof UnionStmt)) {
          SetOperationStmt setOpStmt = ((SetOperationStmt) operand.getQueryStmt());
          if (setOpStmt.hasRewrittenStmt()) {
            QueryStmt rewrittenStmt = setOpStmt.getRewrittenStmt();
            operand.setQueryStmt(rewrittenStmt);
          }
        }
      }
      return;
    }

    // During each iteration of the loop below, exactly one of eiSelect or uSelect becomes
    // non-null, they function as placeholders for the current sequence of rewrites for
    // except/intersect or union operands respectively. If the last operand processed was
    // a union, uSelect is the current select statement that has unionStmt nested inside,
    // which in turn contains preceding union operands.  If the last operator processed
    // was an except or intersect, eiSelect is the current select statement containing
    // preceding except or intersect operands in the from clause.
    TableAliasGenerator tableAliasGenerator = new TableAliasGenerator(analyzer, null);
    SelectStmt uSelect = null, eiSelect = null;
    SetOperationStmt unionStmt = null;

    // The first operand has no operator to dispatch on, so it is rewritten up front and
    // consumed later by whichever case handles the second operand (i == 1).
    SetOperand firstOperand = stmt.getOperands().get(0);
    rewriteQueryStatement(firstOperand.getQueryStmt(), firstOperand.getAnalyzer());
    if (firstOperand.getQueryStmt() instanceof SetOperationStmt) {
      SetOperationStmt setOpStmt = ((SetOperationStmt) firstOperand.getQueryStmt());
      if (setOpStmt.hasRewrittenStmt()) {
        firstOperand.setQueryStmt(setOpStmt.getRewrittenStmt());
      }
    }

    for (int i = 1; i < stmt.getOperands().size(); ++i) {
      SetOperand operand = stmt.getOperands().get(i);
      // Rewrite the operand itself first and substitute its rewritten statement, if any.
      rewriteQueryStatement(operand.getQueryStmt(), operand.getAnalyzer());
      if (operand.getQueryStmt() instanceof SetOperationStmt) {
        SetOperationStmt setOpStmt = ((SetOperationStmt) operand.getQueryStmt());
        if (setOpStmt.hasRewrittenStmt()) {
          operand.setQueryStmt(setOpStmt.getRewrittenStmt());
        }
      }

      switch (operand.getSetOperator()) {
        case EXCEPT:
        case INTERSECT:
          // eiSelect == null means this operand starts a new except/intersect sequence.
          if (eiSelect == null) {
            // For a new SelectStmt the left most tableref will either be the first
            // operand or the SelectStmt from the union operands.
            InlineViewRef leftMostView = null;
            SelectList sl =
                new SelectList(Lists.newArrayList(SelectListItem.createStarItem(null)));
            // Intersect/Except have set semantics in SQL: they must not return
            // duplicates. As an optimization we push this distinct down into the first
            // operand if it's not a UNION and has no other aggregations.
            // This would be best done in a cost based manner during planning.
            sl.setIsDistinct(true);
            eiSelect = new SelectStmt(sl, null, null, null, null, null, null);

            // i == 1: the current operand is the second one overall, so the left input
            // of the join is the first operand.
            if (i == 1) {
              if (firstOperand.getQueryStmt() instanceof SelectStmt) {
                // Push down the distinct aggregation if the first operand isn't a UNION,
                // there are no window functions, and we determine the result exprs
                // already produce distinct results.
                SelectStmt firstOpStmt = (SelectStmt) firstOperand.getQueryStmt();
                // DISTINCT is already set
                if (firstOpStmt.getSelectList().isDistinct()) {
                  sl.setIsDistinct(false);
                } else {
                  // Must not have window functions
                  if (firstOpStmt.getTableRefs().size() > 0
                      && !firstOpStmt.hasAnalyticInfo()) {
                    // Add distinct if there isn't any other grouping
                    if (!firstOpStmt.hasMultiAggInfo()) {
                      firstOpStmt.getSelectList().setIsDistinct(true);
                    }
                    // NOTE(review): the outer DISTINCT is dropped here even when DISTINCT
                    // was not added to the operand above (hasMultiAggInfo() is true);
                    // presumably this relies on the operand's aggregation output being
                    // duplicate free -- TODO confirm.
                    sl.setIsDistinct(false);
                  }
                }
              }
              leftMostView = new InlineViewRef(tableAliasGenerator.getNextAlias(),
                  firstOperand.getQueryStmt(), (TableSampleClause) null);
              leftMostView.analyze(analyzer);
              eiSelect.getTableRefs().add(leftMostView);
            }

            // There was a union operator before this one.
            if (uSelect != null) {
              Preconditions.checkState(i != 1);
              if (uSelect.getSelectList().isDistinct()
                  && eiSelect.getTableRefs().size() == 0) {
                // optimize out the distinct aggregation in the outer query
                sl.setIsDistinct(false);
              }
              // The select wrapping the preceding union sequence becomes the left input
              // of the join, and the union sequence is closed by clearing uSelect.
              leftMostView = new InlineViewRef(
                  tableAliasGenerator.getNextAlias(), uSelect, (TableSampleClause) null);
              leftMostView.analyze(analyzer);
              eiSelect.getTableRefs().add(leftMostView);
              uSelect = null;
            }
          }

          // INTERSECT => Left Semi Join and EXCEPT => Left Anti Join
          JoinOperator joinOp = operand.getSetOperator() == SetOperator.EXCEPT ?
              JoinOperator.LEFT_ANTI_JOIN :
              JoinOperator.LEFT_SEMI_JOIN;
          // The table ref added last; the view for the current operand is attached to it
          // as its left table ref below.
          TableRef rightMostTbl =
              eiSelect.getTableRefs().get(eiSelect.getTableRefs().size() - 1);

          // As an optimization we can rewrite INTERSECT with an inner join if both
          // operands return distinct rows.
          if (operand.getQueryStmt() instanceof SelectStmt) {
            SelectStmt inner = ((SelectStmt) operand.getQueryStmt());
            if (inner.getSelectList().isDistinct()) {
              if (rightMostTbl instanceof InlineViewRef) {
                QueryStmt outer = ((InlineViewRef) rightMostTbl).getViewStmt();
                if (outer instanceof SelectStmt) {
                  if (((SelectStmt) outer).getSelectList().isDistinct()
                      && operand.getSetOperator() == SetOperator.INTERSECT) {
                    joinOp = JoinOperator.INNER_JOIN;
                    TableRef firstTbl = eiSelect.getTableRefs().get(0);
                    // Make sure only the leftmost view's tuples are visible
                    eiSelect.getSelectList().getItems().set(0, SelectListItem
                        .createStarItem(Lists.newArrayList(firstTbl.getUniqueAlias())));
                  }
                }
              }
            }
          }
          // Generate one fresh column alias per column of the operand; they are passed
          // to the wrapper view as its explicit column labels.
          List<String> colLabels = new ArrayList<>();
          for (int j = 0; j < operand.getQueryStmt().getColLabels().size(); ++j) {
            colLabels.add(eiSelect.getColumnAliasGenerator().getNextAlias());
          }
          // Wraps the query statement for the current operand.
          InlineViewRef opWrapperView = new InlineViewRef(
              tableAliasGenerator.getNextAlias(), operand.getQueryStmt(), colLabels);
          opWrapperView.setLeftTblRef(rightMostTbl);
          opWrapperView.setJoinOp(joinOp);
          // The join predicates are always built against the first (leftmost) table ref
          // of eiSelect, not against rightMostTbl.
          opWrapperView.setOnClause(
              getSetOpJoinPredicates((InlineViewRef) eiSelect.getTableRefs().get(0),
                  opWrapperView, operand.getSetOperator()));
          opWrapperView.analyze(analyzer);
          eiSelect.getTableRefs().add(opWrapperView);
          break;

        case UNION:
          // Create a new SelectStmt for unions.
          if (uSelect == null) {
            // NOTE(review): this assignment is overwritten below before unionStmt is
            // read again.
            unionStmt = null;
            SelectList sl =
                new SelectList(Lists.newArrayList(SelectListItem.createStarItem(null)));
            uSelect = new SelectStmt(sl, null, null, null, null, null, null);
            // A preceding except/intersect sequence becomes an operand of the new union
            // and that sequence is closed by clearing eiSelect.
            SetOperationStmt.SetOperand eiOperand = null;
            if (eiSelect != null) {
              eiOperand = new SetOperationStmt.SetOperand(eiSelect, null, null);
              eiSelect = null;
            }
            List<SetOperationStmt.SetOperand> initialOps = new ArrayList<>();
            // i == 1: the first operand has not been consumed yet, so it starts the
            // union.
            if (i == 1) {
              initialOps.add(firstOperand);
              firstOperand = null;
            }
            if (eiOperand != null) {
              initialOps.add(eiOperand);
            }
            unionStmt = new UnionStmt(initialOps, null, null);
            uSelect.getTableRefs().add(new InlineViewRef(
                tableAliasGenerator.getNextAlias(), unionStmt, (TableSampleClause) null));
          }
          // The operand is reset and then appended to the current union sequence.
          operand.reset();
          unionStmt.getOperands().add(operand);
          break;

        default:
          throw new AnalysisException("Unknown Set Operation Statement Operator Type");
      }
    }

    // Whichever sequence is still open after the last operand is the outermost
    // statement. Both are null only if the loop body never ran.
    final SelectStmt newStmt = uSelect != null ? uSelect : eiSelect;
    Preconditions.checkNotNull(newStmt);

    // NOTE(review): the limit element is assigned by reference, so the reset() below
    // acts on the same object that 'stmt' still refers to.
    newStmt.limitElement_ = stmt.limitElement_;
    newStmt.limitElement_.reset();
    if (stmt.hasOrderByClause()) {
      // The order by elements are cloned and each expr is reset before newStmt is
      // analyzed.
      newStmt.orderByElements_ = stmt.cloneOrderByElements();
      if (newStmt.orderByElements_ != null) {
        for (OrderByElement o : newStmt.orderByElements_) o.getExpr().reset();
      }
    }

    newStmt.analyze(analyzer);
    stmt.rewrittenStmt_ = newStmt;
  }