in fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java [184:381]
private void rewriteSetOperationStatement(SetOperationStmt stmt, Analyzer analyzer)
    throws AnalysisException {
  // Purpose: rewrites every operand of 'stmt' and, unless 'stmt' is a UnionStmt,
  // builds an equivalent SelectStmt that is analyzed and stored in
  // stmt.rewrittenStmt_:
  //  - INTERSECT operands are joined with LEFT SEMI JOIN (or INNER JOIN when both
  //    sides are DISTINCT SelectStmts), EXCEPT operands with LEFT ANTI JOIN;
  //  - consecutive UNION operands are gathered into a nested UnionStmt that is
  //    wrapped in an inline view.
  // Params: 'stmt' is the set operation to rewrite; 'analyzer' is used to analyze
  // the generated inline views and the final statement.
  // Throws AnalysisException for an operand with an unknown set operator; the
  // nested rewriteQueryStatement()/analyze() calls may presumably raise it as well.

  // Early out for UnionStmt as we don't rewrite the union operator. Operands are
  // still rewritten, and a nested non-union set operation is swapped for its
  // rewritten statement when it has one.
  if (stmt instanceof UnionStmt) {
    for (SetOperand operand : stmt.getOperands()) {
      rewriteQueryStatement(operand.getQueryStmt(), operand.getAnalyzer());
      if (operand.getQueryStmt() instanceof SetOperationStmt
          && !(operand.getQueryStmt() instanceof UnionStmt)) {
        SetOperationStmt setOpStmt = ((SetOperationStmt) operand.getQueryStmt());
        if (setOpStmt.hasRewrittenStmt()) {
          QueryStmt rewrittenStmt = setOpStmt.getRewrittenStmt();
          operand.setQueryStmt(rewrittenStmt);
        }
      }
    }
    return;
  }
  // During each iteration of the loop below, exactly one of eiSelect or uSelect becomes
  // non-null, they function as placeholders for the current sequence of rewrites for
  // except/intersect or union operands respectively. If the last operand processed was
  // a union, uSelect is the current select statement that has unionStmt nested inside,
  // which in turn contains preceding union operands. If the last operator processed
  // was an except or intersect, eiSelect is the current select statement containing
  // preceding except or intersect operands in the from clause.
  TableAliasGenerator tableAliasGenerator = new TableAliasGenerator(analyzer, null);
  SelectStmt uSelect = null, eiSelect = null;
  SetOperationStmt unionStmt = null;
  // The first operand has no operator of its own; it is consumed by whichever
  // operator the second operand (i == 1) carries.
  SetOperand firstOperand = stmt.getOperands().get(0);
  rewriteQueryStatement(firstOperand.getQueryStmt(), firstOperand.getAnalyzer());
  if (firstOperand.getQueryStmt() instanceof SetOperationStmt) {
    SetOperationStmt setOpStmt = ((SetOperationStmt) firstOperand.getQueryStmt());
    if (setOpStmt.hasRewrittenStmt()) {
      firstOperand.setQueryStmt(setOpStmt.getRewrittenStmt());
    }
  }
  for (int i = 1; i < stmt.getOperands().size(); ++i) {
    SetOperand operand = stmt.getOperands().get(i);
    rewriteQueryStatement(operand.getQueryStmt(), operand.getAnalyzer());
    if (operand.getQueryStmt() instanceof SetOperationStmt) {
      SetOperationStmt setOpStmt = ((SetOperationStmt) operand.getQueryStmt());
      if (setOpStmt.hasRewrittenStmt()) {
        operand.setQueryStmt(setOpStmt.getRewrittenStmt());
      }
    }
    switch (operand.getSetOperator()) {
      case EXCEPT:
      case INTERSECT:
        if (eiSelect == null) {
          // For a new SelectStmt the left most tableref will either be the first
          // operand or the SelectStmt from the union operands.
          InlineViewRef leftMostView = null;
          SelectList sl =
              new SelectList(Lists.newArrayList(SelectListItem.createStarItem(null)));
          // Intersect/Except have set semantics in SQL they must not return duplicates
          // As an optimization we push this distinct down into the first operand if
          // it's not a UNION and has no other aggregations.
          // This would be best done in a cost based manner during planning.
          sl.setIsDistinct(true);
          eiSelect = new SelectStmt(sl, null, null, null, null, null, null);
          if (i == 1) {
            if (firstOperand.getQueryStmt() instanceof SelectStmt) {
              // Push down the distinct aggregation if the first operand isn't a UNION,
              // has no window functions and no other aggregation.
              SelectStmt firstOpStmt = (SelectStmt) firstOperand.getQueryStmt();
              if (firstOpStmt.getSelectList().isDistinct()) {
                // DISTINCT is already set, the outer one is redundant.
                sl.setIsDistinct(false);
              } else if (firstOpStmt.getTableRefs().size() > 0
                  && !firstOpStmt.hasAnalyticInfo()
                  && !firstOpStmt.hasMultiAggInfo()) {
                // Only drop the outer DISTINCT when it was actually pushed into the
                // first operand. An operand with its own aggregation may still yield
                // duplicate rows, so in that case the outer DISTINCT has to stay to
                // preserve set semantics.
                firstOpStmt.getSelectList().setIsDistinct(true);
                sl.setIsDistinct(false);
              }
            }
            leftMostView = new InlineViewRef(tableAliasGenerator.getNextAlias(),
                firstOperand.getQueryStmt(), (TableSampleClause) null);
            leftMostView.analyze(analyzer);
            eiSelect.getTableRefs().add(leftMostView);
          }
          // There was a union operator before this one.
          if (uSelect != null) {
            Preconditions.checkState(i != 1);
            if (uSelect.getSelectList().isDistinct()
                && eiSelect.getTableRefs().size() == 0) {
              // optimize out the distinct aggregation in the outer query
              sl.setIsDistinct(false);
            }
            leftMostView = new InlineViewRef(
                tableAliasGenerator.getNextAlias(), uSelect, (TableSampleClause) null);
            leftMostView.analyze(analyzer);
            eiSelect.getTableRefs().add(leftMostView);
            // The union sequence is now nested inside eiSelect.
            uSelect = null;
          }
        }
        // INTERSECT => Left Semi Join and EXCEPT => Left Anti Join
        JoinOperator joinOp = operand.getSetOperator() == SetOperator.EXCEPT ?
            JoinOperator.LEFT_ANTI_JOIN :
            JoinOperator.LEFT_SEMI_JOIN;
        TableRef rightMostTbl =
            eiSelect.getTableRefs().get(eiSelect.getTableRefs().size() - 1);
        // As an optimization we can rewrite INTERSECT with an inner join if both
        // operands return distinct rows.
        if (operand.getQueryStmt() instanceof SelectStmt) {
          SelectStmt inner = ((SelectStmt) operand.getQueryStmt());
          if (inner.getSelectList().isDistinct()) {
            if (rightMostTbl instanceof InlineViewRef) {
              QueryStmt outer = ((InlineViewRef) rightMostTbl).getViewStmt();
              if (outer instanceof SelectStmt) {
                if (((SelectStmt) outer).getSelectList().isDistinct()
                    && operand.getSetOperator() == SetOperator.INTERSECT) {
                  joinOp = JoinOperator.INNER_JOIN;
                  TableRef firstTbl = eiSelect.getTableRefs().get(0);
                  // Make sure only the leftmost view's tuples are visible
                  eiSelect.getSelectList().getItems().set(0, SelectListItem
                      .createStarItem(Lists.newArrayList(firstTbl.getUniqueAlias())));
                }
              }
            }
          }
        }
        // One generated column alias per column label of the current operand.
        List<String> colLabels = new ArrayList<>();
        for (int j = 0; j < operand.getQueryStmt().getColLabels().size(); ++j) {
          colLabels.add(eiSelect.getColumnAliasGenerator().getNextAlias());
        }
        // Wraps the query statement for the current operand.
        InlineViewRef opWrapperView = new InlineViewRef(
            tableAliasGenerator.getNextAlias(), operand.getQueryStmt(), colLabels);
        opWrapperView.setLeftTblRef(rightMostTbl);
        opWrapperView.setJoinOp(joinOp);
        // The join predicates are built against the left most view of eiSelect.
        opWrapperView.setOnClause(
            getSetOpJoinPredicates((InlineViewRef) eiSelect.getTableRefs().get(0),
                opWrapperView, operand.getSetOperator()));
        opWrapperView.analyze(analyzer);
        eiSelect.getTableRefs().add(opWrapperView);
        break;
      case UNION:
        // Create a new SelectStmt for unions.
        if (uSelect == null) {
          SelectList sl =
              new SelectList(Lists.newArrayList(SelectListItem.createStarItem(null)));
          uSelect = new SelectStmt(sl, null, null, null, null, null, null);
          // A preceding except/intersect sequence becomes an operand of the new union.
          SetOperationStmt.SetOperand eiOperand = null;
          if (eiSelect != null) {
            eiOperand = new SetOperationStmt.SetOperand(eiSelect, null, null);
            eiSelect = null;
          }
          List<SetOperationStmt.SetOperand> initialOps = new ArrayList<>();
          if (i == 1) {
            // The first operand is consumed by this union.
            initialOps.add(firstOperand);
            firstOperand = null;
          }
          if (eiOperand != null) {
            initialOps.add(eiOperand);
          }
          unionStmt = new UnionStmt(initialOps, null, null);
          uSelect.getTableRefs().add(new InlineViewRef(
              tableAliasGenerator.getNextAlias(), unionStmt, (TableSampleClause) null));
        }
        operand.reset();
        unionStmt.getOperands().add(operand);
        break;
      default:
        throw new AnalysisException("Unknown Set Operation Statement Operator Type");
    }
  }
  // Whichever placeholder is still set holds the complete rewritten statement.
  final SelectStmt newStmt = uSelect != null ? uSelect : eiSelect;
  Preconditions.checkNotNull(newStmt);
  // Carry the limit and order by of the original statement over to the rewrite.
  newStmt.limitElement_ = stmt.limitElement_;
  newStmt.limitElement_.reset();
  if (stmt.hasOrderByClause()) {
    newStmt.orderByElements_ = stmt.cloneOrderByElements();
    if (newStmt.orderByElements_ != null) {
      for (OrderByElement o : newStmt.orderByElements_) o.getExpr().reset();
    }
  }
  newStmt.analyze(analyzer);
  stmt.rewrittenStmt_ = newStmt;
}