in fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java [1296:1691]
private void analyzeAggregation(Analyzer analyzer) throws AnalysisException {
// check having clause
if (havingClause != null) {
Expr ambiguousAlias = getFirstAmbiguousAlias(havingClause);
if (ambiguousAlias != null) {
ErrorReport.reportAnalysisException(ErrorCode.ERR_NON_UNIQ_ERROR, ambiguousAlias.toColumnLabel());
}
/*
* The having clause need to be substitute by aliasSMap.
* And it is analyzed after substitute.
* For example:
* Query: select k1 a, sum(k2) b from table group by k1 having a > 1;
* Having clause: a > 1
* aliasSMap: <a, table.k1> <b, sum(table.k2)>
* After substitute: a > 1 changed to table.k1 > 1
* Analyzer: check column and other subquery in having clause
* having predicate: table.k1 > 1
*/
/*
* TODO(ml): support substitute outer column in correlated subquery
* For example: select k1 key, sum(k1) sum_k1 from table a group by k1
* having k1 >
* (select min(k1) from table b where a.key=b.k2);
* TODO: the a.key should be replaced by a.k1 instead of unknown column 'key' in 'a'
*/
/* according to mysql (https://dev.mysql.com/doc/refman/8.0/en/select.html)
* "For GROUP BY or HAVING clauses, it searches the FROM clause before searching in the
* select_expr values. (For GROUP BY and HAVING, this differs from the pre-MySQL 5.0 behavior
* that used the same rules as for ORDER BY.)"
* case1: having clause use column name table.v1, because it searches the FROM clause firstly
* select id, sum(v1) v1 from table group by id,v1 having(v1>1);
* case2: having clause used in aggregate functions, such as sum(v2) here
* select id, sum(v1) v1, sum(v2) v2 from table group by id,v1 having(v1>1 AND sum(v2)>1);
* case3: having clause use alias name v, because table do not have column name v
* select id, floor(v1) v, sum(v2) v2 from table group by id,v having(v>1 AND v2>1);
* case4: having clause use alias name vsum, because table do not have column name vsum
* select id, floor(v1) v, sum(v2) vsum from table group by id,v having(v>1 AND vsum>1);
*/
if (groupByClause != null) {
boolean aliasFirst = false;
if (analyzer.getContext() != null) {
aliasFirst = analyzer.getContext().getSessionVariable().isGroupByAndHavingUseAliasFirst();
}
if (!aliasFirst) {
ExprSubstitutionMap excludeAliasSMap = aliasSMap.clone();
List<Expr> havingSlots = Lists.newArrayList();
havingClause.collect(SlotRef.class, havingSlots);
for (Expr expr : havingSlots) {
if (excludeAliasSMap.get(expr) == null) {
continue;
}
try {
// try to use column name firstly
expr.clone().analyze(analyzer);
// analyze success means column name exist, do not use alias name
excludeAliasSMap.removeByLhsExpr(expr);
} catch (AnalysisException ex) {
// according to case3, column name do not exist, keep alias name inside alias map
if (ConnectContext.get() != null) {
ConnectContext.get().getState().reset();
}
}
}
havingClauseAfterAnalyzed = havingClause.substitute(excludeAliasSMap, analyzer, false);
} else {
// If user set force using alias, then having clauses prefer using alias rather than column name
havingClauseAfterAnalyzed = havingClause.substitute(aliasSMap, analyzer, false);
}
} else {
// according to mysql
// if there is no group by clause, the having clause should use alias
havingClauseAfterAnalyzed = havingClause.substitute(aliasSMap, analyzer, false);
}
havingClauseAfterAnalyzed = rewriteQueryExprByMvColumnExpr(havingClauseAfterAnalyzed, analyzer);
if (!havingClauseAfterAnalyzed.getType().isBoolean()) {
havingClauseAfterAnalyzed = havingClauseAfterAnalyzed.castTo(Type.BOOLEAN);
}
havingClauseAfterAnalyzed.checkReturnsBool("HAVING clause", true);
if (groupingInfo != null) {
groupingInfo.substituteGroupingFn(Arrays.asList(havingClauseAfterAnalyzed), analyzer);
}
// can't contain analytic exprs
Expr analyticExpr = havingClauseAfterAnalyzed.findFirstOf(AnalyticExpr.class);
if (analyticExpr != null) {
throw new AnalysisException(
"HAVING clause must not contain analytic expressions: "
+ analyticExpr.toSql());
}
if (isContainInBitmap(havingClauseAfterAnalyzed)) {
throw new AnalysisException(
"HAVING clause dose not support in bitmap syntax: " + havingClauseAfterAnalyzed.toSql());
}
}
if (groupByClause == null && !selectList.isDistinct()
&& !TreeNode.contains(resultExprs, Expr.isAggregatePredicate())
&& (havingClauseAfterAnalyzed == null || !havingClauseAfterAnalyzed.contains(
Expr.isAggregatePredicate()))
&& (sortInfo == null || !TreeNode.contains(sortInfo.getOrderingExprs(),
Expr.isAggregatePredicate()))) {
// We're not computing aggregates but we still need to register the HAVING
// clause which could, e.g., contain a constant expression evaluating to false.
if (havingClauseAfterAnalyzed != null) {
if (havingClauseAfterAnalyzed.contains(Subquery.class)) {
throw new AnalysisException("Only constant expr could be supported in having clause "
+ "when no aggregation in stmt");
}
analyzer.registerConjuncts(havingClauseAfterAnalyzed, true);
}
return;
}
// If we're computing an aggregate, we must have a FROM clause.
if (fromClause.size() == 0) {
throw new AnalysisException("Aggregation without a FROM clause is not allowed");
}
if (selectList.isDistinct() && groupByClause == null) {
List<Expr> aggregateExpr = Lists.newArrayList();
TreeNode.collect(resultExprs, Expr.isAggregatePredicate(), aggregateExpr);
if (aggregateExpr.size() == resultExprs.size()) {
selectList.setIsDistinct(false);
}
}
if (selectList.isDistinct()
&& (groupByClause != null
|| TreeNode.contains(resultExprs, Expr.isAggregatePredicate())
|| (havingClauseAfterAnalyzed != null && havingClauseAfterAnalyzed.contains(
Expr.isAggregatePredicate())))) {
throw new AnalysisException("cannot combine SELECT DISTINCT with aggregate functions or GROUP BY");
}
// disallow '*' and explicit GROUP BY (we can't group by '*', and if you need to
// name all star-expanded cols in the group by clause you might as well do it
// in the select list)
if (groupByClause != null || TreeNode.contains(resultExprs, Expr.isAggregatePredicate())) {
for (SelectListItem item : selectList.getItems()) {
if (item.isStar()) {
throw new AnalysisException(
"cannot combine '*' in select list with GROUP BY: " + item.toSql());
}
}
}
// can't contain analytic exprs
ArrayList<Expr> aggExprsForChecking = Lists.newArrayList();
TreeNode.collect(resultExprs, Expr.isAggregatePredicate(), aggExprsForChecking);
ArrayList<Expr> analyticExprs = Lists.newArrayList();
TreeNode.collect(aggExprsForChecking, AnalyticExpr.class, analyticExprs);
if (!analyticExprs.isEmpty()) {
throw new AnalysisException(
"AGGREGATE clause must not contain analytic expressions");
}
// Collect the aggregate expressions from the SELECT, HAVING and ORDER BY clauses
// of this statement.
ArrayList<FunctionCallExpr> aggExprs = Lists.newArrayList();
TreeNode.collect(resultExprs, Expr.isAggregatePredicate(), aggExprs);
if (havingClauseAfterAnalyzed != null) {
havingClauseAfterAnalyzed.collect(Expr.isAggregatePredicate(), aggExprs);
}
if (sortInfo != null) {
// TODO: Avoid evaluating aggs in ignored order-bys
TreeNode.collect(sortInfo.getOrderingExprs(), Expr.isAggregatePredicate(), aggExprs);
}
// When DISTINCT aggregates are present, non-distinct (i.e. ALL) aggregates are
// evaluated in two phases (see AggregateInfo for more details). In particular,
// COUNT(c) in "SELECT COUNT(c), AGG(DISTINCT d) from R" is transformed to
// "SELECT SUM(cnt) FROM (SELECT COUNT(c) as cnt from R group by d ) S".
// Since a group-by expression is added to the inner query it returns no rows if
// R is empty, in which case the SUM of COUNTs will return NULL.
// However the original COUNT(c) should have returned 0 instead of NULL in this case.
// Therefore, COUNT([ALL]) is transformed into zeroifnull(COUNT([ALL]) if
// i) There is no GROUP-BY clause, and
// ii) Other DISTINCT aggregates are present.
ExprSubstitutionMap countAllMap = createCountAllMap(aggExprs, analyzer);
final ExprSubstitutionMap multiDistinctAggMap =
createMultiDistinctAggSMap(aggExprs, analyzer);
countAllMap = ExprSubstitutionMap.compose(multiDistinctAggMap, countAllMap, analyzer);
List<Expr> substitutedAggs =
Expr.substituteList(aggExprs, countAllMap, analyzer, false);
// the resultExprs and havingClause must substitute in the same way as aggExprs
// then resultExprs and havingClause can be substitute correctly using combinedSmap
resultExprs = Expr.substituteList(resultExprs, countAllMap, analyzer, false);
if (havingClauseAfterAnalyzed != null) {
havingClauseAfterAnalyzed =
havingClauseAfterAnalyzed.substitute(countAllMap, analyzer, false);
}
if (sortInfo != null) {
// the ordering exprs must substitute in the same way as resultExprs
sortInfo.substituteOrderingExprs(countAllMap, analyzer);
}
aggExprs.clear();
TreeNode.collect(substitutedAggs, Expr.isAggregatePredicate(), aggExprs);
List<TupleId> groupingByTupleIds = new ArrayList<>();
if (groupByClause != null) {
groupByClause.genGroupingExprs();
ArrayList<Expr> groupingExprs = groupByClause.getGroupingExprs();
if (groupingInfo != null) {
groupingInfo.buildRepeat(groupingExprs, groupByClause.getGroupingSetList());
}
boolean aliasFirst = false;
if (analyzer.getContext() != null) {
aliasFirst = analyzer.getContext().getSessionVariable().isGroupByAndHavingUseAliasFirst();
}
substituteOrdinalsAliases(groupingExprs, "GROUP BY", analyzer, aliasFirst);
// the groupingExprs must substitute in the same way as resultExprs
groupingExprs = Expr.substituteList(groupingExprs, countAllMap, analyzer, false);
if (!groupByClause.isGroupByExtension() && !groupingExprs.isEmpty()) {
/*
For performance reason, we want to remove constant column from groupingExprs.
For example:
`select sum(T.A) from T group by T.B, 'xyz'` is equivalent to `select sum(T.A) from T group by T.B`
We can remove constant column `abc` from groupingExprs.
But there is an exception when all groupingExpr are constant
For example:
sql1: `select 'abc' from t group by 'abc'`
is not equivalent to
sql2: `select 'abc' from t`
sql3: `select 'abc', sum(a) from t group by 'abc'`
is not equivalent to
sql4: `select 1, sum(a) from t`
(when t is empty, sql3 returns 0 tuple, sql4 return 1 tuple)
We need to keep some constant columns if all groupingExpr are constant.
Consider sql5 `select a from (select "abc" as a, 'def' as b) T group by b, a;`
if the constant column is in select list, this column should not be removed.
*/
Expr theFirstConstantGroupingExpr = null;
boolean someGroupExprRemoved = false;
ArrayList<Expr> tempExprs = new ArrayList<>();
for (Expr groupExpr : groupingExprs) {
//remove groupExpr if it is const, and it is not in select list
boolean removeConstGroupingKey = false;
if (groupExpr.isConstant() && !(groupExpr.contains(e -> e instanceof SlotRef))) {
if (theFirstConstantGroupingExpr == null) {
theFirstConstantGroupingExpr = groupExpr;
}
boolean keyInSelectList = false;
if (groupExpr instanceof SlotRef) {
for (SelectListItem item : selectList.getItems()) {
if (item.getExpr() instanceof SlotRef) {
keyInSelectList = ((SlotRef) item.getExpr()).columnEqual(groupExpr);
if (keyInSelectList) {
break;
}
}
}
}
removeConstGroupingKey = ! keyInSelectList;
}
if (removeConstGroupingKey) {
someGroupExprRemoved = true;
} else {
tempExprs.add(groupExpr);
}
}
if (someGroupExprRemoved) {
groupingExprs.clear();
groupingExprs.addAll(tempExprs);
//groupingExprs need at least one expr, it can be
//any original grouping expr. we use the first one.
if (groupingExprs.isEmpty()) {
groupingExprs.add(theFirstConstantGroupingExpr);
}
}
}
for (int i = 0; i < groupingExprs.size(); i++) {
groupingExprs.set(i, rewriteQueryExprByMvColumnExpr(groupingExprs.get(i), analyzer));
}
if (groupingInfo != null) {
groupingInfo.genOutputTupleDescAndSMap(analyzer, groupingExprs, aggExprs);
// must do it before copying for createAggInfo()
groupingByTupleIds.add(groupingInfo.getOutputTupleDesc().getId());
}
groupByClause.analyze(analyzer);
createAggInfo(groupingExprs, aggExprs, analyzer);
} else {
createAggInfo(new ArrayList<>(), aggExprs, analyzer);
}
// we remove all constant in group by expressions, when all exprs are constant
// and no aggregate expr in select list, we do not generate aggInfo at all.
if (aggInfo == null) {
return;
}
// combine avg smap with the one that produces the final agg output
AggregateInfo finalAggInfo =
aggInfo.getSecondPhaseDistinctAggInfo() != null
? aggInfo.getSecondPhaseDistinctAggInfo()
: aggInfo;
groupingByTupleIds.add(finalAggInfo.getOutputTupleId());
ExprSubstitutionMap combinedSmap = ExprSubstitutionMap.compose(
countAllMap, finalAggInfo.getOutputSmap(), analyzer);
// change select list, having and ordering exprs to point to agg output. We need
// to reanalyze the exprs at this point.
if (LOG.isDebugEnabled()) {
LOG.debug("combined smap: " + combinedSmap.debugString());
LOG.debug("desctbl: " + analyzer.getDescTbl().debugString());
LOG.debug("resultexprs: " + Expr.debugString(resultExprs));
}
if (havingClauseAfterAnalyzed != null) {
// forbidden correlated subquery in having clause
List<Subquery> subqueryInHaving = Lists.newArrayList();
havingClauseAfterAnalyzed.collect(Subquery.class, subqueryInHaving);
for (Subquery subquery : subqueryInHaving) {
if (subquery.isCorrelatedPredicate(getTableRefIds())) {
throw new AnalysisException("The correlated having clause is not supported");
}
}
}
/*
* All of columns of result and having clause are replaced by new slot ref
* which is bound by top tuple of agg info.
* For example:
* ResultExprs: SlotRef(k1), FunctionCall(sum(SlotRef(k2)))
* Having predicate: FunctionCall(sum(SlotRef(k2))) > subquery
* CombinedSMap: <SlotRef(k1) tuple 0, SlotRef(k1) of tuple 3>,
* <FunctionCall(SlotRef(k2)) tuple 0, SlotRef(sum(k2)) of tuple 3>
*
* After rewritten:
* ResultExprs: SlotRef(k1) of tuple 3, SlotRef(sum(k2)) of tuple 3
* Having predicate: SlotRef(sum(k2)) of tuple 3 > subquery
*/
resultExprs = Expr.substituteList(resultExprs, combinedSmap, analyzer, false);
if (LOG.isDebugEnabled()) {
LOG.debug("post-agg selectListExprs: " + Expr.debugString(resultExprs));
}
if (havingClauseAfterAnalyzed != null) {
havingPred = havingClauseAfterAnalyzed.substitute(combinedSmap, analyzer, false);
analyzer.registerConjuncts(havingPred, true, finalAggInfo.getOutputTupleId().asList());
if (LOG.isDebugEnabled()) {
LOG.debug("post-agg havingPred: " + havingPred.debugString());
}
}
if (sortInfo != null) {
sortInfo.substituteOrderingExprs(combinedSmap, analyzer);
if (LOG.isDebugEnabled()) {
LOG.debug("post-agg orderingExprs: "
+ Expr.debugString(sortInfo.getOrderingExprs()));
}
}
// check that all post-agg exprs point to agg output
for (int i = 0; i < selectList.getItems().size(); ++i) {
if (!resultExprs.get(i).isBoundByTupleIds(groupingByTupleIds)) {
if (CreateMaterializedViewStmt.isMVColumn(resultExprs.get(i).toSqlWithoutTbl())) {
List<TupleId> tupleIds = Lists.newArrayList();
List<SlotId> slotIds = Lists.newArrayList();
resultExprs.get(i).getIds(tupleIds, slotIds);
for (TupleId id : tupleIds) {
updateDisableTuplesMVRewriter(id);
}
throw new MVSelectFailedException("Materialized View rewrite invalid");
} else {
throw new AnalysisException(
"select list expression not produced by aggregation output " + "(missing from "
+ "GROUP BY clause?): " + selectList.getItems().get(i).getExpr().toSql());
}
}
}
if (orderByElements != null) {
for (int i = 0; i < orderByElements.size(); ++i) {
if (!sortInfo.getOrderingExprs().get(i).isBoundByTupleIds(groupingByTupleIds)) {
throw new AnalysisException(
"ORDER BY expression not produced by aggregation output " + "(missing from "
+ "GROUP BY clause?): " + orderByElements.get(i).getExpr().toSql());
}
if (sortInfo.getOrderingExprs().get(i).type.isObjectStored()) {
throw new AnalysisException("ORDER BY expression could not contain object-stored columnx.");
}
}
}
if (havingPred != null) {
if (!havingPred.isBoundByTupleIds(groupingByTupleIds)) {
throw new AnalysisException(
"HAVING clause not produced by aggregation output " + "(missing from GROUP BY "
+ "clause?): " + havingClause.toSql());
}
}
}