in flink-connector-hive/src/main/java/org/apache/flink/table/planner/delegation/hive/copy/HiveParserQBSubQuery.java [458:593]
/**
 * Validates the restrictions placed on this subquery and reports whether it is a correlated
 * aggregate subquery that needs special treatment later on.
 *
 * <p>The checks performed are:
 *
 * <ul>
 *   <li>Check.5.h: unless the operator is EXISTS / NOT EXISTS, the select list may contain only
 *       one item (a leading query hint is not counted).
 *   <li>Restriction.14.h: a correlated subquery must not contain windowing.
 *   <li>Restriction.13.m: a correlated EXISTS / NOT EXISTS subquery with an aggregate and no
 *       GROUP BY clause is rejected.
 *   <li>A SCALAR subquery with an aggregate and no GROUP BY clause must not have a non-equi
 *       predicate that references both the outer and the inner query.
 * </ul>
 *
 * @param parentQueryRR row resolver of the outer query, handed to the conjunct analyzer
 * @param forHavingClause flag handed to the conjunct analyzer
 * @param outerQueryAlias alias of the outer query, handed to the conjunct analyzer
 * @return {@code true} only if the select list contains an aggregate, the subquery has no
 *     GROUP BY clause, a WHERE conjunct is correlated, and the operator is SCALAR, NOT IN, or
 *     IN with a COUNT aggregate; {@code false} in every other case (including when the
 *     subquery has no WHERE clause)
 * @throws SemanticException if one of the restrictions listed above is violated, or if one of
 *     the analysis helpers fails
 */
public boolean subqueryRestrictionsCheck(
        HiveParserRowResolver parentQueryRR, boolean forHavingClause, String outerQueryAlias)
        throws SemanticException {
    HiveParserASTNode insertClause =
            getChildFromSubqueryAST("Insert", HiveASTParser.TOK_INSERT);
    HiveParserASTNode selectClause = (HiveParserASTNode) insertClause.getChild(1);

    // A leading query hint is not a select item; skip it when counting and inspecting items.
    int selectExprStart = 0;
    if (selectClause.getChild(0).getType() == HiveASTParser.QUERY_HINT) {
        selectExprStart = 1;
    }

    /*
     * Check.5.h :: For In and Not In the SubQuery must implicitly or
     * explicitly only contain one select item.
     */
    if (operator.getType() != HiveParserQBSubQuery.SubQueryType.EXISTS
            && operator.getType() != HiveParserQBSubQuery.SubQueryType.NOT_EXISTS
            && selectClause.getChildCount() - selectExprStart > 1) {
        subQueryAST.setOrigin(originalSQASTOrigin);
        throw new SemanticException(
                HiveParserErrorMsg.getMsg(
                        ErrorMsg.INVALID_SUBQUERY_EXPRESSION,
                        subQueryAST,
                        "SubQuery can contain only 1 item in Select List."));
    }

    boolean hasAggregateExprs = false;
    boolean hasWindowing = false;
    // We need to know whether the aggregate is COUNT, because the IN case below only reports
    // correlated COUNT aggregates to the caller.
    boolean hasCount = false;
    for (int i = selectExprStart; i < selectClause.getChildCount(); i++) {
        HiveParserASTNode selectItem = (HiveParserASTNode) selectClause.getChild(i);
        // As used here: 1 or 2 means an aggregate (2 being COUNT), 3 means windowing.
        int kind = HiveParserSubQueryUtils.checkAggOrWindowing(selectItem);
        hasWindowing |= kind == 3;
        hasAggregateExprs |= kind == 1 || kind == 2;
        hasCount |= kind == 2;
    }

    HiveParserASTNode whereClause = HiveParserSubQueryUtils.subQueryWhere(insertClause);
    if (whereClause == null) {
        // Correlation is only detected in WHERE conjuncts, so there is nothing left to check.
        return false;
    }

    HiveParserASTNode searchCond = (HiveParserASTNode) whereClause.getChild(0);
    List<HiveParserASTNode> conjuncts = new ArrayList<>();
    HiveParserSubQueryUtils.extractConjuncts(searchCond, conjuncts);
    HiveParserQBSubQuery.ConjunctAnalyzer conjunctAnalyzer =
            new ConjunctAnalyzer(
                    parentQueryRR, forHavingClause, outerQueryAlias, frameworkConfig, cluster);

    boolean hasCorrelation = false;
    boolean hasNonEquiJoinPred = false;
    // Every conjunct is analyzed (no early exit) so that analysis errors are always raised.
    for (HiveParserASTNode conjunctAST : conjuncts) {
        HiveParserQBSubQuery.Conjunct conjunct = conjunctAnalyzer.analyzeConjunct(conjunctAST);
        if (conjunct.isCorrelated()) {
            hasCorrelation = true;
        }
        if (conjunct.eitherSideRefersBoth() && conjunctAST.getType() != HiveASTParser.EQUAL) {
            hasNonEquiJoinPred = true;
        }
    }

    // Figure out whether the subquery has an explicit GROUP BY. The GROUP BY node is looked up
    // among the children of the insert clause itself; child 1 of the insert clause is the
    // select clause, whose children are select items and therefore never a GROUP BY node.
    boolean noExplicitGroupBy = true;
    for (int i = 0; i < insertClause.getChildCount(); i++) {
        if (insertClause.getChild(i).getType() == HiveASTParser.TOK_GROUPBY) {
            noExplicitGroupBy = false;
            break;
        }
    }

    /*
     * Restriction.14.h :: Correlated Sub Queries cannot contain Windowing clauses.
     */
    if (hasWindowing && hasCorrelation) {
        throw new SemanticException(
                HiveParserErrorMsg.getMsg(
                        ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION,
                        subQueryAST,
                        "Correlated Sub Queries cannot contain Windowing clauses."));
    }

    /*
     * Restriction.13.m :: In the case of an implied Group By on a
     * correlated SubQuery, the SubQuery always returns 1 row.
     * An exists on a SubQuery with an implied GBy will always return true.
     * Whereas Algebraically transforming to a Join may not return true. See
     * Specification doc for details.
     * Similarly a not exists on a SubQuery with a implied GBY will always return false.
     */
    // Special cases for correlated subqueries that have an aggregate but no GROUP BY clause:
    // * EXISTS/NOT EXISTS - NOT allowed, throw an error for now. We plan to allow this later.
    // * SCALAR - rejected if it has a non-equi join predicate; otherwise returns true when
    //   correlated, since the later subquery remove rule needs to know about this case.
    // * IN - always allowed, but returns true only when correlated with a COUNT aggregate,
    //   since the later subquery remove rule needs to know about this case.
    // * NOT IN - always allowed, but returns true when correlated because the later subquery
    //   remove rule will generate a different plan for this case.
    if (hasAggregateExprs && noExplicitGroupBy) {
        if (operator.getType() == HiveParserQBSubQuery.SubQueryType.EXISTS
                || operator.getType() == HiveParserQBSubQuery.SubQueryType.NOT_EXISTS) {
            if (hasCorrelation) {
                throw new SemanticException(
                        HiveParserErrorMsg.getMsg(
                                ErrorMsg.INVALID_SUBQUERY_EXPRESSION,
                                subQueryAST,
                                "A predicate on EXISTS/NOT EXISTS SubQuery with implicit Aggregation(no Group By clause) "
                                        + "cannot be rewritten."));
            }
        } else if (operator.getType() == HiveParserQBSubQuery.SubQueryType.SCALAR) {
            if (hasNonEquiJoinPred) {
                throw new SemanticException(
                        HiveParserErrorMsg.getMsg(
                                ErrorMsg.INVALID_SUBQUERY_EXPRESSION,
                                subQueryAST,
                                "Scalar subqueries with aggregate cannot have non-equi join predicate"));
            }
            return hasCorrelation;
        } else if (operator.getType() == HiveParserQBSubQuery.SubQueryType.IN) {
            return hasCount && hasCorrelation;
        } else if (operator.getType() == HiveParserQBSubQuery.SubQueryType.NOT_IN) {
            return hasCorrelation;
        }
    }
    return false;
}