in flink-connector-hive/src/main/java/org/apache/flink/table/planner/delegation/hive/copy/HiveASTParseUtils.java [94:250]
/**
 * Expands a {@code TOK_SETCOLREF} node into explicit column references, or downgrades it to
 * {@code TOK_ALLCOLREF} when the columns cannot be determined.
 *
 * <p>The method walks up from {@code setCols} to the enclosing {@code TOK_INSERT}, takes that
 * node's parent, and looks for a {@code TOK_FROM} child. Inside that FROM it locates the first
 * {@code TOK_SELECT}/{@code TOK_SELECTDI} found breadth-first and then moves to the left-most
 * sibling query's select. For each select expression of that select it asks {@code
 * createChildColumnRef} to build a column reference; the collected nodes finally replace the
 * parent of {@code setCols} inside its grandparent select.
 *
 * <p>Whenever a required node cannot be found, or a select expression has a shape that is not
 * handled here, the token type of {@code setCols} is changed in place to {@code
 * TOK_ALLCOLREF} and the tree is otherwise left untouched.
 *
 * @param setCols the {@code TOK_SETCOLREF} node to rewrite; mutated in place
 * @param searcher reusable tree searcher; it is reset at the start of every call
 */
private static void processSetColsNode(HiveParserASTNode setCols, ASTSearcher searcher) {
    searcher.reset();
    CommonTree rootNode = setCols;
    while (rootNode != null && rootNode.getType() != HiveASTParser.TOK_INSERT) {
        rootNode = rootNode.parent;
    }
    if (rootNode == null || rootNode.parent == null) {
        // Couldn't find the parent insert; replace with ALLCOLREF.
        LOG.debug(
                "Replacing SETCOLREF with ALLCOLREF because we couldn't find the root INSERT");
        setCols.token.setType(HiveASTParser.TOK_ALLCOLREF);
        return;
    }
    rootNode = rootNode.parent; // TOK_QUERY above insert
    Tree fromNode = null;
    for (int j = 0; j < rootNode.getChildCount(); ++j) {
        Tree child = rootNode.getChild(j);
        if (child.getType() == HiveASTParser.TOK_FROM) {
            fromNode = child;
            break;
        }
    }
    // instanceof is also false for null, so this covers the "no FROM child" case too.
    if (!(fromNode instanceof HiveParserASTNode)) {
        // Couldn't find the from that contains subquery; replace with ALLCOLREF.
        LOG.debug("Replacing SETCOLREF with ALLCOLREF because we couldn't find the FROM");
        setCols.token.setType(HiveASTParser.TOK_ALLCOLREF);
        return;
    }
    // We are making what we are trying to do more explicit if there's a union alias; so
    // that if we do something we didn't expect to do, it'd be more likely to fail.
    String alias = null;
    if (fromNode.getChildCount() > 0) {
        Tree fromWhat = fromNode.getChild(0);
        if (fromWhat.getType() == HiveASTParser.TOK_SUBQUERY && fromWhat.getChildCount() > 1) {
            // The alias, when present, is the last child of the subquery node.
            Tree child = fromWhat.getChild(fromWhat.getChildCount() - 1);
            if (child.getType() == HiveASTParser.Identifier) {
                alias = child.getText();
            }
        }
    }
    // Note: we assume that this isn't an already malformed query;
    // we don't check for that here - it will fail later anyway.
    // First, we find the SELECT closest to the top.
    HiveParserASTNode select =
            searcher.simpleBreadthFirstSearchAny(
                    (HiveParserASTNode) fromNode,
                    HiveASTParser.TOK_SELECT,
                    HiveASTParser.TOK_SELECTDI);
    if (select == null) {
        // Couldn't find the select inside the from; replace with ALLCOLREF.
        LOG.debug("Replacing SETCOLREF with ALLCOLREF because we couldn't find the SELECT");
        setCols.token.setType(HiveASTParser.TOK_ALLCOLREF);
        return;
    }
    // Then, find the leftmost logical sibling select, because that's what Hive uses for
    // aliases.
    while (true) {
        CommonTree queryOfSelect = select.parent;
        while (queryOfSelect != null && queryOfSelect.getType() != HiveASTParser.TOK_QUERY) {
            queryOfSelect = queryOfSelect.parent;
        }
        // We should have some QUERY; and also its parent because by supposition we are in subq.
        if (queryOfSelect == null || queryOfSelect.parent == null) {
            LOG.debug("Replacing SETCOLREF with ALLCOLREF because we couldn't find the QUERY");
            setCols.token.setType(HiveASTParser.TOK_ALLCOLREF);
            return;
        }
        if (queryOfSelect.childIndex == 0) {
            break; // We are the left-most child.
        }
        Tree moreToTheLeft = queryOfSelect.parent.getChild(0);
        Preconditions.checkState(moreToTheLeft != queryOfSelect);
        HiveParserASTNode newSelect =
                searcher.simpleBreadthFirstSearchAny(
                        (HiveParserASTNode) moreToTheLeft,
                        HiveASTParser.TOK_SELECT,
                        HiveASTParser.TOK_SELECTDI);
        if (newSelect == null) {
            // The search may come back empty (same as for the first search above). Without
            // this guard the next iteration would dereference a null select, so fall back
            // to ALLCOLREF like every other "couldn't find it" path in this method.
            LOG.debug(
                    "Replacing SETCOLREF with ALLCOLREF because we couldn't find the leftmost SELECT");
            setCols.token.setType(HiveASTParser.TOK_ALLCOLREF);
            return;
        }
        // Guards against looping forever on the same select.
        Preconditions.checkState(newSelect != select);
        select = newSelect;
        // Repeat the procedure for the new select.
    }
    // Found the proper columns.
    List<HiveParserASTNode> newChildren = new ArrayList<>(select.getChildCount());
    HashSet<String> aliases = new HashSet<>();
    for (int i = 0; i < select.getChildCount(); ++i) {
        Tree selExpr = select.getChild(i);
        if (selExpr.getType() == HiveASTParser.QUERY_HINT) {
            // Hints carry no columns; skip them.
            continue;
        }
        assert selExpr.getType() == HiveASTParser.TOK_SELEXPR;
        assert selExpr.getChildCount() > 0;
        // Examine the last child. It could be an alias.
        Tree child = selExpr.getChild(selExpr.getChildCount() - 1);
        switch (child.getType()) {
            case HiveASTParser.TOK_SETCOLREF:
                // We have a nested setcolref. Process that and start from scratch TODO: use
                // stack?
                processSetColsNode((HiveParserASTNode) child, searcher);
                processSetColsNode(setCols, searcher);
                return;
            case HiveASTParser.TOK_ALLCOLREF:
                // We should find an alias of this insert and do (alias).*. This however won't
                // fix e.g. positional order by alias case, cause we'd still have a star on the
                // top level. Bail.
                LOG.debug("Replacing SETCOLREF with ALLCOLREF because of nested ALLCOLREF");
                setCols.token.setType(HiveASTParser.TOK_ALLCOLREF);
                return;
            case HiveASTParser.TOK_TABLE_OR_COL:
                // Plain column reference: the identifier is the first child.
                Tree idChild = child.getChild(0);
                assert idChild.getType() == HiveASTParser.Identifier : idChild;
                if (!createChildColumnRef(idChild, alias, newChildren, aliases)) {
                    setCols.token.setType(HiveASTParser.TOK_ALLCOLREF);
                    return;
                }
                break;
            case HiveASTParser.Identifier:
                // The last child is itself an identifier (e.g. an explicit alias).
                if (!createChildColumnRef(child, alias, newChildren, aliases)) {
                    setCols.token.setType(HiveASTParser.TOK_ALLCOLREF);
                    return;
                }
                break;
            case HiveASTParser.DOT:
                {
                    // Qualified reference: use the last child of the DOT node.
                    Tree colChild = child.getChild(child.getChildCount() - 1);
                    assert colChild.getType() == HiveASTParser.Identifier : colChild;
                    if (!createChildColumnRef(colChild, alias, newChildren, aliases)) {
                        setCols.token.setType(HiveASTParser.TOK_ALLCOLREF);
                        return;
                    }
                    break;
                }
            default:
                // Not really sure how to refer to this (or if we can).
                // TODO: We could find a different from branch for the union, that might have an
                // alias? Or we could add an alias here to refer to, but that might break other
                // branches.
                LOG.debug(
                        "Replacing SETCOLREF with ALLCOLREF because of the nested node "
                                + child.getType()
                                + " "
                                + child.getText());
                setCols.token.setType(HiveASTParser.TOK_ALLCOLREF);
                return;
        }
    }
    // Insert search in the beginning would have failed if these parents didn't exist.
    HiveParserASTNode parent = (HiveParserASTNode) setCols.parent.parent;
    int t = parent.getType();
    assert t == HiveASTParser.TOK_SELECT || t == HiveASTParser.TOK_SELECTDI : t;
    // Swap the select expression holding setCols for the generated column references,
    // keeping them at the same position and in order.
    int ix = setCols.parent.childIndex;
    parent.deleteChild(ix);
    for (HiveParserASTNode node : newChildren) {
        parent.insertChild(ix++, node);
    }
}