in ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java [869:1121]
/**
 * Builds the typed expression for an AST node that is either a field access ("."),
 * an index access ("["), or any other operator / function call.
 *
 * <p>Processing order, as implemented below:
 * <ol>
 *   <li>If the node is a redundant conversion, the single child is returned unchanged.</li>
 *   <li>"." resolves a struct field (optionally through a list of structs).</li>
 *   <li>"[" is rewritten to the "index" function and validated against list / map operands.</li>
 *   <li>Everything else is looked up as a function; comparison constants are re-interpreted
 *       against the column type, IN clauses are split per value type, AND / OR are flattened,
 *       and some CASE forms are rewritten to COALESCE / IF when folding is enabled.</li>
 *   <li>Unary plus is dropped, and unary minus over a constant is folded.</li>
 * </ol>
 *
 * @param node       the AST node being type-checked; also used for error messages and, when
 *                   it carries a non-null type info, as the result type of the created call
 * @param isFunction when true, the first AST child is treated as the function-name node
 *                   (used for error reporting and for looking up the cast target type)
 * @param children   operand expressions; NOTE this list is modified in place by this method
 *                   (elements replaced, and cleared / refilled when handling IN)
 * @param ctx        type-check context; consulted for the index-expression switch, the
 *                   unparse translator, the fold flag and the column aliases
 * @return the resulting expression (asserted to be non-null); for an IN clause in which no
 *         operand could be interpreted against the column, a boolean "false" constant
 * @throws SemanticException on an invalid "." target, an unknown function, a disallowed
 *                           index expression, a wrong index / key type, or indexing into a
 *                           non-collection type
 */
protected T getXpathOrFuncExprNodeDesc(ASTNode node,
    boolean isFunction, List<T> children, TypeCheckCtx ctx)
    throws SemanticException {
  // return the child directly if the conversion is redundant.
  if (isRedundantConversionFunction(node, isFunction, children)) {
    assert (children.size() == 1);
    assert (children.get(0) != null);
    return children.get(0);
  }
  String funcText = getFunctionText(node, isFunction);
  T expr;
  if (funcText.equals(".")) {
    assert (children.size() == 2);
    // Only allow constant field name for now
    assert (exprFactory.isConstantExpr(children.get(1)));
    T object = children.get(0);
    // Calculate result TypeInfo
    String fieldNameString = exprFactory.getConstantValueAsString(children.get(1));
    TypeInfo objectTypeInfo = exprFactory.getTypeInfo(object);
    // Allow accessing a field of list element structs directly from a list
    boolean isList = (objectTypeInfo.getCategory() == ObjectInspector.Category.LIST);
    if (isList) {
      objectTypeInfo = ((ListTypeInfo) objectTypeInfo).getListElementTypeInfo();
    }
    if (objectTypeInfo.getCategory() != Category.STRUCT) {
      throw new SemanticException(ASTErrorUtils.getMsg(
          ErrorMsg.INVALID_DOT.getMsg(), node));
    }
    TypeInfo t = ((StructTypeInfo) objectTypeInfo).getStructFieldTypeInfo(fieldNameString);
    if (isList) {
      // Field access through a list yields a list of the field type.
      t = TypeInfoFactory.getListTypeInfo(t);
    }
    expr = exprFactory.createNestedColumnRefExpr(t, children.get(0), fieldNameString, isList);
  } else if (funcText.equals("[")) {
    // From here on the node is handled as a call to the "index" function.
    funcText = "index";
    FunctionInfo fi = exprFactory.getFunctionInfo(funcText);
    // "[]" : LSQUARE/INDEX Expression
    if (!ctx.getallowIndexExpr()) {
      throw new SemanticException(ASTErrorUtils.getMsg(
          ErrorMsg.INVALID_FUNCTION.getMsg(), node));
    }
    assert (children.size() == 2);
    // Check whether this is a list or a map
    TypeInfo myt = exprFactory.getTypeInfo(children.get(0));
    if (myt.getCategory() == Category.LIST) {
      // Only allow integer index for now
      if (!TypeInfoUtils.implicitConvertible(exprFactory.getTypeInfo(children.get(1)),
          TypeInfoFactory.intTypeInfo)) {
        throw new SemanticException(SemanticAnalyzer.generateErrorMessage(
            node, ErrorMsg.INVALID_ARRAYINDEX_TYPE.getMsg()));
      }
      // Calculate TypeInfo: a type already attached to the node wins over the element type
      TypeInfo t = node.getTypeInfo() != null ? node.getTypeInfo() : ((ListTypeInfo) myt).getListElementTypeInfo();
      expr = exprFactory.createFuncCallExpr(t, fi, funcText, children);
    } else if (myt.getCategory() == Category.MAP) {
      // The index must be implicitly convertible to the map's key type
      if (!TypeInfoUtils.implicitConvertible(exprFactory.getTypeInfo(children.get(1)),
          ((MapTypeInfo) myt).getMapKeyTypeInfo())) {
        throw new SemanticException(ASTErrorUtils.getMsg(
            ErrorMsg.INVALID_MAPINDEX_TYPE.getMsg(), node));
      }
      // Calculate TypeInfo: a type already attached to the node wins over the value type
      TypeInfo t = node.getTypeInfo() != null ? node.getTypeInfo() : ((MapTypeInfo) myt).getMapValueTypeInfo();
      expr = exprFactory.createFuncCallExpr(t, fi, funcText, children);
    } else {
      throw new SemanticException(ASTErrorUtils.getMsg(
          ErrorMsg.NON_COLLECTION_TYPE.getMsg(), node, myt.getTypeName()));
    }
  } else {
    // other operators or functions
    FunctionInfo fi = exprFactory.getFunctionInfo(funcText);
    if (fi == null) {
      // Point the error at the function-name child when there is one.
      if (isFunction) {
        throw new SemanticException(ASTErrorUtils.getMsg(
            ErrorMsg.INVALID_FUNCTION.getMsg(), (ASTNode) node.getChild(0)));
      } else {
        throw new SemanticException(ASTErrorUtils.getMsg(
            ErrorMsg.INVALID_FUNCTION.getMsg(), node));
      }
    }
    if (!fi.isNative()) {
      ctx.getUnparseTranslator().addIdentifierTranslation(
          (ASTNode) node.getChild(0));
    }
    // Handle type casts that may contain type parameters
    TypeInfo typeInfo = isFunction ? getTypeInfo((ASTNode) node.getChild(0)) : null;
    insertCast(funcText, children);
    validateUDF(node, isFunction, ctx, fi, children);
    // Try to infer the type of the constant only if there are two
    // nodes, one of them is column and the other is numeric const
    if (exprFactory.isCompareFunction(fi)
        && children.size() == 2
        && ((exprFactory.isConstantExpr(children.get(0))
        && exprFactory.isColumnRefExpr(children.get(1)))
        || (exprFactory.isColumnRefExpr(children.get(0))
        && exprFactory.isConstantExpr(children.get(1))))) {
      int constIdx = exprFactory.isConstantExpr(children.get(0)) ? 0 : 1;
      T constChild = children.get(constIdx);
      T columnChild = children.get(1 - constIdx);
      // Lower-case with Locale.ROOT so the type-name lookup key does not depend on the
      // JVM default locale (e.g. the Turkish dotless-i mapping).
      final PrimitiveTypeInfo colTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(
          exprFactory.getTypeInfo(columnChild).getTypeName().toLowerCase(java.util.Locale.ROOT));
      T newChild = interpretNodeAsConstant(colTypeInfo, constChild);
      if (newChild != null) {
        children.set(constIdx, newChild);
      }
    }
    // The "in" function is sometimes changed to an "or". Later on, the "or"
    // function is processed a little differently. We don't want to process this
    // new "or" function differently, so we track it with this variable.
    // TODO: Test to see if this can be removed.
    boolean functionInfoChangedFromIn = false;
    if (exprFactory.isInFunction(fi)) {
      // We will split the IN clause into different IN clauses, one for each
      // different value type. The reason is that Hive and Calcite treat
      // types in IN clauses differently and it is practically impossible
      // to find some correct implementation unless this is done.
      ListMultimap<TypeInfo, T> expressions = ArrayListMultimap.create();
      for (int i = 1; i < children.size(); i++) {
        T columnDesc = children.get(0);
        T valueDesc = interpretNode(columnDesc, children.get(i));
        if (valueDesc != null) {
          // Only add to the expression map if types can be coerced
          TypeInfo targetType = exprFactory.getTypeInfo(valueDesc);
          if (!expressions.containsKey(targetType)) {
            // First value of this type: seed the group with the left-hand operand.
            expressions.put(targetType, columnDesc);
          }
          expressions.put(targetType, valueDesc);
        }
      }
      if (expressions.isEmpty()) {
        // We will only hit this when none of the operands inside the "in" clause can be type-coerced
        // That would imply that the result of "in" is a boolean "false"
        // This should not impact those cases where the "in" clause is used on a boolean column and
        // there is no operand in the "in" clause that cannot be type-coerced into boolean because
        // in case of boolean, Hive does not allow such use cases and throws an error
        return exprFactory.createBooleanConstantExpr("false");
      }
      children.clear();
      List<T> newExprs = new ArrayList<>();
      int numEntries = expressions.keySet().size();
      if (numEntries == 1) {
        // A single value type: keep one IN call over the re-interpreted operands.
        children.addAll(expressions.asMap().values().iterator().next());
        funcText = "in";
        fi = exprFactory.getFunctionInfo("in");
      } else {
        // Several value types: one IN call per type, combined with OR.
        FunctionInfo inFunctionInfo = exprFactory.getFunctionInfo("in");
        for (Collection<T> c : expressions.asMap().values()) {
          // Copy each group into a fresh list instead of down-casting the multimap's
          // Collection view: no unchecked cast, and the new expression does not keep
          // a list that is backed by the temporary multimap.
          newExprs.add(exprFactory.createFuncCallExpr(node.getTypeInfo(), inFunctionInfo,
              "in", new ArrayList<T>(c)));
        }
        children.addAll(newExprs);
        funcText = "or";
        fi = exprFactory.getFunctionInfo("or");
        functionInfoChangedFromIn = true;
      }
    }
    if (exprFactory.isOrFunction(fi) && !functionInfoChangedFromIn) {
      // flatten OR
      List<T> childrenList = new ArrayList<>(children.size());
      for (T child : children) {
        // A void-typed operand is re-typed as boolean before it is added.
        if (TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.VOID_TYPE_NAME).equals(exprFactory.getTypeInfo(child))) {
          child = exprFactory.setTypeInfo(child, TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME));
        }
        if (exprFactory.isORFuncCallExpr(child)) {
          childrenList.addAll(exprFactory.getExprChildren(child));
        } else {
          childrenList.add(child);
        }
      }
      expr = exprFactory.createFuncCallExpr(node.getTypeInfo(), fi, funcText, childrenList);
    } else if (exprFactory.isAndFunction(fi)) {
      // flatten AND
      List<T> childrenList = new ArrayList<>(children.size());
      for (T child : children) {
        // A void-typed operand is re-typed as boolean before it is added.
        if (TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.VOID_TYPE_NAME).equals(exprFactory.getTypeInfo(child))) {
          child = exprFactory.setTypeInfo(child, TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME));
        }
        if (exprFactory.isANDFuncCallExpr(child)) {
          childrenList.addAll(exprFactory.getExprChildren(child));
        } else {
          childrenList.add(child);
        }
      }
      expr = exprFactory.createFuncCallExpr(node.getTypeInfo(), fi, funcText, childrenList);
    } else if (ctx.isFoldExpr() && exprFactory.convertCASEIntoCOALESCEFuncCallExpr(fi, children)) {
      // Rewrite CASE into COALESCE
      fi = exprFactory.getFunctionInfo("coalesce");
      expr = exprFactory.createFuncCallExpr(node.getTypeInfo(), fi, "coalesce",
          Lists.newArrayList(children.get(0), exprFactory.createBooleanConstantExpr(Boolean.FALSE.toString())));
      // When the second operand is the constant FALSE, the COALESCE is wrapped in NOT.
      if (Boolean.FALSE.equals(exprFactory.getConstantValue(children.get(1)))) {
        fi = exprFactory.getFunctionInfo("not");
        expr = exprFactory.createFuncCallExpr(node.getTypeInfo(), fi, "not", Lists.newArrayList(expr));
      }
    } else if (ctx.isFoldExpr() && exprFactory.convertCASEIntoIFFuncCallExpr(fi, children)) {
      // Rewrite CASE(C,A,B) into IF(C,A,B)
      fi = exprFactory.getFunctionInfo("if");
      expr = exprFactory.createFuncCallExpr(node.getTypeInfo(), fi, "if", children);
    } else {
      // Plain call: prefer the type attached to the node, else the cast target type (may be null).
      TypeInfo t = (node.getTypeInfo() != null) ? node.getTypeInfo() : typeInfo;
      expr = exprFactory.createFuncCallExpr(t, fi, funcText, children);
    }
    if (exprFactory.isSTRUCTFuncCallExpr(expr)) {
      expr = exprFactory.replaceFieldNamesInStruct(expr, ctx.getColumnAliases());
    }
    // If the function is deterministic and the children are constants,
    // we try to fold the expression to remove e.g. cast on constant
    if (ctx.isFoldExpr() && exprFactory.isFuncCallExpr(expr) &&
        exprFactory.isConsistentWithinQuery(fi) &&
        exprFactory.isAllConstants(children)) {
      T constantExpr = exprFactory.foldExpr(expr);
      if (constantExpr != null) {
        expr = constantExpr;
      }
    }
  }
  if (exprFactory.isPOSITIVEFuncCallExpr(expr)) {
    // UDFOPPositive is a no-op.
    assert (exprFactory.getExprChildren(expr).size() == 1);
    expr = exprFactory.getExprChildren(expr).get(0);
  } else if (exprFactory.isNEGATIVEFuncCallExpr(expr)) {
    // UDFOPNegative should always be folded.
    assert (exprFactory.getExprChildren(expr).size() == 1);
    T input = exprFactory.getExprChildren(expr).get(0);
    if (exprFactory.isConstantExpr(input)) {
      T constantExpr = exprFactory.foldExpr(expr);
      if (constantExpr != null) {
        expr = constantExpr;
      }
    }
  }
  assert (expr != null);
  return expr;
}