protected T getXpathOrFuncExprNodeDesc()

in ql/src/java/org/apache/hadoop/hive/ql/parse/type/TypeCheckProcFactory.java [869:1121]


    /**
     * Builds the expression for a field access ({@code .}), an index access ({@code [}),
     * or any other operator / function call, from the expressions already built for the
     * node's children.
     *
     * <p>The case is selected by the text returned from
     * {@code getFunctionText(node, isFunction)}:
     * <ul>
     *   <li>{@code "."} - field access on a struct, or on a list whose element type is a
     *       struct (the result type is then a list of the field type);</li>
     *   <li>{@code "["} - element access on a list or a map, resolved through the function
     *       looked up under the name {@code "index"};</li>
     *   <li>anything else - a regular operator or function, with additional rewrites for
     *       column-vs-constant comparisons, IN, OR/AND flattening, CASE, STRUCT and
     *       constant folding.</li>
     * </ul>
     * As a last step, a unary-plus call is replaced by its operand and a unary-minus call
     * whose operand is a constant is folded.
     *
     * <p>Note that {@code children} is modified in place by some rewrites (constant
     * re-interpretation for comparisons and IN splitting). Several structural
     * expectations are written as {@code assert} statements, so they are only checked
     * when assertions are enabled.
     *
     * @param node AST node of the operator or function being translated
     * @param isFunction forwarded to {@code getFunctionText}; when true, child 0 of
     *     {@code node} is the node reported in the invalid-function error and the node
     *     inspected for type information
     * @param children expressions for the operands; may be mutated by this method
     * @param ctx type-check context; consulted for the index-expression permission, the
     *     fold-expression flag, the unparse translator and the column aliases
     * @return the resulting expression: the single child for a redundant conversion, a
     *     boolean {@code false} constant for an IN in which no operand could be
     *     interpreted, otherwise the (possibly rewritten or folded) expression
     * @throws SemanticException among others: when {@code .} is applied to something
     *     that is neither a struct nor a list of structs, when index expressions are not
     *     allowed by {@code ctx}, when a list index / map key type is not implicitly
     *     convertible, when {@code [} is applied to a non-list, non-map type, or when no
     *     function information is found for the function text
     */
    protected T getXpathOrFuncExprNodeDesc(ASTNode node,
        boolean isFunction, List<T> children, TypeCheckCtx ctx)
        throws SemanticException {
      // return the child directly if the conversion is redundant.
      if (isRedundantConversionFunction(node, isFunction, children)) {
        assert (children.size() == 1);
        assert (children.get(0) != null);
        return children.get(0);
      }
      String funcText = getFunctionText(node, isFunction);
      T expr;
      if (funcText.equals(".")) {
        // "." : field access; children are (object expression, constant field name).
        assert (children.size() == 2);
        // Only allow constant field name for now
        assert (exprFactory.isConstantExpr(children.get(1)));
        T object = children.get(0);

        // Calculate result TypeInfo
        String fieldNameString = exprFactory.getConstantValueAsString(children.get(1));
        TypeInfo objectTypeInfo = exprFactory.getTypeInfo(object);

        // Allow accessing a field of list element structs directly from a list
        boolean isList = (objectTypeInfo.getCategory() == ObjectInspector.Category.LIST);
        if (isList) {
          // From here on the struct check and field lookup apply to the element type.
          objectTypeInfo = ((ListTypeInfo) objectTypeInfo).getListElementTypeInfo();
        }
        if (objectTypeInfo.getCategory() != Category.STRUCT) {
          throw new SemanticException(ASTErrorUtils.getMsg(
              ErrorMsg.INVALID_DOT.getMsg(), node));
        }
        TypeInfo t = ((StructTypeInfo) objectTypeInfo).getStructFieldTypeInfo(fieldNameString);
        if (isList) {
          // Field access on a list of structs yields a list of the field's type.
          t = TypeInfoFactory.getListTypeInfo(t);
        }

        expr = exprFactory.createNestedColumnRefExpr(t, children.get(0), fieldNameString, isList);
      } else if (funcText.equals("[")) {
        // The bracket operator is resolved through the function named "index".
        funcText = "index";
        FunctionInfo fi = exprFactory.getFunctionInfo(funcText);

        // "[]" : LSQUARE/INDEX Expression
        if (!ctx.getallowIndexExpr()) {
          throw new SemanticException(ASTErrorUtils.getMsg(
              ErrorMsg.INVALID_FUNCTION.getMsg(), node));
        }

        assert (children.size() == 2);

        // Check whether this is a list or a map
        TypeInfo myt = exprFactory.getTypeInfo(children.get(0));

        if (myt.getCategory() == Category.LIST) {
          // Only allow integer index for now
          if (!TypeInfoUtils.implicitConvertible(exprFactory.getTypeInfo(children.get(1)),
              TypeInfoFactory.intTypeInfo)) {
            throw new SemanticException(SemanticAnalyzer.generateErrorMessage(
                node, ErrorMsg.INVALID_ARRAYINDEX_TYPE.getMsg()));
          }

          // Calculate TypeInfo
          // A type already attached to the AST node takes precedence over the element type.
          TypeInfo t = node.getTypeInfo() != null ? node.getTypeInfo() : ((ListTypeInfo) myt).getListElementTypeInfo();
          expr = exprFactory.createFuncCallExpr(t, fi, funcText, children);
        } else if (myt.getCategory() == Category.MAP) {
          // The key expression must be implicitly convertible to the map's key type.
          if (!TypeInfoUtils.implicitConvertible(exprFactory.getTypeInfo(children.get(1)),
              ((MapTypeInfo) myt).getMapKeyTypeInfo())) {
            throw new SemanticException(ASTErrorUtils.getMsg(
                ErrorMsg.INVALID_MAPINDEX_TYPE.getMsg(), node));
          }
          // Calculate TypeInfo
          // A type already attached to the AST node takes precedence over the value type.
          TypeInfo t = node.getTypeInfo() != null ? node.getTypeInfo() : ((MapTypeInfo) myt).getMapValueTypeInfo();
          expr = exprFactory.createFuncCallExpr(t, fi, funcText, children);
        } else {
          throw new SemanticException(ASTErrorUtils.getMsg(
              ErrorMsg.NON_COLLECTION_TYPE.getMsg(), node, myt.getTypeName()));
        }
      } else {
        // other operators or functions
        FunctionInfo fi = exprFactory.getFunctionInfo(funcText);

        if (fi == null) {
          // For a function call the error is reported on child 0 of the node,
          // otherwise on the node itself.
          if (isFunction) {
            throw new SemanticException(ASTErrorUtils.getMsg(
                ErrorMsg.INVALID_FUNCTION.getMsg(), (ASTNode) node.getChild(0)));
          } else {
            throw new SemanticException(ASTErrorUtils.getMsg(
                ErrorMsg.INVALID_FUNCTION.getMsg(), node));
          }
        }

        // For non-native functions, child 0 of the node is registered with the
        // unparse translator as an identifier translation.
        if (!fi.isNative()) {
          ctx.getUnparseTranslator().addIdentifierTranslation(
              (ASTNode) node.getChild(0));
        }

        // Handle type casts that may contain type parameters
        // Only used further below as the fallback result type when the node carries none.
        TypeInfo typeInfo = isFunction ? getTypeInfo((ASTNode) node.getChild(0)) : null;

        insertCast(funcText, children);

        validateUDF(node, isFunction, ctx, fi, children);

        // Try to infer the type of the constant only if there are two
        // nodes, one of them is column and the other is numeric const
        if (exprFactory.isCompareFunction(fi)
            && children.size() == 2
            && ((exprFactory.isConstantExpr(children.get(0))
            && exprFactory.isColumnRefExpr(children.get(1)))
            || (exprFactory.isColumnRefExpr(children.get(0))
            && exprFactory.isConstantExpr(children.get(1))))) {

          // Exactly one side is the constant; the other index (1 - constIdx) is the column.
          int constIdx = exprFactory.isConstantExpr(children.get(0)) ? 0 : 1;

          T constChild = children.get(constIdx);
          T columnChild = children.get(1 - constIdx);

          final PrimitiveTypeInfo colTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(
              exprFactory.getTypeInfo(columnChild).getTypeName().toLowerCase());
          T newChild = interpretNodeAsConstant(colTypeInfo, constChild);
          // A null result leaves the original constant untouched; otherwise the
          // constant is replaced in the caller-supplied list.
          if (newChild != null) {
            children.set(constIdx, newChild);
          }
        }
        // The "in" function is sometimes changed to an "or".  Later on, the "or"
        // function is processed a little differently.  We don't want to process this
        // new "or" function differently, so we track it with this variable.
        // TODO: Test to see if this can be removed.
        boolean functionInfoChangedFromIn = false;
        if (exprFactory.isInFunction(fi)) {
          // We will split the IN clause into different IN clauses, one for each
          // different value type. The reason is that Hive and Calcite treat
          // types in IN clauses differently and it is practically impossible
          // to find some correct implementation unless this is done.
          // Each key's value list is laid out as (column expression, value, value, ...),
          // i.e. directly usable as the argument list of an "in" call.
          ListMultimap<TypeInfo, T> expressions = ArrayListMultimap.create();
          for (int i = 1; i < children.size(); i++) {
            T columnDesc = children.get(0);
            T valueDesc = interpretNode(columnDesc, children.get(i));
            // Operands for which interpretNode returns null are dropped from the IN list.
            if (valueDesc != null) {
              // Only add to the expression map if types can be coerced
              TypeInfo targetType = exprFactory.getTypeInfo(valueDesc);
              if (!expressions.containsKey(targetType)) {
                expressions.put(targetType, columnDesc);
              }
              expressions.put(targetType, valueDesc);
            }
          }
          if(expressions.isEmpty()) {
            // We will only hit this when none of the operands inside the "in" clause can be type-coerced
            // That would imply that the result of "in" is a boolean "false"
            // This should not impact those cases where the "in" clause is used on a boolean column and
            // there is no operand in the "in" clause that cannot be type-coerced into boolean because
            // in case of boolean, Hive does not allow such use cases and throws an error
            return exprFactory.createBooleanConstantExpr("false");
          }

          // Rebuild the caller-supplied list from the grouped operands.
          children.clear();
          List<T> newExprs = new ArrayList<>();
          int numEntries = expressions.keySet().size();
          if (numEntries == 1) {
            // Single value type: keep one IN over (column, values...).
            children.addAll(expressions.asMap().values().iterator().next());
            funcText = "in";
            fi = exprFactory.getFunctionInfo("in");
          } else {
            // Several value types: one IN per type, combined with OR.
            FunctionInfo inFunctionInfo  = exprFactory.getFunctionInfo("in");
            for (Collection<T> c : expressions.asMap().values()) {
              // NOTE(review): the cast relies on the value collections of a ListMultimap's
              // asMap() view being Lists (see the Guava ListMultimap documentation).
              newExprs.add(exprFactory.createFuncCallExpr(node.getTypeInfo(), inFunctionInfo,
                  "in", (List<T>) c));
            }
            children.addAll(newExprs);
            funcText = "or";
            fi = exprFactory.getFunctionInfo("or");
            functionInfoChangedFromIn = true;
          }
        }
        if (exprFactory.isOrFunction(fi) && !functionInfoChangedFromIn) {
          // flatten OR
          List<T> childrenList = new ArrayList<>(children.size());
          for (T child : children) {
            // An operand typed VOID is re-typed as BOOLEAN before being added.
            if (TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.VOID_TYPE_NAME).equals(exprFactory.getTypeInfo(child))) {
              child = exprFactory.setTypeInfo(child, TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME));
            }
            // A nested OR contributes its operands instead of itself.
            if (exprFactory.isORFuncCallExpr(child)) {
              childrenList.addAll(exprFactory.getExprChildren(child));
            } else {
              childrenList.add(child);
            }
          }
          expr = exprFactory.createFuncCallExpr(node.getTypeInfo(), fi, funcText, childrenList);
        } else if (exprFactory.isAndFunction(fi)) {
          // flatten AND
          List<T> childrenList = new ArrayList<>(children.size());
          for (T child : children) {
            // An operand typed VOID is re-typed as BOOLEAN before being added.
            if (TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.VOID_TYPE_NAME).equals(exprFactory.getTypeInfo(child))) {
              child = exprFactory.setTypeInfo(child, TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME));
            }
            // A nested AND contributes its operands instead of itself.
            if (exprFactory.isANDFuncCallExpr(child)) {
              childrenList.addAll(exprFactory.getExprChildren(child));
            } else {
              childrenList.add(child);
            }
          }
          expr = exprFactory.createFuncCallExpr(node.getTypeInfo(), fi, funcText, childrenList);
        } else if (ctx.isFoldExpr() && exprFactory.convertCASEIntoCOALESCEFuncCallExpr(fi, children)) {
          // Rewrite CASE into COALESCE
          // Result is coalesce(children[0], false), wrapped in NOT when the constant
          // value of children[1] is FALSE.
          fi = exprFactory.getFunctionInfo("coalesce");
          expr = exprFactory.createFuncCallExpr(node.getTypeInfo(), fi, "coalesce",
              Lists.newArrayList(children.get(0), exprFactory.createBooleanConstantExpr(Boolean.FALSE.toString())));
          if (Boolean.FALSE.equals(exprFactory.getConstantValue(children.get(1)))) {
            fi = exprFactory.getFunctionInfo("not");
            expr = exprFactory.createFuncCallExpr(node.getTypeInfo(), fi, "not", Lists.newArrayList(expr));
          }
        } else if (ctx.isFoldExpr() && exprFactory.convertCASEIntoIFFuncCallExpr(fi, children)) {
          // Rewrite CASE(C,A,B) into IF(C,A,B)
          fi = exprFactory.getFunctionInfo("if");
          expr = exprFactory.createFuncCallExpr(node.getTypeInfo(), fi, "if", children);
        } else {
          // Default: plain call. The node's own type wins; otherwise fall back to the
          // type derived from child 0 above (null when this is not a function call).
          TypeInfo t = (node.getTypeInfo() != null) ? node.getTypeInfo() : typeInfo;
          expr = exprFactory.createFuncCallExpr(t, fi, funcText, children);
        }

        if (exprFactory.isSTRUCTFuncCallExpr(expr)) {
          expr = exprFactory.replaceFieldNamesInStruct(expr, ctx.getColumnAliases());
        }

        // If the function is deterministic and the children are constants,
        // we try to fold the expression to remove e.g. cast on constant
        // NOTE(review): the constant check looks at `children`, not at the flattened
        // list built in the OR/AND branches - confirm this is intended.
        if (ctx.isFoldExpr() && exprFactory.isFuncCallExpr(expr) &&
            exprFactory.isConsistentWithinQuery(fi) &&
            exprFactory.isAllConstants(children)) {
          T constantExpr = exprFactory.foldExpr(expr);
          // A null result from foldExpr keeps the unfolded expression.
          if (constantExpr != null) {
            expr = constantExpr;
          }
        }
      }

      // Post-processing applied to the result of all three cases above.
      if (exprFactory.isPOSITIVEFuncCallExpr(expr)) {
        // UDFOPPositive is a no-op.
        assert (exprFactory.getExprChildren(expr).size() == 1);
        expr = exprFactory.getExprChildren(expr).get(0);
      } else if (exprFactory.isNEGATIVEFuncCallExpr(expr)) {
        // UDFOPNegative should always be folded.
        // Folding is only attempted here when the operand is a constant expression.
        assert (exprFactory.getExprChildren(expr).size() == 1);
        T input = exprFactory.getExprChildren(expr).get(0);
        if (exprFactory.isConstantExpr(input)) {
          T constantExpr = exprFactory.foldExpr(expr);
          if (constantExpr != null) {
            expr = constantExpr;
          }
        }
      }
      assert (expr != null);
      return expr;
    }