protected void analyzeImpl(Analyzer analyzer)

in fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java [626:835]


  protected void analyzeImpl(Analyzer analyzer) throws AnalysisException {
    fnName_.analyze(analyzer);
    if (!fnName_.isBuiltin()) {
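      // Record each distinct UDF name once under the "User Defined Functions (UDFs)"
      // info string of the frontend profile.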
      FrontendProfile profile = FrontendProfile.getCurrentOrNull();
      if (profile != null) {
        String udfInfoStringKey = "User Defined Functions (UDFs)";
        String functionName = fnName_.toString();
        if (!profile.getInfoString(udfInfoStringKey).contains(functionName)) {
          profile.appendInfoString(udfInfoStringKey, functionName);
        }
      }
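      // Calling a non-builtin function requires SELECT privilege on that function.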
      analyzer.registerPrivReq(builder ->
          builder.allOf(Privilege.SELECT)
          .onFunction(fnName_.getDb(), fnName_.getFunction()).build());
    }

    if (isMergeAggFn()) {
      // This is the function call expr after splitting up to a merge aggregation.
      // The function has already been analyzed so just do the minimal sanity
      // check here.
      AggregateFunction aggFn = (AggregateFunction)fn_;
      Preconditions.checkNotNull(aggFn);
      Type intermediateType = aggFn.getIntermediateType();
      if (intermediateType == null) intermediateType = type_;
      Preconditions.checkState(!type_.isWildcardDecimal());
      return;
    }

    // User needs DB access.
    FeDb db = analyzer.getDb(fnName_.getDb(), Privilege.VIEW_METADATA, true);
    if (!db.containsFunction(fnName_.getFunction())) {
      throw new AnalysisException(fnName_ + "() unknown for database " + db.getName()
          + ". Currently this db has " + db.numFunctions() + " functions.");
    }

    if (isBuiltinCastFunction()) {
      throw new AnalysisException(toSql() +
          " is reserved for internal use only. Use 'cast(expr AS type)' instead.");
    }

    if (fnName_.getFunction().equals("count") && params_.isDistinct()) {
      // Treat COUNT(DISTINCT ...) specially because of how we do the rewrite.
      // There is no version of COUNT() that takes more than one argument, but
      // after the rewrite we only need count(*).
      // TODO: fix how we rewrite count distinct.
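      // For example, count(DISTINCT c1, c2) resolves to the zero-argument count()
      // signature here and is rewritten in terms of count(*) later.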
      Function searchDesc = new Function(fnName_, new Type[0], Type.INVALID, false);
      fn_ = db.getFunction(searchDesc, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
      type_ = fn_.getReturnType();
      // Make sure BE doesn't see any TYPE_NULL exprs
      for (int i = 0; i < children_.size(); ++i) {
        if (getChild(i).getType().isNull()) {
          uncheckedCastChild(ScalarType.BOOLEAN, i);
        }
      }
      return;
    }

    // grouping_id() can take any set of input slot arguments. Just resolve it to the
    // zero-argument version so it can be rewritten in MultiAggregateInfo.
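    // (grouping_id() typically appears in queries using GROUPING SETS / ROLLUP / CUBE.)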
    if (isGroupingIdBuiltin()) {
      Function searchDesc = new Function(fnName_, new Type[0], Type.INVALID, false);
      fn_ = db.getFunction(searchDesc, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
      type_ = fn_.getReturnType();
      return;
    }

    // TODO: We allow an implicit cast from string->timestamp but only
    // support avg(timestamp). This means avg(string_col) would work
    // under our casting rules, which is not right. We need to revisit
    // where implicit casts from string to timestamp are allowed.
    if (fnName_.getFunction().equalsIgnoreCase("avg") &&
        children_.size() == 1 && children_.get(0).getType().isStringType()) {
      throw new AnalysisException(
          "AVG requires a numeric or timestamp parameter: " + toSql());
    }

    // SAMPLED_NDV() is only valid with two children. Invocations with an invalid number
    // of children are gracefully handled when resolving the function signature.
    if (fnName_.getFunction().equalsIgnoreCase("sampled_ndv")
        && children_.size() == 2) {
      if (!(children_.get(1) instanceof NumericLiteral)) {
        throw new AnalysisException(
            "Second parameter of SAMPLED_NDV() must be a numeric literal in [0,1]: " +
            children_.get(1).toSql());
      }
      NumericLiteral samplePerc = (NumericLiteral) children_.get(1);
      if (samplePerc.getDoubleValue() < 0 || samplePerc.getDoubleValue() > 1.0) {
        throw new AnalysisException(
            "Second parameter of SAMPLED_NDV() must be a numeric literal in [0,1]: " +
            samplePerc.toSql());
      }
      // Numeric literals with a decimal point are analyzed as decimals. Without this
      // cast we might resolve to the wrong function because there is no exactly
      // matching signature with decimal as the second argument.
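      // For example, in sampled_ndv(col, 0.2) the literal 0.2 is analyzed as a
      // DECIMAL and is cast to DOUBLE here.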
      children_.set(1, samplePerc.uncheckedCastTo(Type.DOUBLE));
    }

    Type[] argTypes = collectChildReturnTypes();
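    // Resolve the call against the catalog, allowing the argument types to be
    // implicitly cast to a matching signature (e.g. a TINYINT argument can bind
    // to a BIGINT parameter).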
    Function searchDesc = new Function(fnName_, argTypes, Type.INVALID, false);
    fn_ = db.getFunction(searchDesc, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
    if (fn_ == null || (!isInternalFnCall_ && !fn_.userVisible())) {
      throw new AnalysisException(getFunctionNotFoundError(argTypes));
    }

    // NDV() can optionally take a second argument which must be an integer literal
    // in the range from 1 to 10. Perform the analysis here.
    if (fnName_.getFunction().equalsIgnoreCase("ndv") && children_.size() == 2) {
      if (!(children_.get(1) instanceof NumericLiteral)) {
        throw new AnalysisException(
            "Second parameter of NDV() must be an integer literal: "
            + children_.get(1).toSql());
      }

      NumericLiteral scale = (NumericLiteral) children_.get(1);

      if (scale.getValue().scale() != 0
          || !NumericLiteral.fitsInInt(scale.getValue())
          || scale.getIntValue() < 1 || scale.getIntValue() > 10) {
        throw new AnalysisException(
            "Second parameter of NDV() must be an integer literal in [1,10]: "
            + scale.toSql());
      }
      children_.set(1, scale.uncheckedCastTo(Type.INT));

      // In BuiltinsDb, look for an AggregateFunction template with the correct length for
      // the intermediate data type and use it.
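      // For example, ndv(col, 10) selects a wider HLL intermediate type (and thus a
      // more accurate estimate) than ndv(col, 1).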
      BuiltinsDb builtinDb = (BuiltinsDb) db;
      int size = ComputeHllLengthFromScale(scale.getIntValue());
      fn_ = builtinDb.resolveNdvIntermediateType((AggregateFunction) fn_, size);

      if (fn_ == null) {
        throw new AnalysisException(
            "A suitable intermediate data type cannot be found for the second parameter "
            + children_.get(1).toSql() + " in NDV()");
      }
    }

    if (isAggregateFunction()) {
      // subexprs must not contain aggregates
      if (TreeNode.contains(children_, Expr.IS_AGGREGATE)) {
        throw new AnalysisException(
            "aggregate function must not contain aggregate parameters: " + this.toSql());
      }

      // .. or analytic exprs
      if (Expr.contains(children_, AnalyticExpr.class)) {
        throw new AnalysisException(
            "aggregate function must not contain analytic parameters: " + this.toSql());
      }

      // The catalog contains count() with no arguments to handle count(*), but we
      // don't accept a bare count().
      // TODO: can this be handled more cleanly? It does seem like a special case
      // since no other aggregate function (currently) can accept '*'.
      if (fnName_.getFunction().equalsIgnoreCase("count") &&
          !params_.isStar() && children_.size() == 0) {
        throw new AnalysisException("count() is not allowed.");
      }

      // TODO: the distinct rewrite does not handle this but why?
      if (params_.isDistinct()) {
        // The second argument in group_concat(distinct) must be a constant expr that
        // returns a string.
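        // For example, group_concat(DISTINCT c, ', ') is allowed, while
        // group_concat(DISTINCT c, delim_col) is rejected.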
        if (fnName_.getFunction().equalsIgnoreCase("group_concat")
            && getChildren().size() == 2
            && !getChild(1).isConstant()) {
          throw new AnalysisException("Second parameter in GROUP_CONCAT(DISTINCT)" +
              " must be a constant expression that returns a string.");
        }
        if (fn_.getBinaryType() != TFunctionBinaryType.BUILTIN) {
          throw new AnalysisException("User defined aggregates do not support DISTINCT.");
        }
      }

      AggregateFunction aggFn = (AggregateFunction)fn_;
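      // Some builtins produce the same result with or without DISTINCT
      // (e.g. min(DISTINCT c) == min(c)), so the flag is simply dropped.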
      if (aggFn.ignoresDistinct()) params_.setIsDistinct(false);

      if (aggFn.isUnsupported()) {
        throw new AnalysisException(getFunctionNotFoundError(argTypes));
      }
    }

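    // IGNORE NULLS is only accepted on analytic calls, e.g.
    // last_value(c IGNORE NULLS) OVER (...).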
    if (params_.isIgnoreNulls() && !isAnalyticFnCall_) {
      throw new AnalysisException("Function " + fnName_.getFunction().toUpperCase()
          + " does not accept the keyword IGNORE NULLS.");
    }

    if (isScalarFunction()) validateScalarFnParams(params_);
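    // Analytic-only builtins, e.g. rank() or lead(), are rejected here when
    // called without an OVER clause.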
    if (fn_ instanceof AggregateFunction
        && ((AggregateFunction) fn_).isAnalyticFn()
        && !((AggregateFunction) fn_).isAggregateFn()
        && !isAnalyticFnCall_) {
      throw new AnalysisException(
          "Analytic function requires an OVER clause: " + toSql());
    }

    castForFunctionCall(false, analyzer.getRegularCompatibilityLevel());
    type_ = fn_.getReturnType();
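    // Builtins declared with a wildcard DECIMAL return type (e.g. round() on a
    // DECIMAL argument) derive their concrete precision and scale from the arguments.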
    if (type_.isDecimal() && type_.isWildcardDecimal()) {
      type_ = resolveDecimalReturnType(analyzer);
    }

    // We do not allow any function to return a CHAR or VARCHAR type.
    // TODO: add support for CHAR(N) and VARCHAR(N) return values post 2.0;
    // support for this was not added to the backend in 2.0.
    if (type_.isWildcardChar() || type_.isWildcardVarchar()) {
      type_ = ScalarType.STRING;
    }
  }