private long evaluateComparator()

in ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java [1019:1307]


    private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, ExprNodeGenericFuncDesc genFunc,
        long currNumRows) {
      GenericUDF udf = genFunc.getGenericUDF();

      ExprNodeColumnDesc columnDesc;
      ExprNodeConstantDesc constantDesc;
      boolean upperBound;
      boolean closedBound;
      String boundValue;
      if (genFunc.getChildren().get(0) instanceof ExprNodeColumnDesc &&
          genFunc.getChildren().get(1) instanceof ExprNodeConstantDesc) {
        columnDesc = (ExprNodeColumnDesc) genFunc.getChildren().get(0);
        constantDesc = (ExprNodeConstantDesc) genFunc.getChildren().get(1);
        aspCtx.addAffectedColumn(columnDesc);
        // Comparison to null will always return false
        if (constantDesc.getValue() == null) {
          return 0;
        }
        boundValue = constantDesc.getValue().toString();
        upperBound = udf instanceof GenericUDFOPEqualOrLessThan ||
            udf instanceof GenericUDFOPLessThan;
        closedBound =  isClosedBound(udf);
      } else if (genFunc.getChildren().get(1) instanceof ExprNodeColumnDesc &&
          genFunc.getChildren().get(0) instanceof ExprNodeConstantDesc) {
        columnDesc = (ExprNodeColumnDesc) genFunc.getChildren().get(1);
        constantDesc = (ExprNodeConstantDesc) genFunc.getChildren().get(0);
        aspCtx.addAffectedColumn(columnDesc);
        // Comparison to null will always return false
        if (constantDesc.getValue() == null) {
          return 0;
        }
        boundValue = constantDesc.getValue().toString();
        upperBound = udf instanceof GenericUDFOPEqualOrGreaterThan ||
            udf instanceof GenericUDFOPGreaterThan;
        closedBound = isClosedBound(udf);
      } else {
        // default
        return currNumRows / 3;
      }

      ColStatistics cs = stats.getColumnStatisticsFromColName(columnDesc.getColumn());
      String colTypeLowerCase = columnDesc.getTypeString().toLowerCase();

      if (FilterSelectivityEstimator.isHistogramAvailable(cs)) {
        try {
          return evaluateComparatorWithHistogram(
              cs, currNumRows, colTypeLowerCase, boundValue, upperBound, closedBound);
        } catch (SketchesArgumentException e) {
          LOG.info("Sketch-based statistics estimation failed, falling back to regular estimation", e);
        }
      }

      if (cs != null && cs.getRange() != null &&
          cs.getRange().maxValue != null && cs.getRange().minValue != null) {
        try {
          if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)) {
            byte value = Byte.parseByte(boundValue);
            byte maxValue = cs.getRange().maxValue.byteValue();
            byte minValue = cs.getRange().minValue.byteValue();
            if (upperBound) {
              if (maxValue < value || maxValue == value && closedBound) {
                return currNumRows;
              }
              if (minValue > value || minValue == value && !closedBound) {
                return 0;
              }
              if (aspCtx.isUniformWithinRange()) {
                // Assuming uniform distribution, we can use the range to calculate
                // new estimate for the number of rows
                return Math.round(((double) (value - minValue) / (maxValue - minValue)) * currNumRows);
              }
            } else {
              if (minValue > value || minValue == value && closedBound) {
                return currNumRows;
              }
              if (maxValue < value || maxValue == value && !closedBound) {
                return 0;
              }
              if (aspCtx.isUniformWithinRange()) {
                // Assuming uniform distribution, we can use the range to calculate
                // new estimate for the number of rows
                return Math.round(((double) (maxValue - value) / (maxValue - minValue)) * currNumRows);
              }
            }
          } else if (colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)) {
            short value = Short.parseShort(boundValue);
            short maxValue = cs.getRange().maxValue.shortValue();
            short minValue = cs.getRange().minValue.shortValue();
            if (upperBound) {
              if (maxValue < value || maxValue == value && closedBound) {
                return currNumRows;
              }
              if (minValue > value || minValue == value && !closedBound) {
                return 0;
              }
              if (aspCtx.isUniformWithinRange()) {
                // Assuming uniform distribution, we can use the range to calculate
                // new estimate for the number of rows
                return Math.round(((double) (value - minValue) / (maxValue - minValue)) * currNumRows);
              }
            } else {
              if (minValue > value || minValue == value && closedBound) {
                return currNumRows;
              }
              if (maxValue < value || maxValue == value && !closedBound) {
                return 0;
              }
              if (aspCtx.isUniformWithinRange()) {
                // Assuming uniform distribution, we can use the range to calculate
                // new estimate for the number of rows
                return Math.round(((double) (maxValue - value) / (maxValue - minValue)) * currNumRows);
              }
            }
          } else if (colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME) ||
              colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
            int value;
            if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
              DateWritable writableVal = new DateWritable(java.sql.Date.valueOf(boundValue));
              value = writableVal.getDays();
            } else {
              value = Integer.parseInt(boundValue);
            }
            // Date is an integer internally
            int maxValue = cs.getRange().maxValue.intValue();
            int minValue = cs.getRange().minValue.intValue();
            if (upperBound) {
              if (maxValue < value || maxValue == value && closedBound) {
                return currNumRows;
              }
              if (minValue > value || minValue == value && !closedBound) {
                return 0;
              }
              if (aspCtx.isUniformWithinRange()) {
                // Assuming uniform distribution, we can use the range to calculate
                // new estimate for the number of rows
                return Math.round(((double) (value - minValue) / (maxValue - minValue)) * currNumRows);
              }
            } else {
              if (minValue > value || minValue == value && closedBound) {
                return currNumRows;
              }
              if (maxValue < value || maxValue == value && !closedBound) {
                return 0;
              }
              if (aspCtx.isUniformWithinRange()) {
                // Assuming uniform distribution, we can use the range to calculate
                // new estimate for the number of rows
                return Math.round(((double) (maxValue - value) / (maxValue - minValue)) * currNumRows);
              }
            }
          } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME) ||
              colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
            long value;
            if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
              TimestampWritableV2 timestampWritable = new TimestampWritableV2(Timestamp.valueOf(boundValue));
              value = timestampWritable.getTimestamp().toEpochSecond();
            } else {
              value = Long.parseLong(boundValue);
            }
            long maxValue = cs.getRange().maxValue.longValue();
            long minValue = cs.getRange().minValue.longValue();
            if (upperBound) {
              if (maxValue < value || maxValue == value && closedBound) {
                return currNumRows;
              }
              if (minValue > value || minValue == value && !closedBound) {
                return 0;
              }
              if (aspCtx.isUniformWithinRange()) {
                // Assuming uniform distribution, we can use the range to calculate
                // new estimate for the number of rows
                return Math.round(((double) (value - minValue) / (maxValue - minValue)) * currNumRows);
              }
            } else {
              if (minValue > value || minValue == value && closedBound) {
                return currNumRows;
              }
              if (maxValue < value || maxValue == value && !closedBound) {
                return 0;
              }
              if (aspCtx.isUniformWithinRange()) {
                // Assuming uniform distribution, we can use the range to calculate
                // new estimate for the number of rows
                return Math.round(((double) (maxValue - value) / (maxValue - minValue)) * currNumRows);
              }
            }
          } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
            float value = Float.parseFloat(boundValue);
            float maxValue = cs.getRange().maxValue.floatValue();
            float minValue = cs.getRange().minValue.floatValue();
            if (upperBound) {
              if (maxValue < value || maxValue == value && closedBound) {
                return currNumRows;
              }
              if (minValue > value || minValue == value && !closedBound) {
                return 0;
              }
              if (aspCtx.isUniformWithinRange()) {
                // Assuming uniform distribution, we can use the range to calculate
                // new estimate for the number of rows
                return Math.round(((double) (value - minValue) / (maxValue - minValue)) * currNumRows);
              }
            } else {
              if (minValue > value || minValue == value && closedBound) {
                return currNumRows;
              }
              if (maxValue < value || maxValue == value && !closedBound) {
                return 0;
              }
              if (aspCtx.isUniformWithinRange()) {
                // Assuming uniform distribution, we can use the range to calculate
                // new estimate for the number of rows
                return Math.round(((double) (maxValue - value) / (maxValue - minValue)) * currNumRows);
              }
            }
          } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
            double value = Double.parseDouble(boundValue);
            double maxValue = cs.getRange().maxValue.doubleValue();
            double minValue = cs.getRange().minValue.doubleValue();
            if (upperBound) {
              if (maxValue < value || maxValue == value && closedBound) {
                return currNumRows;
              }
              if (minValue > value || minValue == value && !closedBound) {
                return 0;
              }
              if (aspCtx.isUniformWithinRange()) {
                // Assuming uniform distribution, we can use the range to calculate
                // new estimate for the number of rows
                return Math.round(((value - minValue) / (maxValue - minValue)) * currNumRows);
              }
            } else {
              if (minValue > value || minValue == value && closedBound) {
                return currNumRows;
              }
              if (maxValue < value || maxValue == value && !closedBound) {
                return 0;
              }
              if (aspCtx.isUniformWithinRange()) {
                // Assuming uniform distribution, we can use the range to calculate
                // new estimate for the number of rows
                return Math.round(((maxValue - value) / (maxValue - minValue)) * currNumRows);
              }
            }
          } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
            BigDecimal value = new BigDecimal(boundValue);
            BigDecimal maxValue = new BigDecimal(cs.getRange().maxValue.toString());
            BigDecimal minValue = new BigDecimal(cs.getRange().minValue.toString());
            int minComparison = value.compareTo(minValue);
            int maxComparison = value.compareTo(maxValue);
            if (upperBound) {
              if (maxComparison > 0 || maxComparison == 0 && closedBound) {
                return currNumRows;
              }
              if (minComparison < 0 || minComparison == 0 && !closedBound) {
                return 0;
              }
              if (aspCtx.isUniformWithinRange()) {
                // Assuming uniform distribution, we can use the range to calculate
                // new estimate for the number of rows
                return Math.round(
                    ((value.subtract(minValue)).divide(maxValue.subtract(minValue), RoundingMode.UP))
                        .multiply(BigDecimal.valueOf(currNumRows))
                        .doubleValue());
              }
            } else {
              if (minComparison < 0 || minComparison == 0 && closedBound) {
                return currNumRows;
              }
              if (maxComparison > 0 || maxComparison == 0 && !closedBound) {
                return 0;
              }
              if (aspCtx.isUniformWithinRange()) {
                // Assuming uniform distribution, we can use the range to calculate
                // new estimate for the number of rows
                return Math.round(
                    ((maxValue.subtract(value)).divide(maxValue.subtract(minValue), RoundingMode.UP))
                        .multiply(BigDecimal.valueOf(currNumRows))
                        .doubleValue());
              }
            }
          }
        } catch (NumberFormatException nfe) {
          return currNumRows / 3;
        }
      }
      // default
      return currNumRows / 3;
    }