public static ColumnStatisticsData createHiveColStatsData()

in fe/src/main/java/org/apache/impala/catalog/ColumnStats.java [787:951]


  /**
   * Builds the Hive metastore ColumnStatisticsData that corresponds to the given column
   * statistics and column type.
   *
   * The number of distinct values (NDV) is first capped at 'capNdv' when 'capNdv' is
   * non-negative. For TINYINT, SMALLINT, INT, DATE and DECIMAL it is then capped again
   * at the number of distinct values the type can hold. For the integer types, DATE,
   * FLOAT/DOUBLE and DECIMAL, the low and high values are handed to
   * updateLowAndHighForHiveColumnStatsData(); a bound that is not set in 'colStats' (or
   * not set with the member matching the column type) is handed over as null.
   *
   * @param capNdv upper bound for the NDV; ignored when negative
   * @param colStats the statistics to convert
   * @param colType the type of the column that 'colStats' describes
   * @return the populated statistics object, or null if the primitive type of 'colType'
   *     is not handled by this method
   */
  public static ColumnStatisticsData createHiveColStatsData(
      long capNdv, TColumnStats colStats, Type colType) {
    ColumnStatisticsData result = new ColumnStatisticsData();
    long nullCount = colStats.getNum_nulls();
    boolean hasLow = colStats.isSetLow_value();
    boolean hasHigh = colStats.isSetHigh_value();

    // Cap NDV at row count if available. A negative 'capNdv' leaves the NDV untouched.
    long distinctCount = colStats.getNum_distinct_values();
    if (capNdv >= 0) distinctCount = Math.min(distinctCount, capNdv);

    switch (colType.getPrimitiveType()) {
      case BOOLEAN: {
        // Boolean stats carry true/false counts rather than an NDV.
        result.setBooleanStats(new BooleanColumnStatsData(
            colStats.getNum_trues(), colStats.getNum_falses(), nullCount));
        return result;
      }
      case TINYINT: {
        // At most 2^8 distinct values fit into a TINYINT.
        long cappedNdv = Math.min(distinctCount, LongMath.pow(2, Byte.SIZE));
        LongColumnStatsData stats = new LongColumnStatsData(nullCount, cappedNdv);
        Long low = (hasLow && colStats.low_value.isSetByte_val())
            ? Long.valueOf((long) colStats.low_value.getByte_val()) : null;
        Long high = (hasHigh && colStats.high_value.isSetByte_val())
            ? Long.valueOf((long) colStats.high_value.getByte_val()) : null;
        updateLowAndHighForHiveColumnStatsData(low, high, stats);
        result.setLongStats(stats);
        return result;
      }
      case SMALLINT: {
        // At most 2^16 distinct values fit into a SMALLINT.
        long cappedNdv = Math.min(distinctCount, LongMath.pow(2, Short.SIZE));
        LongColumnStatsData stats = new LongColumnStatsData(nullCount, cappedNdv);
        Long low = (hasLow && colStats.low_value.isSetShort_val())
            ? Long.valueOf((long) colStats.low_value.getShort_val()) : null;
        Long high = (hasHigh && colStats.high_value.isSetShort_val())
            ? Long.valueOf((long) colStats.high_value.getShort_val()) : null;
        updateLowAndHighForHiveColumnStatsData(low, high, stats);
        result.setLongStats(stats);
        return result;
      }
      case INT: {
        // At most 2^32 distinct values fit into an INT.
        long cappedNdv = Math.min(distinctCount, LongMath.pow(2, Integer.SIZE));
        LongColumnStatsData stats = new LongColumnStatsData(nullCount, cappedNdv);
        Long low = (hasLow && colStats.low_value.isSetInt_val())
            ? Long.valueOf((long) colStats.low_value.getInt_val()) : null;
        Long high = (hasHigh && colStats.high_value.isSetInt_val())
            ? Long.valueOf((long) colStats.high_value.getInt_val()) : null;
        updateLowAndHighForHiveColumnStatsData(low, high, stats);
        result.setLongStats(stats);
        return result;
      }
      case DATE: {
        // Number of distinct dates in the 0001-01-01..9999-12-31 inclusive range is
        // 3652059.
        long cappedNdv = Math.min(distinctCount, 3652059);
        DateColumnStatsData stats = new DateColumnStatsData(nullCount, cappedNdv);
        Date low = (hasLow && colStats.low_value.isSetDate_val())
            ? new Date(colStats.low_value.getDate_val()) : null;
        Date high = (hasHigh && colStats.high_value.isSetDate_val())
            ? new Date(colStats.high_value.getDate_val()) : null;
        updateLowAndHighForHiveColumnStatsData(low, high, stats);
        result.setDateStats(stats);
        return result;
      }
      case BIGINT: {
        // No type-specific NDV cap is applied for BIGINT.
        LongColumnStatsData stats = new LongColumnStatsData(nullCount, distinctCount);
        Long low = (hasLow && colStats.low_value.isSetLong_val())
            ? Long.valueOf(colStats.low_value.getLong_val()) : null;
        Long high = (hasHigh && colStats.high_value.isSetLong_val())
            ? Long.valueOf(colStats.high_value.getLong_val()) : null;
        updateLowAndHighForHiveColumnStatsData(low, high, stats);
        result.setLongStats(stats);
        return result;
      }
      case TIMESTAMP: {
        // Hive and Impala use LongColumnStatsData for timestamps. Only the null count
        // and the NDV are written; no low/high values are passed on.
        result.setLongStats(new LongColumnStatsData(nullCount, distinctCount));
        return result;
      }
      case FLOAT:
      case DOUBLE: {
        // Both floating point types read their bounds from the double-valued member.
        DoubleColumnStatsData stats =
            new DoubleColumnStatsData(nullCount, distinctCount);
        Double low = (hasLow && colStats.low_value.isSetDouble_val())
            ? Double.valueOf(colStats.low_value.getDouble_val()) : null;
        Double high = (hasHigh && colStats.high_value.isSetDouble_val())
            ? Double.valueOf(colStats.high_value.getDouble_val()) : null;
        updateLowAndHighForHiveColumnStatsData(low, high, stats);
        result.setDoubleStats(stats);
        return result;
      }
      case CHAR:
      case VARCHAR:
      case STRING: {
        // String stats are built from the max size, average size, null count and NDV.
        result.setStringStats(new StringColumnStatsData(
            colStats.getMax_size(), colStats.getAvg_size(), nullCount, distinctCount));
        return result;
      }
      case BINARY: {
        // No NDV is stored for BINARY.
        result.setBinaryStats(new BinaryColumnStatsData(
            colStats.getMax_size(), colStats.getAvg_size(), nullCount));
        return result;
      }
      case DECIMAL: {
        // The NDV is capped at 10^precision. The comparison is carried out in double
        // arithmetic and the result is truncated back to long.
        double maxDecimalNdv = Math.pow(10, colType.getPrecision());
        long cappedNdv = (long) Math.min(distinctCount, maxDecimalNdv);
        DecimalColumnStatsData stats = new DecimalColumnStatsData(nullCount, cappedNdv);
        ScalarType scalarType = (ScalarType) colType;
        // The scale is only looked up for a bound that is actually set.
        Decimal low = (hasLow && colStats.low_value.isSetDecimal_val())
            ? new Decimal((short) scalarType.decimalScale(),
                colStats.low_value.bufferForDecimal_val())
            : null;
        Decimal high = (hasHigh && colStats.high_value.isSetDecimal_val())
            ? new Decimal((short) scalarType.decimalScale(),
                colStats.high_value.bufferForDecimal_val())
            : null;
        updateLowAndHighForHiveColumnStatsData(low, high, stats);
        result.setDecimalStats(stats);
        return result;
      }
      default:
        // Any other primitive type is not handled here.
        return null;
    }
  }