public static void fillColumnStatisticsData()

in standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java [780:981]


  /**
   * Populates {@code data} with the statistics variant that matches {@code colType}, using raw
   * values (passed as {@code Object}) that are converted through
   * {@link MetastoreDirectSqlUtils#extractSqlLong} / {@code extractSqlDouble}.
   *
   * <p>Supported types: boolean, string/varchar/char, binary, bigint/int/smallint/tinyint, date,
   * timestamp, double/float and decimal. For any other type {@code data} is left untouched.
   *
   * <p>For the numeric, date, timestamp and decimal variants the number of distinct values (NDV)
   * is estimated as follows:
   * <ul>
   *   <li>if {@code useDensityFunctionForNDVEstimation} is set and both bounds and a non-zero
   *       average density are available, NDV is {@code (high - low) / density}, clamped to
   *       {@code [dist, sumDist]};</li>
   *   <li>otherwise NDV is {@code dist + (sumDist - dist) * ndvTuner}.</li>
   * </ul>
   * For the long, date and timestamp variants the result is additionally capped by the number of
   * values in the inclusive range {@code [llow, lhigh]} when both bounds are present.
   *
   * @param colType column type name; matched case-insensitively (locale-independent)
   * @param data statistics holder that receives the populated variant
   * @param llow low bound for long/date/timestamp columns, may be {@code null}
   * @param lhigh high bound for long/date/timestamp columns, may be {@code null}
   * @param dlow low bound for double/float columns, may be {@code null}
   * @param dhigh high bound for double/float columns, may be {@code null}
   * @param declow low bound for decimal columns; read when it is a {@link BigDecimal} or a
   *        {@link String}, otherwise the low value is set to {@code null}
   * @param dechigh high bound for decimal columns; same handling as {@code declow}
   * @param nulls number of nulls
   * @param dist lower bound used for the NDV estimate
   * @param avglen average column length (string and binary columns)
   * @param maxlen maximum column length (string and binary columns)
   * @param trues number of true values (boolean columns)
   * @param falses number of false values (boolean columns)
   * @param avgLong average density for long/date/timestamp columns, may be {@code null}
   * @param avgDouble average density for double/float columns, may be {@code null}
   * @param avgDecimal average density for decimal columns, may be {@code null}
   * @param sumDist upper bound used for the NDV estimate
   * @param useDensityFunctionForNDVEstimation whether to try the density based estimate first
   * @param ndvTuner interpolation factor between {@code dist} and {@code sumDist} used when the
   *        density based estimate is not applicable
   * @throws MetaException declared by the {@link MetastoreDirectSqlUtils} extraction helpers
   */
  public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data,
      Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh,
      Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses,
      Object avgLong, Object avgDouble, Object avgDecimal, Object sumDist,
      boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
    // Locale.ROOT keeps the type matching below independent of the JVM default locale
    // (e.g. "BIGINT" would not lower-case to "bigint" under a Turkish locale).
    colType = colType.toLowerCase(java.util.Locale.ROOT);
    if (colType.equals("boolean")) {
      BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
      boolStats.setNumFalses(MetastoreDirectSqlUtils.extractSqlLong(falses));
      boolStats.setNumTrues(MetastoreDirectSqlUtils.extractSqlLong(trues));
      boolStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
      data.setBooleanStats(boolStats);
    } else if (colType.equals("string") || colType.startsWith("varchar")
        || colType.startsWith("char")) {
      StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
      stringStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
      stringStats.setAvgColLen(MetastoreDirectSqlUtils.extractSqlDouble(avglen));
      stringStats.setMaxColLen(MetastoreDirectSqlUtils.extractSqlLong(maxlen));
      stringStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
      data.setStringStats(stringStats);
    } else if (colType.equals("binary")) {
      BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
      binaryStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
      binaryStats.setAvgColLen(MetastoreDirectSqlUtils.extractSqlDouble(avglen));
      binaryStats.setMaxColLen(MetastoreDirectSqlUtils.extractSqlLong(maxlen));
      data.setBinaryStats(binaryStats);
    } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint")
        || colType.equals("tinyint")) {
      LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
      longStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
      if (lhigh != null) {
        longStats.setHighValue(MetastoreDirectSqlUtils.extractSqlLong(lhigh));
      }
      if (llow != null) {
        longStats.setLowValue(MetastoreDirectSqlUtils.extractSqlLong(llow));
      }
      longStats.setNumDVs(estimateIntegralNdv(llow, lhigh, avgLong, dist, sumDist,
          useDensityFunctionForNDVEstimation, ndvTuner));
      data.setLongStats(longStats);
    } else if (colType.equals("date")) {
      DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
      dateStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
      if (lhigh != null) {
        dateStats.setHighValue(new Date(MetastoreDirectSqlUtils.extractSqlLong(lhigh)));
      }
      if (llow != null) {
        dateStats.setLowValue(new Date(MetastoreDirectSqlUtils.extractSqlLong(llow)));
      }
      dateStats.setNumDVs(estimateIntegralNdv(llow, lhigh, avgLong, dist, sumDist,
          useDensityFunctionForNDVEstimation, ndvTuner));
      data.setDateStats(dateStats);
    } else if (colType.equals("timestamp")) {
      TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector();
      timestampStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
      if (lhigh != null) {
        timestampStats.setHighValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(lhigh)));
      }
      if (llow != null) {
        timestampStats.setLowValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(llow)));
      }
      timestampStats.setNumDVs(estimateIntegralNdv(llow, lhigh, avgLong, dist, sumDist,
          useDensityFunctionForNDVEstimation, ndvTuner));
      data.setTimestampStats(timestampStats);
    } else if (colType.equals("double") || colType.equals("float")) {
      DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
      doubleStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
      if (dhigh != null) {
        doubleStats.setHighValue(MetastoreDirectSqlUtils.extractSqlDouble(dhigh));
      }
      if (dlow != null) {
        doubleStats.setLowValue(MetastoreDirectSqlUtils.extractSqlDouble(dlow));
      }
      long lowerBound = MetastoreDirectSqlUtils.extractSqlLong(dist);
      long higherBound = MetastoreDirectSqlUtils.extractSqlLong(sumDist);
      if (useDensityFunctionForNDVEstimation && dhigh != null && dlow != null && avgDouble != null
          && MetastoreDirectSqlUtils.extractSqlDouble(avgDouble) != 0.0) {
        // NOTE(review): both bounds are converted with extractSqlLong before the subtraction, so
        // fractional parts appear to be dropped; kept as-is to avoid shifting existing NDV
        // estimates - confirm whether extractSqlDouble was intended here.
        long estimation = MetastoreDirectSqlUtils
            .extractSqlLong((MetastoreDirectSqlUtils.extractSqlLong(dhigh) - MetastoreDirectSqlUtils
                .extractSqlLong(dlow)) / MetastoreDirectSqlUtils.extractSqlDouble(avgDouble));
        doubleStats.setNumDVs(clampNdv(estimation, lowerBound, higherBound));
      } else {
        doubleStats.setNumDVs(tunedNdv(lowerBound, higherBound, ndvTuner));
      }
      data.setDoubleStats(doubleStats);
    } else if (colType.startsWith("decimal")) {
      DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
      decimalStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
      Decimal low = null;
      Decimal high = null;
      BigDecimal blow = null;
      BigDecimal bhigh = null;
      if (dechigh instanceof BigDecimal) {
        bhigh = (BigDecimal) dechigh;
        high = DecimalUtils.getDecimal(ByteBuffer.wrap(bhigh.unscaledValue().toByteArray()),
            (short) bhigh.scale());
      } else if (dechigh instanceof String) {
        bhigh = new BigDecimal((String) dechigh);
        high = DecimalUtils.createThriftDecimal((String) dechigh);
      }
      decimalStats.setHighValue(high);
      if (declow instanceof BigDecimal) {
        blow = (BigDecimal) declow;
        low = DecimalUtils.getDecimal(ByteBuffer.wrap(blow.unscaledValue().toByteArray()),
            (short) blow.scale());
      } else if (declow instanceof String) {
        // The low bound must be dispatched on its own runtime type, not on the high bound's.
        blow = new BigDecimal((String) declow);
        low = DecimalUtils.createThriftDecimal((String) declow);
      }
      decimalStats.setLowValue(low);
      long lowerBound = MetastoreDirectSqlUtils.extractSqlLong(dist);
      long higherBound = MetastoreDirectSqlUtils.extractSqlLong(sumDist);
      // Guard on the parsed values: they stay null when a bound is neither BigDecimal nor String,
      // in which case the density estimate cannot be computed.
      if (useDensityFunctionForNDVEstimation && bhigh != null && blow != null
          && avgDecimal != null && MetastoreDirectSqlUtils.extractSqlDouble(avgDecimal) != 0.0) {
        long estimation = MetastoreDirectSqlUtils.extractSqlLong(bhigh.subtract(blow).floatValue()
            / MetastoreDirectSqlUtils.extractSqlDouble(avgDecimal));
        decimalStats.setNumDVs(clampNdv(estimation, lowerBound, higherBound));
      } else {
        decimalStats.setNumDVs(tunedNdv(lowerBound, higherBound, ndvTuner));
      }
      data.setDecimalStats(decimalStats);
    }
  }

  /**
   * Estimates the NDV for columns whose bounds are stored as longs (long, date, timestamp):
   * density based estimate clamped to {@code [dist, sumDist]} when applicable, tuner based
   * interpolation otherwise, and in both cases capped by the size of the inclusive value range.
   */
  private static long estimateIntegralNdv(Object low, Object high, Object avgDensity, Object dist,
      Object sumDist, boolean useDensityFunctionForNDVEstimation, double ndvTuner)
      throws MetaException {
    long lowerBound = MetastoreDirectSqlUtils.extractSqlLong(dist);
    long higherBound = MetastoreDirectSqlUtils.extractSqlLong(sumDist);
    boolean hasRange = high != null && low != null;
    long highValue = 0L;
    long lowValue = 0L;
    long rangeBound = Long.MAX_VALUE;
    if (hasRange) {
      highValue = MetastoreDirectSqlUtils.extractSqlLong(high);
      lowValue = MetastoreDirectSqlUtils.extractSqlLong(low);
      rangeBound = inclusiveRange(lowValue, highValue);
    }
    long estimation;
    if (useDensityFunctionForNDVEstimation && hasRange && avgDensity != null
        && MetastoreDirectSqlUtils.extractSqlDouble(avgDensity) != 0.0) {
      // We have estimation, lowerbound and higherbound. We use estimation if
      // it is between lowerbound and higherbound.
      estimation = clampNdv(
          MetastoreDirectSqlUtils.extractSqlLong(
              (highValue - lowValue) / MetastoreDirectSqlUtils.extractSqlDouble(avgDensity)),
          lowerBound, higherBound);
    } else {
      estimation = tunedNdv(lowerBound, higherBound, ndvTuner);
    }
    return Math.min(estimation, rangeBound);
  }

  /**
   * Returns {@code high - low + 1}, or {@link Long#MAX_VALUE} (i.e. "no bound") when that
   * computation overflows.
   */
  private static long inclusiveRange(long low, long high) {
    try {
      return Math.addExact(Math.subtractExact(high, low), 1L);
    } catch (ArithmeticException ignored) {
      // A wrapped value could be zero or negative and would corrupt the NDV cap.
      return Long.MAX_VALUE;
    }
  }

  /** Clamps {@code estimation} to the lower bound first, then to the higher bound. */
  private static long clampNdv(long estimation, long lowerBound, long higherBound) {
    if (estimation < lowerBound) {
      return lowerBound;
    }
    if (estimation > higherBound) {
      return higherBound;
    }
    return estimation;
  }

  /** Interpolates between the two bounds using {@code ndvTuner} and truncates to a long. */
  private static long tunedNdv(long lowerBound, long higherBound, double ndvTuner) {
    return (long) (lowerBound + (higherBound - lowerBound) * ndvTuner);
  }