in fe/src/main/java/org/apache/impala/catalog/ColumnStats.java [787:951]
/**
 * Converts the Thrift column stats in 'colStats' into a Hive ColumnStatisticsData
 * object whose populated variant is chosen by the primitive type of 'colType'.
 *
 * The number of distinct values (NDV) is first capped at 'capNdv' when 'capNdv' is
 * non-negative. For TINYINT, SMALLINT, INT, DATE and DECIMAL it is additionally capped
 * at the number of distinct values the type can represent. Low/high values are only
 * forwarded when they are set in 'colStats' and carry the Thrift field that matches
 * the column type; otherwise null is passed on for that bound.
 *
 * @param capNdv upper bound for the NDV (the row count, if available); a negative
 *     value disables this cap
 * @param colStats the Thrift stats to convert
 * @param colType the type of the column the stats belong to
 * @return the populated stats object, or null if the primitive type of 'colType' is
 *     not handled by this method
 */
public static ColumnStatisticsData createHiveColStatsData(
    long capNdv, TColumnStats colStats, Type colType) {
  ColumnStatisticsData colStatsData = new ColumnStatisticsData();
  long ndv = colStats.getNum_distinct_values();
  // Cap NDV at row count if available.
  if (capNdv >= 0) ndv = Math.min(ndv, capNdv);
  long numNulls = colStats.getNum_nulls();
  long numTrues = colStats.getNum_trues();
  long numFalses = colStats.getNum_falses();
  boolean isLowValueSet = colStats.isSetLow_value();
  boolean isHighValueSet = colStats.isSetHigh_value();
  long maxStrLen = colStats.getMax_size();
  double avgStrLen = colStats.getAvg_size();
  switch (colType.getPrimitiveType()) {
    case BOOLEAN:
      // Boolean stats carry true/false/null counts only; no NDV or low/high values.
      colStatsData.setBooleanStats(
          new BooleanColumnStatsData(numTrues, numFalses, numNulls));
      break;
    case TINYINT:
      {
        // A TINYINT cannot have more than 2^8 distinct values.
        ndv = Math.min(ndv, LongMath.pow(2, Byte.SIZE));
        LongColumnStatsData longColStatsData = new LongColumnStatsData(numNulls, ndv);
        Long lowValue = null;
        Long highValue = null;
        if (isLowValueSet && colStats.low_value.isSetByte_val()) {
          lowValue = (long) colStats.low_value.getByte_val();
        }
        if (isHighValueSet && colStats.high_value.isSetByte_val()) {
          highValue = (long) colStats.high_value.getByte_val();
        }
        updateLowAndHighForHiveColumnStatsData(lowValue, highValue, longColStatsData);
        colStatsData.setLongStats(longColStatsData);
      }
      break;
    case SMALLINT:
      {
        // A SMALLINT cannot have more than 2^16 distinct values.
        ndv = Math.min(ndv, LongMath.pow(2, Short.SIZE));
        LongColumnStatsData longColStatsData = new LongColumnStatsData(numNulls, ndv);
        Long lowValue = null;
        Long highValue = null;
        if (isLowValueSet && colStats.low_value.isSetShort_val()) {
          lowValue = (long) colStats.low_value.getShort_val();
        }
        if (isHighValueSet && colStats.high_value.isSetShort_val()) {
          highValue = (long) colStats.high_value.getShort_val();
        }
        updateLowAndHighForHiveColumnStatsData(lowValue, highValue, longColStatsData);
        colStatsData.setLongStats(longColStatsData);
      }
      break;
    case INT:
      {
        // An INT cannot have more than 2^32 distinct values.
        ndv = Math.min(ndv, LongMath.pow(2, Integer.SIZE));
        LongColumnStatsData longColStatsData = new LongColumnStatsData(numNulls, ndv);
        Long lowValue = null;
        Long highValue = null;
        if (isLowValueSet && colStats.low_value.isSetInt_val()) {
          lowValue = (long) colStats.low_value.getInt_val();
        }
        if (isHighValueSet && colStats.high_value.isSetInt_val()) {
          highValue = (long) colStats.high_value.getInt_val();
        }
        updateLowAndHighForHiveColumnStatsData(lowValue, highValue, longColStatsData);
        colStatsData.setLongStats(longColStatsData);
      }
      break;
    case DATE:
      {
        // Number of distinct dates in the 0001-01-01..9999-12-31 inclusive range is
        // 3652059.
        ndv = Math.min(ndv, 3652059);
        DateColumnStatsData dateColStatsData = new DateColumnStatsData(numNulls, ndv);
        Date lowValue = null;
        Date highValue = null;
        if (isLowValueSet && colStats.low_value.isSetDate_val()) {
          lowValue = new Date(colStats.low_value.getDate_val());
        }
        if (isHighValueSet && colStats.high_value.isSetDate_val()) {
          highValue = new Date(colStats.high_value.getDate_val());
        }
        updateLowAndHighForHiveColumnStatsData(lowValue, highValue, dateColStatsData);
        colStatsData.setDateStats(dateColStatsData);
      }
      break;
    case BIGINT:
      {
        // No type-based cap: 'ndv' is itself a long.
        LongColumnStatsData longColStatsData = new LongColumnStatsData(numNulls, ndv);
        Long lowValue = null;
        Long highValue = null;
        if (isLowValueSet && colStats.low_value.isSetLong_val()) {
          lowValue = colStats.low_value.getLong_val();
        }
        if (isHighValueSet && colStats.high_value.isSetLong_val()) {
          highValue = colStats.high_value.getLong_val();
        }
        updateLowAndHighForHiveColumnStatsData(lowValue, highValue, longColStatsData);
        colStatsData.setLongStats(longColStatsData);
      }
      break;
    case TIMESTAMP: // Hive and Impala use LongColumnStatsData for timestamps.
      // No low/high values are forwarded for timestamps.
      colStatsData.setLongStats(new LongColumnStatsData(numNulls, ndv));
      break;
    case FLOAT:
    case DOUBLE:
      {
        DoubleColumnStatsData doubleColStatsData =
            new DoubleColumnStatsData(numNulls, ndv);
        Double lowValue = null;
        Double highValue = null;
        if (isLowValueSet && colStats.low_value.isSetDouble_val()) {
          lowValue = colStats.low_value.getDouble_val();
        }
        if (isHighValueSet && colStats.high_value.isSetDouble_val()) {
          highValue = colStats.high_value.getDouble_val();
        }
        updateLowAndHighForHiveColumnStatsData(lowValue, highValue, doubleColStatsData);
        colStatsData.setDoubleStats(doubleColStatsData);
      }
      break;
    case CHAR:
    case VARCHAR:
    case STRING:
      colStatsData.setStringStats(
          new StringColumnStatsData(maxStrLen, avgStrLen, numNulls, ndv));
      break;
    case BINARY:
      // No NDV is stored for BINARY.
      colStatsData.setBinaryStats(
          new BinaryColumnStatsData(maxStrLen, avgStrLen, numNulls));
      break;
    case DECIMAL:
      {
        // A decimal of precision p has at most 10^p distinct values. The cap is
        // computed in exact long arithmetic instead of going through double, so that
        // NDVs above 2^53 are not rounded by a long -> double -> long round trip.
        // 10^18 is the largest power of ten that fits in a long; for larger precisions
        // the bound exceeds Long.MAX_VALUE and therefore can never lower 'ndv'.
        int precision = colType.getPrecision();
        if (precision >= 0 && precision <= 18) {
          ndv = Math.min(ndv, LongMath.pow(10, precision));
        }
        DecimalColumnStatsData decimalStatsData =
            new DecimalColumnStatsData(numNulls, ndv);
        Decimal lowValue = null;
        Decimal highValue = null;
        ScalarType colTypeScalar = (ScalarType) colType;
        if (isLowValueSet && colStats.low_value.isSetDecimal_val()) {
          lowValue = new Decimal((short) colTypeScalar.decimalScale(),
              colStats.low_value.bufferForDecimal_val());
        }
        if (isHighValueSet && colStats.high_value.isSetDecimal_val()) {
          highValue = new Decimal((short) colTypeScalar.decimalScale(),
              colStats.high_value.bufferForDecimal_val());
        }
        updateLowAndHighForHiveColumnStatsData(lowValue, highValue, decimalStatsData);
        colStatsData.setDecimalStats(decimalStatsData);
      }
      break;
    default:
      // Stats are not produced for any other primitive type.
      return null;
  }
  return colStatsData;
}