in ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java [1019:1307]
/**
 * Estimates how many of {@code currNumRows} rows satisfy a comparison between a column and a constant.
 *
 * <p>Only the shapes {@code column OP constant} and {@code constant OP column} are analysed; any other
 * shape yields the default estimate {@code currNumRows / 3}. The column is registered with
 * {@code aspCtx.addAffectedColumn}. A {@code null} constant yields 0 because a comparison to null never
 * returns true.
 *
 * <p>When {@code FilterSelectivityEstimator.isHistogramAvailable} reports a histogram for the column, the
 * estimate is delegated to {@code evaluateComparatorWithHistogram}; a {@code SketchesArgumentException}
 * from that path is logged and the range-based estimate below is used instead. Otherwise, if the column
 * statistics carry both a minimum and a maximum, the constant is parsed according to the column type and
 * compared against that range. Unsupported column types, missing statistics and constants that cannot be
 * parsed all yield {@code currNumRows / 3}.
 *
 * @param stats statistics from which the column statistics are looked up by column name
 * @param aspCtx context that records the affected column and tells whether values may be assumed to be
 *               uniformly distributed within the column range
 * @param genFunc the comparison expression; its first two children are inspected
 * @param currNumRows number of rows before the comparison is applied
 * @return the estimated number of rows that satisfy the comparison
 */
private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, ExprNodeGenericFuncDesc genFunc,
    long currNumRows) {
  GenericUDF udf = genFunc.getGenericUDF();

  // Work out which operand is the column and which one is the constant.
  boolean columnFirst;
  if (genFunc.getChildren().get(0) instanceof ExprNodeColumnDesc &&
      genFunc.getChildren().get(1) instanceof ExprNodeConstantDesc) {
    columnFirst = true;
  } else if (genFunc.getChildren().get(1) instanceof ExprNodeColumnDesc &&
      genFunc.getChildren().get(0) instanceof ExprNodeConstantDesc) {
    columnFirst = false;
  } else {
    // default
    return currNumRows / 3;
  }
  ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) genFunc.getChildren().get(columnFirst ? 0 : 1);
  ExprNodeConstantDesc constantDesc = (ExprNodeConstantDesc) genFunc.getChildren().get(columnFirst ? 1 : 0);
  aspCtx.addAffectedColumn(columnDesc);
  // Comparison to null will always return false
  if (constantDesc.getValue() == null) {
    return 0;
  }
  String boundValue = constantDesc.getValue().toString();
  // "col < c" and "col <= c" bound the column from above, and so do the mirrored "c > col" and
  // "c >= col". Every other operator is treated as a lower bound.
  boolean upperBound = columnFirst
      ? (udf instanceof GenericUDFOPEqualOrLessThan || udf instanceof GenericUDFOPLessThan)
      : (udf instanceof GenericUDFOPEqualOrGreaterThan || udf instanceof GenericUDFOPGreaterThan);
  boolean closedBound = isClosedBound(udf);

  ColStatistics cs = stats.getColumnStatisticsFromColName(columnDesc.getColumn());
  String colTypeLowerCase = columnDesc.getTypeString().toLowerCase();
  if (FilterSelectivityEstimator.isHistogramAvailable(cs)) {
    try {
      return evaluateComparatorWithHistogram(
          cs, currNumRows, colTypeLowerCase, boundValue, upperBound, closedBound);
    } catch (SketchesArgumentException e) {
      LOG.info("Sketch-based statistics estimation failed, falling back to regular estimation", e);
    }
  }

  if (cs == null || cs.getRange() == null ||
      cs.getRange().maxValue == null || cs.getRange().minValue == null) {
    // default
    return currNumRows / 3;
  }

  try {
    if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)) {
      return estimateRowsInIntegralRange(aspCtx, Byte.parseByte(boundValue),
          cs.getRange().minValue.byteValue(), cs.getRange().maxValue.byteValue(),
          upperBound, closedBound, currNumRows);
    } else if (colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)) {
      return estimateRowsInIntegralRange(aspCtx, Short.parseShort(boundValue),
          cs.getRange().minValue.shortValue(), cs.getRange().maxValue.shortValue(),
          upperBound, closedBound, currNumRows);
    } else if (colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME) ||
        colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
      int value;
      if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
        // Date is an integer internally
        DateWritable writableVal = new DateWritable(java.sql.Date.valueOf(boundValue));
        value = writableVal.getDays();
      } else {
        value = Integer.parseInt(boundValue);
      }
      return estimateRowsInIntegralRange(aspCtx, value,
          cs.getRange().minValue.intValue(), cs.getRange().maxValue.intValue(),
          upperBound, closedBound, currNumRows);
    } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME) ||
        colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
      long value;
      if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
        // Timestamps are compared against the range as seconds since the epoch.
        TimestampWritableV2 timestampWritable = new TimestampWritableV2(Timestamp.valueOf(boundValue));
        value = timestampWritable.getTimestamp().toEpochSecond();
      } else {
        value = Long.parseLong(boundValue);
      }
      return estimateRowsInIntegralRange(aspCtx, value,
          cs.getRange().minValue.longValue(), cs.getRange().maxValue.longValue(),
          upperBound, closedBound, currNumRows);
    } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
      // The float operands are widened to double, which preserves their ordering and equality.
      return estimateRowsInFloatingRange(aspCtx, Float.parseFloat(boundValue),
          cs.getRange().minValue.floatValue(), cs.getRange().maxValue.floatValue(),
          upperBound, closedBound, currNumRows);
    } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
      return estimateRowsInFloatingRange(aspCtx, Double.parseDouble(boundValue),
          cs.getRange().minValue.doubleValue(), cs.getRange().maxValue.doubleValue(),
          upperBound, closedBound, currNumRows);
    } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
      return estimateRowsInDecimalRange(aspCtx, new BigDecimal(boundValue),
          new BigDecimal(cs.getRange().minValue.toString()),
          new BigDecimal(cs.getRange().maxValue.toString()),
          upperBound, closedBound, currNumRows);
    }
  } catch (IllegalArgumentException e) {
    // The constant could not be parsed as a value of the column type. NumberFormatException (numeric
    // parsers, BigDecimal) is a subclass of IllegalArgumentException, which java.sql.Date.valueOf
    // throws for malformed dates; presumably Timestamp.valueOf behaves alike - TODO confirm.
    return currNumRows / 3;
  }
  // default
  return currNumRows / 3;
}

/**
 * Range-based estimate for integral bounds (tinyint, smallint, int, bigint and the integer encodings of
 * date and timestamp used by the caller).
 *
 * @param aspCtx consulted for {@code isUniformWithinRange()} only when the bound lies inside the range
 * @param value the constant the column is compared to
 * @param minValue smallest column value according to the statistics
 * @param maxValue largest column value according to the statistics
 * @param upperBound {@code true} if the constant bounds the column from above, {@code false} if from below
 * @param closedBound {@code true} if values equal to the constant satisfy the comparison
 * @param currNumRows number of rows before the comparison is applied
 * @return {@code currNumRows} if the whole range qualifies, 0 if none of it does, the proportional share
 *         under the uniform assumption, or {@code currNumRows / 3} when uniformity may not be assumed
 */
private long estimateRowsInIntegralRange(AnnotateStatsProcCtx aspCtx, long value, long minValue, long maxValue,
    boolean upperBound, boolean closedBound, long currNumRows) {
  if (upperBound) {
    if (maxValue < value || maxValue == value && closedBound) {
      return currNumRows;
    }
    if (minValue > value || minValue == value && !closedBound) {
      return 0;
    }
  } else {
    if (minValue > value || minValue == value && closedBound) {
      return currNumRows;
    }
    if (maxValue < value || maxValue == value && !closedBound) {
      return 0;
    }
  }
  if (!aspCtx.isUniformWithinRange()) {
    // default
    return currNumRows / 3;
  }
  // Assuming uniform distribution, we can use the range to calculate
  // new estimate for the number of rows
  double selected = upperBound ? exactLongDifference(value, minValue) : exactLongDifference(maxValue, value);
  return Math.round((selected / exactLongDifference(maxValue, minValue)) * currNumRows);
}

/**
 * Returns {@code minuend - subtrahend} as a double. The difference is computed exactly first, so a
 * range wider than {@code Long.MAX_VALUE} does not wrap around the way a plain long subtraction would.
 */
private double exactLongDifference(long minuend, long subtrahend) {
  return BigDecimal.valueOf(minuend).subtract(BigDecimal.valueOf(subtrahend)).doubleValue();
}

/**
 * Range-based estimate for float and double bounds; the parameters and the result have the same meaning
 * as in {@code estimateRowsInIntegralRange}.
 */
private long estimateRowsInFloatingRange(AnnotateStatsProcCtx aspCtx, double value, double minValue,
    double maxValue, boolean upperBound, boolean closedBound, long currNumRows) {
  if (upperBound) {
    if (maxValue < value || maxValue == value && closedBound) {
      return currNumRows;
    }
    if (minValue > value || minValue == value && !closedBound) {
      return 0;
    }
  } else {
    if (minValue > value || minValue == value && closedBound) {
      return currNumRows;
    }
    if (maxValue < value || maxValue == value && !closedBound) {
      return 0;
    }
  }
  if (!aspCtx.isUniformWithinRange()) {
    // default
    return currNumRows / 3;
  }
  // Assuming uniform distribution, we can use the range to calculate
  // new estimate for the number of rows
  double selected = upperBound ? value - minValue : maxValue - value;
  return Math.round((selected / (maxValue - minValue)) * currNumRows);
}

/**
 * Range-based estimate for decimal bounds; the parameters and the result have the same meaning as in
 * {@code estimateRowsInIntegralRange}.
 */
private long estimateRowsInDecimalRange(AnnotateStatsProcCtx aspCtx, BigDecimal value, BigDecimal minValue,
    BigDecimal maxValue, boolean upperBound, boolean closedBound, long currNumRows) {
  int minComparison = value.compareTo(minValue);
  int maxComparison = value.compareTo(maxValue);
  if (upperBound) {
    if (maxComparison > 0 || maxComparison == 0 && closedBound) {
      return currNumRows;
    }
    if (minComparison < 0 || minComparison == 0 && !closedBound) {
      return 0;
    }
  } else {
    if (minComparison < 0 || minComparison == 0 && closedBound) {
      return currNumRows;
    }
    if (maxComparison > 0 || maxComparison == 0 && !closedBound) {
      return 0;
    }
  }
  if (!aspCtx.isUniformWithinRange()) {
    // default
    return currNumRows / 3;
  }
  // Assuming uniform distribution, we can use the range to calculate
  // new estimate for the number of rows
  BigDecimal selected = upperBound ? value.subtract(minValue) : maxValue.subtract(value);
  // Divide with an explicit scale: divide(divisor, roundingMode) keeps the dividend's scale, so with
  // scale-0 operands a ratio such as 5/10 would be rounded up to 1 and every row would be selected.
  int fractionScale = Math.max(selected.scale(), 16);
  BigDecimal fraction = selected.divide(maxValue.subtract(minValue), fractionScale, RoundingMode.UP);
  return Math.round(fraction.multiply(BigDecimal.valueOf(currNumRows)).doubleValue());
}