in contrib/format-maprdb/src/main/java/org/apache/drill/exec/planner/index/MapRDBStatistics.java [348:491]
/**
 * Collects statistics for {@code condition} against the table and the supplied indexes and stores
 * them in the statistics caches via the {@code addToCache} overloads.
 *
 * <p>Outline of the work done here:
 * <ol>
 *   <li>Bail out (with a debug log) unless the scan is a {@code DrillScanRel}/{@code ScanPrel} whose
 *       group scan is a {@code JsonTableGroupScan}.</li>
 *   <li>With no filter condition, delegate to {@code populateStatsForNoFilter} and mark the stats
 *       as available.</li>
 *   <li>Fetch the full-table payload and cache the average row sizes of the table and every index;
 *       stop if the full-table payload is missing or reports zero rows.</li>
 *   <li>For each index with a first-key condition, ask the group scan for first-key estimates,
 *       cap them at the full-table values, cache them and record the base conditions.</li>
 *   <li>For each index with an index condition, derive row counts for the leading, total, index and
 *       included-column conditions from {@code computeSelectivity} and cache them.</li>
 *   <li>Cache the estimate for the complete condition and for the full table, then set
 *       {@code statsAvailable}.</li>
 * </ol>
 *
 * @param condition filter condition to gather statistics for; {@code null} means no filter
 * @param indexes   candidate indexes of the table
 * @param scanRel   scan relational node the statistics are computed for
 * @param context   index planning call context
 */
private void populateStats(RexNode condition, IndexCollection indexes, DrillScanRelBase scanRel,
    IndexCallContext context) {
  final GroupScan grpScan = IndexPlanUtils.getGroupScan(scanRel);
  final boolean supportedRel = scanRel instanceof DrillScanRel || scanRel instanceof ScanPrel;
  if (!supportedRel || !(grpScan instanceof JsonTableGroupScan)) {
    logger.debug("Statistics: populateStats exit early - not an instance of JsonTableGroupScan!");
    return;
  }
  final JsonTableGroupScan jTabGrpScan = (JsonTableGroupScan) grpScan;

  // Without a filter there is nothing condition-specific to estimate.
  if (condition == null) {
    populateStatsForNoFilter(jTabGrpScan, indexes, scanRel, context);
    statsAvailable = true;
    return;
  }

  final RexBuilder builder = scanRel.getCluster().getRexBuilder();
  final PlannerSettings settings = PrelUtil.getSettings(scanRel.getCluster());

  // Full table payload: carries the total number of rows in the table.
  final StatisticsPayload ftsPayload = jTabGrpScan.getFirstKeyEstimatedStats(null, null, scanRel);
  // Average row size of the table itself ...
  addToCache(null, jTabGrpScan.getAverageRowSizeStats(null), ftsPayload);
  if (ftsPayload == null || ftsPayload.getRowCount() == 0) {
    return;
  }
  // ... and of every index.
  for (IndexDescriptor idx : indexes) {
    addToCache(idx, jTabGrpScan.getAverageRowSizeStats(idx), ftsPayload);
  }

  /* Restrict the analysis to indexes with a distinct first key */
  final IndexCollection distFKeyIndexes = distinctFKeyIndexes(indexes, scanRel);
  final IndexConditionInfo.Builder infoBuilder =
      IndexConditionInfo.newBuilder(condition, distFKeyIndexes, builder, scanRel);
  final Map<IndexDescriptor, IndexConditionInfo> idxConditionMap = infoBuilder.getIndexConditionMap();
  final Map<IndexDescriptor, IndexConditionInfo> firstKeyIdxConditionMap =
      infoBuilder.getFirstKeyIndexConditionMap();

  /* Selectivities of the individual base conditions making up an ANDed/ORed condition. The overall
   * selectivity of a complex condition is computed from these, which helps avoid over/under
   * estimates and guessed selectivities for ORed predicates.
   */
  final Map<String, Double> baseConditionMap = new HashMap<>();

  for (Map.Entry<IndexDescriptor, IndexConditionInfo> fkEntry : firstKeyIdxConditionMap.entrySet()) {
    final IndexDescriptor idx = fkEntry.getKey();
    if (IndexPlanUtils.conditionIndexed(context.getOrigMarker(), idx) == IndexPlanUtils.ConditionIndexed.NONE) {
      continue;
    }
    final RexNode idxCondition = fkEntry.getValue().indexCondition;
    /* The pre-processed condition is used solely to obtain the actual statistic from the MapR-DB
     * APIs (e.g. LIKE is converted into a RANGE condition). Cache stores/lookups keep using the
     * original condition, because statistics are always requested for LIKE and NOT for the RANGE.
     */
    final RexNode preProcIdxCondition = convertToStatsCondition(idxCondition, idx, context, scanRel,
        Arrays.asList(SqlKind.CAST, SqlKind.LIKE));
    final FunctionalIndexInfo functionInfo = idx.getFunctionalInfo();
    final RelDataType newRowType = functionInfo.hasFunctional()
        ? FunctionalIndexHelper.rewriteFunctionalRowType(scanRel, context, functionInfo)
        : scanRel.getRowType();
    final QueryCondition queryCondition = jTabGrpScan.convertToQueryCondition(
        convertToLogicalExpression(preProcIdxCondition, newRowType, settings, builder));
    final StatisticsPayload idxPayload = jTabGrpScan.getFirstKeyEstimatedStats(queryCondition, idx, scanRel);
    // Cap rows/size at the full-table values in case of issues with DB APIs
    final double cappedRows = Math.min(idxPayload.getRowCount(), ftsPayload.getRowCount());
    final double cappedLeadingRows = Math.min(idxPayload.getLeadingRowCount(), cappedRows);
    final double cappedRowSize = Math.min(idxPayload.getAvgRowSize(), ftsPayload.getAvgRowSize());
    final StatisticsPayload cappedPayload =
        new MapRDBStatisticsPayload(cappedRows, cappedLeadingRows, cappedRowSize);
    addToCache(idxCondition, idx, context, cappedPayload, jTabGrpScan, scanRel, newRowType);
    addBaseConditions(idxCondition, cappedPayload, false, baseConditionMap, scanRel.getRowType());
  }

  /* Record row counts for the index conditions of all indexes. Stats are computed for leading keys
   * only, yet index conditions can be pushed down and are needed for access path costing.
   */
  for (Map.Entry<IndexDescriptor, IndexConditionInfo> condEntry : idxConditionMap.entrySet()) {
    final IndexDescriptor idx = condEntry.getKey();
    if (IndexPlanUtils.conditionIndexed(context.getOrigMarker(), idx) == IndexPlanUtils.ConditionIndexed.NONE) {
      continue;
    }
    final Map<LogicalExpression, RexNode> leadingPrefixMap = Maps.newHashMap();
    final IndexConditionInfo condInfo = condEntry.getValue();
    final RexNode idxCondition = condInfo.indexCondition;
    // Conditions that always evaluate to true carry no information - skip them
    if (idxCondition.isAlwaysTrue()) {
      continue;
    }
    final RexNode idxIncColCondition = condInfo.remainderCondition;
    final RexNode idxRemColCondition = IndexPlanUtils.getLeadingPrefixMap(
        leadingPrefixMap, idx.getIndexColumns(), infoBuilder, idxCondition);
    final RexNode idxLeadColCondition = IndexPlanUtils.getLeadingColumnsFilter(
        IndexPlanUtils.getLeadingFilters(leadingPrefixMap, idx.getIndexColumns()), builder);
    final RexNode idxTotRemColCondition =
        IndexPlanUtils.getTotalRemainderFilter(idxRemColCondition, idxIncColCondition, builder);
    final RexNode idxTotColCondition =
        IndexPlanUtils.getTotalFilter(idxLeadColCondition, idxTotRemColCondition, builder);

    final FunctionalIndexInfo functionInfo = idx.getFunctionalInfo();
    RelDataType newRowType = scanRel.getRowType();
    if (functionInfo.hasFunctional()) {
      newRowType = FunctionalIndexHelper.rewriteFunctionalRowType(scanRel, context, functionInfo);
    }

    /* Non-covering plans: need the index leading condition */
    final double leadingRows = ftsPayload.getRowCount() * computeSelectivity(idxLeadColCondition, idx,
        ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
    final double avgRowSize = fIStatsCache.get(buildUniqueIndexIdentifier(idx)).getAvgRowSize();
    addToCache(idxLeadColCondition, idx, context,
        new MapRDBStatisticsPayload(leadingRows, leadingRows, avgRowSize),
        jTabGrpScan, scanRel, newRowType);

    /* Covering plans: need the full condition */
    final double coveringRows = ftsPayload.getRowCount() * computeSelectivity(idxTotColCondition, idx,
        ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
    addToCache(idxTotColCondition, idx, context,
        new MapRDBStatisticsPayload(coveringRows, leadingRows, avgRowSize),
        jTabGrpScan, scanRel, newRowType);

    /* Intersect plans: need the index condition */
    final double intersectRows = ftsPayload.getRowCount() * computeSelectivity(idxCondition, idx,
        ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
    addToCache(idxCondition, idx, context,
        new MapRDBStatisticsPayload(intersectRows, leadingRows, avgRowSize),
        jTabGrpScan, scanRel, newRowType);

    /* Condition on included columns only - no leading columns involved here! */
    if (idxIncColCondition != null) {
      final double includedRows = ftsPayload.getRowCount() * computeSelectivity(idxIncColCondition, null,
          ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
      addToCache(idxIncColCondition, idx, context,
          new MapRDBStatisticsPayload(includedRows, includedRows, avgRowSize),
          jTabGrpScan, scanRel, newRowType);
    }
  }

  // Row count for the complete condition, evaluated against the table
  final double conditionRows = ftsPayload.getRowCount() * computeSelectivity(condition, null,
      ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
  // The leading-key row count of the full table scan is based on _id predicates
  final StatisticsPayload ftsLeadingKeyPayload = jTabGrpScan.getFirstKeyEstimatedStats(
      jTabGrpScan.convertToQueryCondition(
          convertToLogicalExpression(condition, scanRel.getRowType(), settings, builder)),
      null, scanRel);
  addToCache(condition, null, null,
      new MapRDBStatisticsPayload(conditionRows, ftsLeadingKeyPayload.getRowCount(), ftsPayload.getAvgRowSize()),
      jTabGrpScan, scanRel, scanRel.getRowType());

  // Also store the full table rows, keyed by <NULL> RexNode / <NULL> QueryCondition.
  // There is no leading key for the full table, so leadingKeyRowcount = totalRowCount.
  addToCache(null, null, null,
      new MapRDBStatisticsPayload(ftsPayload.getRowCount(), ftsPayload.getRowCount(), ftsPayload.getAvgRowSize()),
      jTabGrpScan, scanRel, scanRel.getRowType());

  // Flag that statistics are now available
  statsAvailable = true;
}