private void populateStats()

in contrib/format-maprdb/src/main/java/org/apache/drill/exec/planner/index/MapRDBStatistics.java [348:491]
112 lines of code
16 McCabe index (conditional complexity)

  /**
   * Populates the statistics cache for the given filter condition against the primary table and the
   * supplied indexes.
   *
   * <p>The method exits without touching the cache when the scan is not a {@code DrillScanRel} or
   * {@code ScanPrel} backed by a {@code JsonTableGroupScan}. A {@code null} condition is delegated to
   * {@code populateStatsForNoFilter}. When the full table payload is {@code null} or reports zero
   * rows, the method returns without marking the statistics as available.
   *
   * @param condition filter for which statistics are required; {@code null} means no filter
   * @param indexes   candidate indexes
   * @param scanRel   scan being planned
   * @param context   index planning call context
   */
  private void populateStats(RexNode condition, IndexCollection indexes, DrillScanRelBase scanRel,
                               IndexCallContext context) {
    GroupScan grpScan = IndexPlanUtils.getGroupScan(scanRel);
    boolean supportedScanRel = scanRel instanceof DrillScanRel || scanRel instanceof ScanPrel;
    if (!supportedScanRel || !(grpScan instanceof JsonTableGroupScan)) {
      logger.debug("Statistics: populateStats exit early - not an instance of JsonTableGroupScan!");
      return;
    }
    JsonTableGroupScan jsonScan = (JsonTableGroupScan) grpScan;

    if (condition == null) {
      populateStatsForNoFilter(jsonScan, indexes, scanRel, context);
      statsAvailable = true;
      return;
    }

    RexBuilder rexBuilder = scanRel.getCluster().getRexBuilder();
    PlannerSettings plannerSettings = PrelUtil.getSettings(scanRel.getCluster());
    // Full table payload - carries the total number of rows in the table
    StatisticsPayload tablePayload = jsonScan.getFirstKeyEstimatedStats(null, null, scanRel);

    // Average row size: first for the table itself, then for every index
    addToCache(null, jsonScan.getAverageRowSizeStats(null), tablePayload);
    if (tablePayload == null || tablePayload.getRowCount() == 0) {
      return;
    }
    for (IndexDescriptor idx : indexes) {
      StatisticsPayload rowSizePayload = jsonScan.getAverageRowSizeStats(idx);
      addToCache(idx, rowSizePayload, tablePayload);
    }

    /* Only indexes with a distinct first key take part from here on */
    IndexCollection distFKeyIndexes = distinctFKeyIndexes(indexes, scanRel);
    IndexConditionInfo.Builder infoBuilder = IndexConditionInfo.newBuilder(condition,
        distFKeyIndexes, rexBuilder, scanRel);
    Map<IndexDescriptor, IndexConditionInfo> allConditions = infoBuilder.getIndexConditionMap();
    Map<IndexDescriptor, IndexConditionInfo> firstKeyConditions = infoBuilder.getFirstKeyIndexConditionMap();
    /* Individual base conditions of an ANDed/ORed condition mapped to their selectivities. The
     * overall selectivity of a complex condition is derived from these, which helps prevent
     * over/under estimates and guessed selectivity for ORed predicates.
     */
    Map<String, Double> baseSelectivities = new HashMap<>();

    for (Map.Entry<IndexDescriptor, IndexConditionInfo> entry : firstKeyConditions.entrySet()) {
      IndexDescriptor idx = entry.getKey();
      if (IndexPlanUtils.conditionIndexed(context.getOrigMarker(), idx) == IndexPlanUtils.ConditionIndexed.NONE) {
        continue;
      }
      RexNode idxCondition = entry.getValue().indexCondition;
      /* The pre-processed condition is used only to fetch the actual statistic from the MapR-DB
       * APIs (e.g. LIKE is converted into a RANGE condition). Cache stores/lookups keep using the
       * original condition, because statistics are always asked for LIKE and NOT for the RANGE.
       */
      RexNode statsCondition = convertToStatsCondition(idxCondition, idx, context, scanRel,
          Arrays.asList(SqlKind.CAST, SqlKind.LIKE));
      FunctionalIndexInfo functionInfo = idx.getFunctionalInfo();
      RelDataType idxRowType = functionInfo.hasFunctional()
          ? FunctionalIndexHelper.rewriteFunctionalRowType(scanRel, context, functionInfo)
          : scanRel.getRowType();

      QueryCondition queryCondition = jsonScan.convertToQueryCondition(
          convertToLogicalExpression(statsCondition, idxRowType, plannerSettings, rexBuilder));
      StatisticsPayload idxPayload = jsonScan.getFirstKeyEstimatedStats(queryCondition, idx, scanRel);
      // Cap rows/size at the table totals in case of issues with DB APIs
      double cappedRows = Math.min(idxPayload.getRowCount(), tablePayload.getRowCount());
      double cappedLeadingRows = Math.min(idxPayload.getLeadingRowCount(), cappedRows);
      double cappedRowSize = Math.min(idxPayload.getAvgRowSize(), tablePayload.getAvgRowSize());
      StatisticsPayload cappedPayload = new MapRDBStatisticsPayload(cappedRows, cappedLeadingRows, cappedRowSize);
      addToCache(idxCondition, idx, context, cappedPayload, jsonScan, scanRel, idxRowType);
      addBaseConditions(idxCondition, cappedPayload, false, baseSelectivities, scanRel.getRowType());
    }

    /* Row counts for the index conditions on all indexes. Stats are only computed for leading keys,
     * but index conditions can be pushed and are required for access path costing.
     */
    for (Map.Entry<IndexDescriptor, IndexConditionInfo> entry : allConditions.entrySet()) {
      IndexDescriptor idx = entry.getKey();
      if (IndexPlanUtils.conditionIndexed(context.getOrigMarker(), idx) == IndexPlanUtils.ConditionIndexed.NONE) {
        continue;
      }
      IndexConditionInfo conditionInfo = entry.getValue();
      RexNode idxCondition = conditionInfo.indexCondition;
      // Conditions which always evaluate to true carry no information
      if (idxCondition.isAlwaysTrue()) {
        continue;
      }
      Map<LogicalExpression, RexNode> leadingPrefixMap = Maps.newHashMap();
      RexNode includedColCondition = conditionInfo.remainderCondition;
      RexNode remainderColCondition = IndexPlanUtils.getLeadingPrefixMap(leadingPrefixMap,
          idx.getIndexColumns(), infoBuilder, idxCondition);
      RexNode leadingColCondition = IndexPlanUtils.getLeadingColumnsFilter(
          IndexPlanUtils.getLeadingFilters(leadingPrefixMap, idx.getIndexColumns()), rexBuilder);
      RexNode totalRemainderCondition = IndexPlanUtils.getTotalRemainderFilter(remainderColCondition,
          includedColCondition, rexBuilder);
      RexNode totalCondition = IndexPlanUtils.getTotalFilter(leadingColCondition,
          totalRemainderCondition, rexBuilder);
      FunctionalIndexInfo functionInfo = idx.getFunctionalInfo();
      RelDataType idxRowType = scanRel.getRowType();
      if (functionInfo.hasFunctional()) {
        idxRowType = FunctionalIndexHelper.rewriteFunctionalRowType(scanRel, context, functionInfo);
      }

      /* Non-covering plans need the index leading condition */
      double leadingRows = tablePayload.getRowCount() * computeSelectivity(leadingColCondition, idx,
          tablePayload.getRowCount(), scanRel, baseSelectivities).left;
      double idxAvgRowSize = fIStatsCache.get(buildUniqueIndexIdentifier(idx)).getAvgRowSize();
      addToCache(leadingColCondition, idx, context,
          new MapRDBStatisticsPayload(leadingRows, leadingRows, idxAvgRowSize),
          jsonScan, scanRel, idxRowType);

      /* Covering plans need the full condition */
      double coveringRows = tablePayload.getRowCount() * computeSelectivity(totalCondition, idx,
          tablePayload.getRowCount(), scanRel, baseSelectivities).left;
      addToCache(totalCondition, idx, context,
          new MapRDBStatisticsPayload(coveringRows, leadingRows, idxAvgRowSize),
          jsonScan, scanRel, idxRowType);

      /* Intersect plans need the index condition */
      double intersectRows = tablePayload.getRowCount() * computeSelectivity(idxCondition, idx,
          tablePayload.getRowCount(), scanRel, baseSelectivities).left;
      addToCache(idxCondition, idx, context,
          new MapRDBStatisticsPayload(intersectRows, leadingRows, idxAvgRowSize),
          jsonScan, scanRel, idxRowType);

      /* Condition on only the included columns - no leading columns here! */
      if (includedColCondition != null) {
        double includedRows = tablePayload.getRowCount() * computeSelectivity(includedColCondition, null,
            tablePayload.getRowCount(), scanRel, baseSelectivities).left;
        addToCache(includedColCondition, idx, context,
            new MapRDBStatisticsPayload(includedRows, includedRows, idxAvgRowSize),
            jsonScan, scanRel, idxRowType);
      }
    }

    // Row count for the complete condition - based on the table
    double totalConditionRows = tablePayload.getRowCount() * computeSelectivity(condition, null,
        tablePayload.getRowCount(), scanRel, baseSelectivities).left;
    // The leading key row count of the table is based on _id predicates
    QueryCondition tableQueryCondition = jsonScan.convertToQueryCondition(
        convertToLogicalExpression(condition, scanRel.getRowType(), plannerSettings, rexBuilder));
    StatisticsPayload tableLeadingKeyPayload = jsonScan.getFirstKeyEstimatedStats(tableQueryCondition, null, scanRel);
    addToCache(condition, null, null,
        new MapRDBStatisticsPayload(totalConditionRows, tableLeadingKeyPayload.getRowCount(),
            tablePayload.getAvgRowSize()),
        jsonScan, scanRel, scanRel.getRowType());
    // Full table rows are represented by <NULL> RexNode, <NULL> QueryCondition.
    // There is no leading key here, so leadingKeyRowcount = totalRowCount
    addToCache(null, null, null,
        new MapRDBStatisticsPayload(tablePayload.getRowCount(), tablePayload.getRowCount(),
            tablePayload.getAvgRowSize()),
        jsonScan, scanRel, scanRel.getRowType());
    // Statistics are now available to consumers
    statsAvailable = true;
  }