Profile profile()

in core/src/main/java/org/apache/calcite/profile/SimpleProfiler.java [126:269]


    Profile profile(Iterable<List<Comparable>> rows) {
      final List<Comparable> values = new ArrayList<>();
      int rowCount = 0;
      for (final List<Comparable> row : rows) {
        ++rowCount;
      joint:
        for (Space space : spaces) {
          values.clear();
          for (Column column : space.columns) {
            final Comparable value = row.get(column.ordinal);
            values.add(value);
            if (value == NullSentinel.INSTANCE) {
              space.nullCount++;
              continue joint;
            }
          }
          space.values.add(FlatLists.ofComparable(values));
        }
      }

      // Populate unique keys
      // If [x, y] is a key,
      // then [x, y, z] is a key but not intersecting,
      // and [x, y] => [a] is a functional dependency but not interesting,
      // and [x, y, z] is not an interesting distribution.
      final Map<ImmutableBitSet, Distribution> distributions = new HashMap<>();
      for (Space space : spaces) {
        if (space.values.size() == rowCount
            && !containsKey(space.columnOrdinals, false)) {
          // We have discovered a new key.
          // It is not an existing key or a super-set of a key.
          statistics.add(new Unique(space.columns));
          space.unique = true;
          keyOrdinalLists.add(space.columnOrdinals);
        }

        int nonMinimal = 0;
      dependents:
        for (Space s : results.getDescendants(space)) {
          if (s.cardinality() == space.cardinality()) {
            // We have discovered a sub-set that has the same cardinality.
            // The column(s) that are not in common are functionally
            // dependent.
            final ImmutableBitSet dependents =
                space.columnOrdinals.except(s.columnOrdinals);
            for (int i : s.columnOrdinals) {
              final Space s1 = singletonSpaces.get(i);
              final ImmutableBitSet rest = s.columnOrdinals.clear(i);
              for (ImmutableBitSet dependent : requireNonNull(s1, "s1").dependents) {
                if (rest.contains(dependent)) {
                  // The "key" of this functional dependency is not minimal.
                  // For instance, if we know that
                  //   (a) -> x
                  // then
                  //   (a, b, x) -> y
                  // is not minimal; we could say the same with a smaller key:
                  //   (a, b) -> y
                  ++nonMinimal;
                  continue dependents;
                }
              }
            }
            for (int dependent : dependents) {
              final Space s1 = singletonSpaces.get(dependent);
              for (ImmutableBitSet d : requireNonNull(s1, "s1").dependents) {
                if (s.columnOrdinals.contains(d)) {
                  ++nonMinimal;
                  continue dependents;
                }
              }
            }
            space.dependencies.or(dependents.toBitSet());
            for (int d : dependents) {
              Space spaceD =
                  requireNonNull(singletonSpaces.get(d),
                      () -> "singletonSpaces.get(d) is null for " + d);
              spaceD.dependents.add(s.columnOrdinals);
            }
          }
        }

        int nullCount;
        final SortedSet<Comparable> valueSet;
        if (space.columns.size() == 1) {
          nullCount = space.nullCount;
          valueSet =
              ImmutableSortedSet.copyOf(
                  Util.transform(space.values, Iterables::getOnlyElement));
        } else {
          nullCount = -1;
          valueSet = null;
        }
        double expectedCardinality;
        final double cardinality = space.cardinality();
        switch (space.columns.size()) {
        case 0:
          expectedCardinality = 1d;
          break;
        case 1:
          expectedCardinality = rowCount;
          break;
        default:
          expectedCardinality = rowCount;
          for (Column column : space.columns) {
            final Distribution d1 =
                distributions.get(ImmutableBitSet.of(column.ordinal));
            final Distribution d2 =
                distributions.get(space.columnOrdinals.clear(column.ordinal));
            final double d =
                Lattice.getRowCount(rowCount,
                    requireNonNull(d1, "d1").cardinality,
                    requireNonNull(d2, "d2").cardinality);
            expectedCardinality = Math.min(expectedCardinality, d);
          }
        }
        final boolean minimal = nonMinimal == 0
            && !space.unique
            && !containsKey(space.columnOrdinals, true);
        final Distribution distribution =
            new Distribution(space.columns, valueSet, cardinality, nullCount,
                expectedCardinality, minimal);
        statistics.add(distribution);
        distributions.put(space.columnOrdinals, distribution);

        if (distribution.minimal) {
          results.add(space);
        }
      }

      for (Space s : singletonSpaces) {
        for (ImmutableBitSet dependent : requireNonNull(s, "s").dependents) {
          if (!containsKey(dependent, false)
              && !hasNull(dependent)) {
            statistics.add(
                new FunctionalDependency(toColumns(dependent),
                    Iterables.getOnlyElement(s.columns)));
          }
        }
      }
      return new Profile(columns, new RowCount(rowCount),
          Iterables.filter(statistics, FunctionalDependency.class),
          Iterables.filter(statistics, Distribution.class),
          Iterables.filter(statistics, Unique.class));
    }