in core/src/main/java/org/apache/calcite/profile/SimpleProfiler.java [126:269]
Profile profile(Iterable<List<Comparable>> rows) {
final List<Comparable> values = new ArrayList<>();
int rowCount = 0;
for (final List<Comparable> row : rows) {
++rowCount;
joint:
for (Space space : spaces) {
values.clear();
for (Column column : space.columns) {
final Comparable value = row.get(column.ordinal);
values.add(value);
if (value == NullSentinel.INSTANCE) {
space.nullCount++;
continue joint;
}
}
space.values.add(FlatLists.ofComparable(values));
}
}
// Populate unique keys
// If [x, y] is a key,
// then [x, y, z] is a key but not intersecting,
// and [x, y] => [a] is a functional dependency but not interesting,
// and [x, y, z] is not an interesting distribution.
final Map<ImmutableBitSet, Distribution> distributions = new HashMap<>();
for (Space space : spaces) {
if (space.values.size() == rowCount
&& !containsKey(space.columnOrdinals, false)) {
// We have discovered a new key.
// It is not an existing key or a super-set of a key.
statistics.add(new Unique(space.columns));
space.unique = true;
keyOrdinalLists.add(space.columnOrdinals);
}
int nonMinimal = 0;
dependents:
for (Space s : results.getDescendants(space)) {
if (s.cardinality() == space.cardinality()) {
// We have discovered a sub-set that has the same cardinality.
// The column(s) that are not in common are functionally
// dependent.
final ImmutableBitSet dependents =
space.columnOrdinals.except(s.columnOrdinals);
for (int i : s.columnOrdinals) {
final Space s1 = singletonSpaces.get(i);
final ImmutableBitSet rest = s.columnOrdinals.clear(i);
for (ImmutableBitSet dependent : requireNonNull(s1, "s1").dependents) {
if (rest.contains(dependent)) {
// The "key" of this functional dependency is not minimal.
// For instance, if we know that
// (a) -> x
// then
// (a, b, x) -> y
// is not minimal; we could say the same with a smaller key:
// (a, b) -> y
++nonMinimal;
continue dependents;
}
}
}
for (int dependent : dependents) {
final Space s1 = singletonSpaces.get(dependent);
for (ImmutableBitSet d : requireNonNull(s1, "s1").dependents) {
if (s.columnOrdinals.contains(d)) {
++nonMinimal;
continue dependents;
}
}
}
space.dependencies.or(dependents.toBitSet());
for (int d : dependents) {
Space spaceD =
requireNonNull(singletonSpaces.get(d),
() -> "singletonSpaces.get(d) is null for " + d);
spaceD.dependents.add(s.columnOrdinals);
}
}
}
int nullCount;
final SortedSet<Comparable> valueSet;
if (space.columns.size() == 1) {
nullCount = space.nullCount;
valueSet =
ImmutableSortedSet.copyOf(
Util.transform(space.values, Iterables::getOnlyElement));
} else {
nullCount = -1;
valueSet = null;
}
double expectedCardinality;
final double cardinality = space.cardinality();
switch (space.columns.size()) {
case 0:
expectedCardinality = 1d;
break;
case 1:
expectedCardinality = rowCount;
break;
default:
expectedCardinality = rowCount;
for (Column column : space.columns) {
final Distribution d1 =
distributions.get(ImmutableBitSet.of(column.ordinal));
final Distribution d2 =
distributions.get(space.columnOrdinals.clear(column.ordinal));
final double d =
Lattice.getRowCount(rowCount,
requireNonNull(d1, "d1").cardinality,
requireNonNull(d2, "d2").cardinality);
expectedCardinality = Math.min(expectedCardinality, d);
}
}
final boolean minimal = nonMinimal == 0
&& !space.unique
&& !containsKey(space.columnOrdinals, true);
final Distribution distribution =
new Distribution(space.columns, valueSet, cardinality, nullCount,
expectedCardinality, minimal);
statistics.add(distribution);
distributions.put(space.columnOrdinals, distribution);
if (distribution.minimal) {
results.add(space);
}
}
for (Space s : singletonSpaces) {
for (ImmutableBitSet dependent : requireNonNull(s, "s").dependents) {
if (!containsKey(dependent, false)
&& !hasNull(dependent)) {
statistics.add(
new FunctionalDependency(toColumns(dependent),
Iterables.getOnlyElement(s.columns)));
}
}
}
return new Profile(columns, new RowCount(rowCount),
Iterables.filter(statistics, FunctionalDependency.class),
Iterables.filter(statistics, Distribution.class),
Iterables.filter(statistics, Unique.class));
}