in spark-load/spark-load-common/src/main/java/org/apache/doris/common/io/Hll.java [318:365]
/**
 * Estimates the number of distinct values represented by this HLL.
 *
 * <ul>
 *   <li>{@code HLL_DATA_EMPTY}: returns 0.</li>
 *   <li>{@code HLL_DATA_EXPLICIT}: returns the size of {@code hashSet}.</li>
 *   <li>Otherwise: derives an estimate from {@code registers} using the harmonic mean of
 *       {@code 2^-register}, then applies either a small-range (linear counting) formula or,
 *       for 16384 registers and estimates below 72000, a polynomial bias correction.</li>
 * </ul>
 *
 * <p>NOTE(review): the alpha and harmonic-mean arithmetic is carried out in {@code float}.
 * That is kept exactly as-is so results do not change; confirm whether it is intentional
 * (e.g. to agree with another implementation) before widening to {@code double}.
 *
 * @return the estimated cardinality, rounded to the nearest {@code long}
 */
public strictfp long estimateCardinality() {
    if (type == HLL_DATA_EMPTY) {
        return 0;
    }
    if (type == HLL_DATA_EXPLICIT) {
        return hashSet.size();
    }

    final int registerCount = HLL_REGISTERS_COUNT;

    // Alpha constant, selected by the number of registers.
    float alpha;
    switch (registerCount) {
        case 16:
            alpha = 0.673f;
            break;
        case 32:
            alpha = 0.697f;
            break;
        case 64:
            alpha = 0.709f;
            break;
        default:
            alpha = 0.7213f / (1 + 1.079f / registerCount);
            break;
    }

    // Accumulate sum(2^-register) and count the registers that are still zero.
    float inverseSum = 0;
    int emptyRegisters = 0;
    for (int idx = 0; idx < HLL_REGISTERS_COUNT; idx++) {
        if (registers[idx] == 0) {
            emptyRegisters++;
        }
        // Compound assignment: the addition happens in double, then narrows back to float.
        inverseSum += Math.pow(2.0f, -registers[idx]);
    }
    final float harmonicMean = 1.0f / inverseSum;

    // Raw estimate; the product is evaluated in float before being widened to double.
    double estimate = alpha * registerCount * registerCount * harmonicMean;

    final boolean smallRange = emptyRegisters != 0 && estimate <= registerCount * 2.5;
    if (smallRange) {
        // Small-range correction: m * ln(m / V), V being the number of zero registers.
        estimate = registerCount * Math.log(((float) registerCount) / ((float) emptyRegisters));
    } else if (registerCount == 16384 && estimate < 72000) {
        // Polynomial bias, expressed as a percentage of the estimate.
        // NOTE(review): the origin of these coefficients is not visible in this method.
        final double squared = estimate * estimate;
        final double cubed = squared * estimate;
        final double fourth = cubed * estimate;
        final double biasPercent = 5.9119 * 1.0e-18 * fourth
                - 1.4253 * 1.0e-12 * cubed
                + 1.2940 * 1.0e-7 * squared
                - 5.2921 * 1.0e-3 * estimate
                + 83.3216;
        estimate -= estimate * (biasPercent / 100);
    }

    // Adding 0.5 before truncation rounds to the nearest long.
    return (long) (estimate + 0.5);
}