in src/main/java/org/apache/datasketches/characterization/hll/DruidHllMergeAccuracyProfile.java [50:83]
/**
 * Runs the merge-accuracy trials and prints summary statistics through {@code job}.
 *
 * <p>The job properties {@code numTrials}, {@code numSketches} and
 * {@code distinctKeysPerSketch} are read and parsed as integers. In every trial,
 * {@code numSketches} collectors are each fed {@code distinctKeysPerSketch} hashed keys and
 * folded into one union collector, whose cardinality estimate is then compared against the
 * true count {@code numSketches * distinctKeysPerSketch}.
 *
 * <p>Keys come from a single counter that starts at {@code random.nextLong()} and is
 * incremented after every use; it is never reset, so no key is reused across sketches or
 * trials (barring wrap-around of the {@code long} counter).
 *
 * <p>Printed values: the true count, the mean estimate over all trials, the mean relative
 * error ({@code meanEstimate / trueCount - 1}) and the relative standard error (root mean
 * squared deviation from the true count, divided by the true count).
 *
 * @throws NumberFormatException if one of the three properties is not a parsable integer
 */
private void runMergeTrials() {
  long key = random.nextLong();
  final int numTrials = Integer.parseInt(job.getProperties().mustGet("numTrials"));
  final int numSketches = Integer.parseInt(job.getProperties().mustGet("numSketches"));
  final int distinctKeysPerSketch =
      Integer.parseInt(job.getProperties().mustGet("distinctKeysPerSketch"));

  // Widen to double BEFORE multiplying: an int * int product wraps silently once it
  // exceeds Integer.MAX_VALUE, which would corrupt the true count and every error metric.
  final double trueCount = (double) numSketches * distinctKeysPerSketch;

  double sumEstimates = 0;
  double sumOfSquaredDeviationsFromTrueCount = 0;
  for (int t = 0; t < numTrials; t++) {
    final HyperLogLogCollector union = HyperLogLogCollector.makeLatestCollector();
    for (int s = 0; s < numSketches; s++) {
      final HyperLogLogCollector sketch = HyperLogLogCollector.makeLatestCollector();
      for (int k = 0; k < distinctKeysPerSketch; k++) {
        // The key is handed to longToByteArray together with the shared bytes buffer,
        // which is then hashed and added to the current sketch.
        DruidHllAccuracyProfile.longToByteArray(key++, bytes);
        sketch.add(hash.hash(bytes));
      }
      union.fold(sketch);
    }
    final double estimatedCount = union.estimateCardinality();
    final double deviation = estimatedCount - trueCount;
    sumEstimates += estimatedCount;
    sumOfSquaredDeviationsFromTrueCount += deviation * deviation;
  }

  final double meanEstimate = sumEstimates / numTrials;
  final double meanRelativeError = meanEstimate / trueCount - 1;
  // Root mean squared deviation from the true count, normalized by the true count.
  final double relativeStandardError =
      Math.sqrt(sumOfSquaredDeviationsFromTrueCount / numTrials) / trueCount;

  job.println("True count: " + trueCount);
  job.println("Mean estimate: " + meanEstimate);
  job.println("Mean Relative Error: " + meanRelativeError);
  job.println("Relative Standard Error: " + relativeStandardError);
}