in src/main/java/org/apache/datasketches/characterization/hll/HllMergeAccuracyProfile.java [48:81]
/**
 * Runs the configured merge trials and prints accuracy statistics of the merged estimate.
 *
 * <p>Reads the job properties {@code lgK}, {@code numTrials}, {@code numSketches} and
 * {@code distinctKeysPerSketch}. In each trial, {@code numSketches} HLL_8 sketches are each
 * updated with {@code distinctKeysPerSketch} consecutive {@code long} keys and merged into a
 * single union. The union's estimate is then compared against the true distinct count
 * {@code numSketches * distinctKeysPerSketch}.
 *
 * <p>Prints the true count, the mean estimate, the mean relative error and the relative
 * standard error (RMS deviation from the true count, divided by the true count) via
 * {@code job.println}.
 *
 * @throws NumberFormatException if any of the four properties is not a parsable integer
 */
private void runMergeTrials() {
  // Random starting point; the key is only ever incremented, so keys are not reused
  // across sketches or trials (barring wrap-around of the full 64-bit range).
  long key = random.nextLong();
  final int lgK = Integer.parseInt(job.getProperties().mustGet("lgK"));
  final int numTrials = Integer.parseInt(job.getProperties().mustGet("numTrials"));
  final int numSketches = Integer.parseInt(job.getProperties().mustGet("numSketches"));
  final int distinctKeysPerSketch = Integer.parseInt(job.getProperties().mustGet("distinctKeysPerSketch"));
  // Widen to double BEFORE multiplying: int * int silently wraps once the product
  // exceeds Integer.MAX_VALUE, which would corrupt every statistic computed below.
  final double trueCount = (double) numSketches * distinctKeysPerSketch;
  double sumEstimates = 0;
  double sumOfSquaredDeviationsFromTrueCount = 0;
  for (int t = 0; t < numTrials; t++) {
    final Union union = new Union(lgK);
    for (int s = 0; s < numSketches; s++) {
      final HllSketch sketch = new HllSketch(lgK, TgtHllType.HLL_8);
      for (int k = 0; k < distinctKeysPerSketch; k++) {
        sketch.update(key++);
      }
      union.update(sketch);
    }
    final double estimatedCount = union.getResult().getEstimate();
    final double deviation = estimatedCount - trueCount;
    sumEstimates += estimatedCount;
    sumOfSquaredDeviationsFromTrueCount += deviation * deviation;
  }
  final double meanEstimate = sumEstimates / numTrials;
  // Bias of the mean estimate, expressed as a fraction of the true count.
  final double meanRelativeError = meanEstimate / trueCount - 1;
  // Root-mean-square deviation from the true count, normalized by the true count.
  final double relativeStandardError
      = Math.sqrt(sumOfSquaredDeviationsFromTrueCount / numTrials) / trueCount;
  job.println("True count: " + trueCount);
  job.println("Mean estimate: " + meanEstimate);
  job.println("Mean Relative Error: " + meanRelativeError);
  job.println("Relative Standard Error: " + relativeStandardError);
}