in src/main/scala/com/amazon/deequ/analyzers/catalyst/StatefulHyperloglogPlus.scala [259:297]
def estimateBias(e: Double): Double = {
  // Overview: pick a window of at most K consecutive entries of the raw-estimate table
  // (row P - 4) positioned around `e`, then return the arithmetic mean of the bias-table
  // values found at the same positions.
  val rawEstimates = RAW_ESTIMATE_DATA(P - 4)
  val tableSize = rawEstimates.length

  // Binary search needs the table to be sorted. A negative result encodes the insertion
  // point as -(insertionPoint) - 1, so decode it back into a usable index.
  val searchResult = java.util.Arrays.binarySearch(rawEstimates, 0, tableSize, e)
  val anchorIndex = if (searchResult < 0) -(searchResult + 1) else searchResult

  // Squared difference between `e` and the table entry at `idx`; used only for comparisons.
  def squaredGap(idx: Int): Double = {
    val delta = e - rawEstimates(idx)
    delta * delta
  }

  // Initial window [windowStart, windowStart + windowWidth), clamped to the table bounds.
  val windowStart = math.max(anchorIndex - K + 1, 0)
  val windowWidth = math.min(windowStart + K, tableSize) - windowStart

  // Shift the window right by one for as long as the entry just past its (exclusive) end is
  // closer to `e` than the entry at its (inclusive) start. Returns the final start index.
  @scala.annotation.tailrec
  def settle(start: Int): Int = {
    val end = start + windowWidth
    if (end < tableSize && squaredGap(end) < squaredGap(start)) settle(start + 1)
    else start
  }

  val first = settle(windowStart)
  val last = first + windowWidth

  // Accumulate the biases over [first, last) left to right, starting from 0.0.
  val biasTable = BIAS_DATA(P - 4)
  val total = (first until last).foldLeft(0.0) { (acc, idx) => acc + biasTable(idx) }

  // Mean bias over the selected window.
  total / (last - first)
}