in src/main/scala/com/amazon/deequ/analyzers/catalyst/StatefulHyperloglogPlus.scala [210:257]
/**
 * Estimate the cardinality from a packed register array.
 *
 * Each `Long` in `words` is read as up to `REGISTERS_PER_WORD` registers of
 * `REGISTER_SIZE` bits, lowest bits first; only the first `M` registers overall are
 * consumed. Linear counting is used when at least one register is zero and the linear
 * estimate does not exceed `THRESHOLDS(P - 4)`; otherwise the raw estimate
 * `ALPHA_M2 / zInverse` is used, with `estimateBias` subtracted when `P < 19` and the
 * raw estimate is below `5 * M`.
 *
 * @param words the packed registers
 * @return the estimate rounded to the nearest long value, widened to `Double`
 */
def count(words: Array[Long]): Double = {
  // Compute the inverse of indicator value 'z' and count the number of zeros 'V'.
  var zInverse = 0.0d
  var V = 0.0d
  var idx = 0
  var wordOffset = 0
  while (wordOffset < words.length) {
    val word = words(wordOffset)
    var i = 0
    var shift = 0
    while (idx < M && i < REGISTERS_PER_WORD) {
      val Midx = (word >>> shift) & REGISTER_WORD_MASK
      // Shift a Long, not an Int: `1 << Midx` is an Int shift whose count is taken
      // modulo 32, so a register value of 31 yields a negative divisor and values of
      // 32 or more wrap around to the wrong power of two.
      zInverse += 1.0 / (1L << Midx)
      if (Midx == 0) {
        V += 1.0d
      }
      shift += REGISTER_SIZE
      i += 1
      idx += 1
    }
    wordOffset += 1
  }

  // We integrate two steps from the paper:
  // val Z = 1.0d / zInverse
  // val E = alphaM2 * Z
  // Kept as a `def` so the bias correction is only evaluated on the branches that
  // actually fall back to the raw estimate.
  @inline
  def EBiasCorrected = ALPHA_M2 / zInverse match {
    case e if P < 19 && e < 5.0d * M => e - estimateBias(e)
    case e => e
  }

  // Estimate the cardinality.
  val estimate = if (V > 0) {
    // Use linear counting for small cardinality estimates.
    val H = M * Math.log(M / V)
    if (H <= THRESHOLDS(P - 4)) {
      H
    } else {
      EBiasCorrected
    }
  } else {
    EBiasCorrected
  }

  // Round to the nearest long value.
  Math.round(estimate)
}