in src/main/scala/com/amazon/deequ/profiles/ColumnProfiler.scala [465:545]
/**
 * Gathers the per-column numeric statistics out of an analysis result.
 *
 * For each analyzer kind of interest (Mean, StandardDeviation, Maximum, Minimum, Sum and
 * KLLSketch) the successfully computed metrics are indexed by the column of the analyzer
 * that produced them. A metric whose value is not a success is left out, so a column may be
 * absent from any of the resulting maps. When several analyzers of the same kind report the
 * same column, the entry encountered last while iterating the metric map is the one kept.
 *
 * @param results analyzer context whose `metricMap` is scanned
 * @return the statistics assembled from the successful metrics
 */
private[this] def extractNumericStatistics(results: AnalyzerContext): NumericColumnStatistics = {
  val computedMetrics = results.metricMap

  // Each pipeline below follows the same recipe: pick the (analyzer, metric) pairs of one
  // kind, turn a successful value into `Some(column -> value)` and drop everything else.
  val meanByColumn = computedMetrics
    .collect { case (mean: Mean, metric: DoubleMetric) =>
      metric.value.toOption.map(mean.column -> _)
    }
    .flatten
    .toMap

  val stdDevByColumn = computedMetrics
    .collect { case (stdDev: StandardDeviation, metric: DoubleMetric) =>
      metric.value.toOption.map(stdDev.column -> _)
    }
    .flatten
    .toMap

  val maximumByColumn = computedMetrics
    .collect { case (maximum: Maximum, metric: DoubleMetric) =>
      metric.value.toOption.map(maximum.column -> _)
    }
    .flatten
    .toMap

  val minimumByColumn = computedMetrics
    .collect { case (minimum: Minimum, metric: DoubleMetric) =>
      metric.value.toOption.map(minimum.column -> _)
    }
    .flatten
    .toMap

  val sumByColumn = computedMetrics
    .collect { case (sum: Sum, metric: DoubleMetric) =>
      metric.value.toOption.map(sum.column -> _)
    }
    .flatten
    .toMap

  // The sketch result itself, kept as-is for every column whose KLL metric succeeded.
  val distributionByColumn = computedMetrics
    .collect { case (sketch: KLLSketch, metric: KLLMetric) =>
      metric.value.toOption.map(sketch.column -> _)
    }
    .flatten
    .toMap

  // Percentiles are derived from the same KLL metrics and stored in ascending order.
  val sortedPercentilesByColumn = computedMetrics
    .collect { case (sketch: KLLSketch, metric: KLLMetric) =>
      metric.value.toOption.map { distribution =>
        sketch.column -> distribution.computePercentiles().toSeq.sorted
      }
    }
    .flatten
    .toMap

  // Argument order is positional: minima come before maxima.
  NumericColumnStatistics(
    meanByColumn,
    stdDevByColumn,
    minimumByColumn,
    maximumByColumn,
    sumByColumn,
    distributionByColumn,
    sortedPercentilesByColumn)
}