in src/main/scala/com/amazon/deequ/profiles/ColumnProfiler.scala [675:728]
private[this] def createProfiles(
columns: Seq[String],
genericStats: GenericColumnStatistics,
numericStats: NumericColumnStatistics,
categoricalStats: CategoricalColumnStatistics)
: ColumnProfiles = {
val profiles = columns
.map { name =>
val completeness = genericStats.completenesses(name)
val approxNumDistinct = genericStats.approximateNumDistincts(name)
val dataType = genericStats.typeOf(name)
val isDataTypeInferred = genericStats.inferredTypes.contains(name)
val histogram = categoricalStats.histograms.get(name)
val typeCounts = genericStats.typeDetectionHistograms.getOrElse(name, Map.empty)
val profile = genericStats.typeOf(name) match {
case Integral | Fractional =>
NumericColumnProfile(
name,
completeness,
approxNumDistinct,
dataType,
isDataTypeInferred,
typeCounts,
histogram,
numericStats.kll.get(name),
numericStats.means.get(name),
numericStats.maxima.get(name),
numericStats.minima.get(name),
numericStats.sums.get(name),
numericStats.stdDevs.get(name),
numericStats.approxPercentiles.get(name))
case _ =>
StandardColumnProfile(
name,
completeness,
approxNumDistinct,
dataType,
isDataTypeInferred,
typeCounts,
histogram)
}
name -> profile
}
.toMap
ColumnProfiles(profiles, genericStats.numRecords)
}