in pydeequ/profiles.py [0:0]
def __init__(self, spark_session: SparkSession, column, java_column_profile):
    super().__init__(spark_session, column, java_column_profile)
    # TODO: self.numRecords = java_column_profile.numRecords()
    # Wrap the KLL sketch in a BucketDistribution when the Java Option holds a value.
    self._kll = (
        BucketDistribution(spark_session, java_column_profile.kll().get())
        if get_or_else_none(java_column_profile.kll())
        else None
    )
    # Unwrap the scalar numeric statistics; empty Java Options become None.
    self._mean = get_or_else_none(java_column_profile.mean())
    self._maximum = get_or_else_none(java_column_profile.maximum())
    self._minimum = get_or_else_none(java_column_profile.minimum())
    self._sum = get_or_else_none(java_column_profile.sum())
    self._stdDev = get_or_else_none(java_column_profile.stdDev())
    # Parse the approximate percentiles from their Java string form into floats.
    self._approxPercentiles = (
        java_list_to_python_list(str(get_or_else_none(java_column_profile.approxPercentiles())), float)
        if get_or_else_none(java_column_profile.approxPercentiles())
        else []
    )
    # Gather every statistic into a single dict for serialization and display.
    self.all = {
        "completeness": self.completeness,
        "approximateNumDistinctValues": self.approximateNumDistinctValues,
        "dataType": self.dataType,
        "isDataTypeInferred": self.isDataTypeInferred,
        "typeCounts": self.typeCounts,
        "histogram": self.histogram,
        "kll": str(self._kll),
        "mean": self._mean,
        "maximum": self._maximum,
        "minimum": self._minimum,
        "sum": self._sum,
        "stdDev": self._stdDev,
        "approxPercentiles": self._approxPercentiles,
    }
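
For context, here is a minimal sketch of the two helpers this constructor leans on, get_or_else_none and java_list_to_python_list. These are illustrative re-implementations under assumed behavior (unwrapping a Py4J-proxied Scala Option, and parsing the string rendering of a Java list), not the definitive pydeequ versions.

def get_or_else_none(scala_option):
    # A Py4J handle to a Scala Option exposes isEmpty()/get();
    # map an empty Option to None, otherwise unwrap the contained value.
    return None if scala_option.isEmpty() else scala_option.get()

def java_list_to_python_list(java_list: str, datatype):
    # Parse the string form of a Scala/Java list, e.g. "List(0.1, 0.5, 0.9)",
    # into a Python list of `datatype` values; an empty list yields [].
    start, end = java_list.find("("), java_list.find(")")
    body = java_list[start + 1 : end]
    return [datatype(x.strip()) for x in body.split(",")] if body.strip() else []

And a typical way these profiles are produced, following pydeequ's documented profiling API (spark and df are assumed to be an existing SparkSession and DataFrame):

from pydeequ.profiles import ColumnProfilerRunner

result = ColumnProfilerRunner(spark).onData(df).run()
for column, profile in result.profiles.items():
    # Numeric columns yield profiles carrying mean, minimum, maximum, stdDev, etc.
    print(column, profile)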