in pydeequ/profiles.py
def __init__(self, spark_session: SparkSession, column, java_column_profile):
    """Wrap a JVM column profile, copying its statistics into Python-side attributes."""
    if not isinstance(spark_session, SparkSession):
        raise TypeError(f"Expected SparkSession object for spark_session, not {type(spark_session)}")
    self._spark_session = spark_session
    self._sc = spark_session.sparkContext
    self._jvm = spark_session._jvm
    self._java_column_profile = java_column_profile
    self._column = column
    # Scalar statistics are read eagerly from the underlying JVM profile.
    self._completeness = java_column_profile.completeness()
    self._approximateNumDistinctValues = java_column_profile.approximateNumDistinctValues()
    self._dataType = java_column_profile.dataType()
    self._typeCounts = scala_map_to_dict(self._jvm, java_column_profile.typeCounts())
    self._isDataTypeInferred = java_column_profile.isDataTypeInferred() == "true"
    # histogram() returns a Scala Option: materialize it as a list of
    # DistributionValue entries when present, otherwise store None.
    if get_or_else_none(self._java_column_profile.histogram()):
        self._histogram = [
            DistributionValue(k, v.absolute(), v.ratio())
            for k, v in scala_map_to_java_map(
                self._jvm, self._java_column_profile.histogram().get().values()
            ).items()
        ]
    else:
        self._histogram = None
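
For context, a minimal usage sketch follows. It assumes the standard PyDeequ ColumnProfilerRunner entry point, an already-running SparkSession named `spark`, and a DataFrame `df`; the `completeness` and `approximateNumDistinctValues` accessors are assumed to expose the private attributes set in the constructor above.

# Hedged usage sketch: `spark` and `df` must already exist, and the property
# accessors used below are assumed to mirror the attributes set in __init__.
from pydeequ.profiles import ColumnProfilerRunner

result = ColumnProfilerRunner(spark).onData(df).run()

for name, profile in result.profiles.items():
    # Each value is a ColumnProfile built by the constructor above.
    print(name, profile.completeness, profile.approximateNumDistinctValues)

Profiles are normally obtained this way rather than by instantiating ColumnProfile directly, since the runner supplies the JVM-side java_column_profile object that the constructor wraps.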