in tensorflow_data_validation/statistics/generators/basic_stats_generator.py [0:0]
def _make_numeric_stats_proto(
    numeric_stats: _PartialNumericStats,
    total_num_values: int,
    num_histogram_buckets: int,
    num_quantiles_histogram_buckets: int,
    has_weights: bool
) -> statistics_pb2.NumericStatistics:
  """Builds a NumericStatistics proto from accumulated partial statistics.

  NaN values are removed from the value count that is handed to the histogram
  builders; their count is reported through each histogram's `num_nan` field
  instead.

  Args:
    numeric_stats: The partial numeric statistics to convert.
    total_num_values: Number of values for the feature. When `numeric_stats`
      reports NaNs, the NaN count is subtracted from it before it is used.
    num_histogram_buckets: Bucket count passed to the equi-width (standard)
      histogram builder.
    num_quantiles_histogram_buckets: Bucket count passed to the quantiles
      histogram builder.
    has_weights: If true, `weighted_numeric_stats` is populated as well.

  Returns:
    The populated NumericStatistics proto. If no non-NaN values remain, the
    proto carries only two histograms (STANDARD and QUANTILES) holding the NaN
    count, or nothing at all when there are no NaNs either.

  Raises:
    AssertionError: If a quantiles summary or the weighted mean/variance
      accumulator that is needed is None.
  """
  stats_proto = statistics_pb2.NumericStatistics()
  nan_count = numeric_stats.num_nan
  has_nans = nan_count > 0
  non_nan_count = total_num_values - nan_count if has_nans else total_num_values

  if non_nan_count == 0:
    # No non-NaN values: the only thing worth reporting is the NaN count, once
    # per histogram type.
    if has_nans:
      for histogram_type in (statistics_pb2.Histogram.STANDARD,
                             statistics_pb2.Histogram.QUANTILES):
        nan_only_histogram = stats_proto.histograms.add(type=histogram_type)
        nan_only_histogram.num_nan = nan_count
    return stats_proto

  accumulator = numeric_stats.mean_var_accumulator
  stats_proto.mean = float(accumulator.mean)
  # Clamp at zero so a slightly negative variance cannot reach math.sqrt.
  stats_proto.std_dev = math.sqrt(max(0, accumulator.variance))
  stats_proto.num_zeros = numeric_stats.num_zeros
  stats_proto.min = float(numeric_stats.min)
  stats_proto.max = float(numeric_stats.max)

  # Pull the quantiles out of the summary. The same number of quantiles is
  # requested from the weighted summary further down.
  unweighted_summary = numeric_stats.quantiles_summary
  assert unweighted_summary is not None
  num_quantiles = max(
      num_quantiles_histogram_buckets,
      _NUM_QUANTILES_FACTOR_FOR_STD_HISTOGRAM * num_histogram_buckets)
  quantile_values = (
      unweighted_summary.GetQuantiles(num_quantiles).flatten().to_pylist())

  # The median is derived from the quantiles.
  stats_proto.median = float(quantiles_util.find_median(quantile_values))

  # First histogram entry: equi-width histogram built from the quantiles.
  equi_width_histogram = quantiles_util.generate_equi_width_histogram(
      quantile_values, numeric_stats.finite_min, numeric_stats.finite_max,
      non_nan_count, num_histogram_buckets)
  equi_width_histogram.num_nan = nan_count
  stats_proto.histograms.add().CopyFrom(equi_width_histogram)

  # Second histogram entry: quantiles histogram built from the same quantiles.
  quantiles_histogram = quantiles_util.generate_quantiles_histogram(
      quantile_values, non_nan_count, num_quantiles_histogram_buckets)
  quantiles_histogram.num_nan = nan_count
  stats_proto.histograms.add().CopyFrom(quantiles_histogram)

  if not has_weights:
    return stats_proto

  # Weighted counterpart of everything above.
  weighted_accumulator = numeric_stats.weighted_mean_var_accumulator
  assert weighted_accumulator is not None
  weighted_proto = statistics_pb2.WeightedNumericStatistics()
  # Mean weight times the number of accumulated values gives the weighted
  # value count used by the weighted histograms.
  weighted_value_count = (
      weighted_accumulator.weights_mean * weighted_accumulator.count)
  weighted_proto.mean = weighted_accumulator.mean
  weighted_proto.std_dev = math.sqrt(max(0, weighted_accumulator.variance))

  # Pull the weighted quantiles out of the weighted summary.
  weighted_summary = numeric_stats.weighted_quantiles_summary
  assert weighted_summary is not None
  weighted_quantile_values = (
      weighted_summary.GetQuantiles(num_quantiles).flatten().to_pylist())

  # Weighted median, again derived from the quantiles.
  weighted_proto.median = float(
      quantiles_util.find_median(weighted_quantile_values))

  # Weighted equi-width histogram. Note that num_nan is the same unweighted
  # NaN count that the unweighted histograms carry.
  weighted_equi_width_histogram = quantiles_util.generate_equi_width_histogram(
      weighted_quantile_values, numeric_stats.finite_min,
      numeric_stats.finite_max, weighted_value_count, num_histogram_buckets)
  weighted_equi_width_histogram.num_nan = nan_count
  weighted_proto.histograms.extend([weighted_equi_width_histogram])

  # Weighted quantiles histogram.
  weighted_quantiles_histogram = quantiles_util.generate_quantiles_histogram(
      weighted_quantile_values, weighted_value_count,
      num_quantiles_histogram_buckets)
  weighted_quantiles_histogram.num_nan = nan_count
  weighted_proto.histograms.extend([weighted_quantiles_histogram])

  stats_proto.weighted_numeric_stats.CopyFrom(weighted_proto)
  return stats_proto