def _make_feature_stats_proto()

in tensorflow_data_validation/statistics/generators/basic_stats_generator.py [0:0]


def _make_feature_stats_proto(
    feature_path: types.FeaturePath, basic_stats: _PartialBasicStats,
    parent_basic_stats: Optional[_PartialBasicStats],
    make_quantiles_sketch_fn: Callable[[], sketches.QuantilesSketch],
    num_values_histogram_buckets: int, num_histogram_buckets: int,
    num_quantiles_histogram_buckets: int, is_bytes: bool,
    categorical_numeric_types: Mapping[types.FeaturePath,
                                       'schema_pb2.FeatureType'],
    has_weights: bool, num_examples: int,
    weighted_num_examples: int) -> statistics_pb2.FeatureNameStatistics:
  """Builds the FeatureNameStatistics proto for a single feature.

  The feature type is resolved first: a user-declared BYTES type is honored
  only when the inferred type is STRING or when no type could be inferred.
  The common statistics are then built and copied into the type-specific
  statistics message that matches the resolved type.

  Args:
    feature_path: The path of the feature.
    basic_stats: The partial basic stats associated with the feature.
    parent_basic_stats: The partial basic stats of the parent of the feature,
        or None if there is no parent.
    make_quantiles_sketch_fn: A callable to create a quantiles sketch.
    num_values_histogram_buckets: Number of buckets in the quantiles
        histogram for the number of values per feature.
    num_histogram_buckets: Number of buckets in a standard
        NumericStatistics.histogram with equal-width buckets.
    num_quantiles_histogram_buckets: Number of buckets in a
        quantiles NumericStatistics.histogram.
    is_bytes: A boolean indicating whether the feature is bytes.
    categorical_numeric_types: A mapping from feature path to type derived from
        the schema.
    has_weights: A boolean indicating whether a weight feature is specified.
    num_examples: The global (across feature) number of examples.
    weighted_num_examples: The global (across feature) weighted number of
      examples.

  Returns:
    A statistics_pb2.FeatureNameStatistics proto.
  """
  feature_stats_cls = statistics_pb2.FeatureNameStatistics
  common_partial = basic_stats.common_stats

  result = feature_stats_cls()
  result.path.CopyFrom(feature_path.to_proto())

  # Resolve the feature type.
  inferred_type = common_partial.type
  if inferred_type is None:
    # No value was ever seen for this feature, so there is nothing to
    # contradict the user's claim. There is no "unknown" type, hence STRING is
    # the fallback when the user did not claim BYTES.
    if is_bytes:
      result.type = feature_stats_cls.BYTES
    else:
      result.type = feature_stats_cls.STRING
  elif is_bytes and inferred_type == feature_stats_cls.STRING:
    # BYTES is never inferred; the user's claim is trusted only when the
    # actual data is strings/bytes.
    result.type = feature_stats_cls.BYTES
  else:
    result.type = inferred_type

  # Build the common statistics shared by every type-specific message.
  parent_common_partial = (
      None if parent_basic_stats is None else parent_basic_stats.common_stats)
  common_stats_proto = _make_common_stats_proto(
      common_partial, parent_common_partial, make_quantiles_sketch_fn,
      num_values_histogram_buckets, has_weights, num_examples,
      weighted_num_examples)

  # Total number of values at the leaf (innermost) nest level.
  presence_and_valency = common_partial.presence_and_valency_stats
  if presence_and_valency is None:
    leaf_total_num_values = 0
  else:
    leaf_total_num_values = presence_and_valency[-1].total_num_values

  # Attach the common stats to the message(s) matching the resolved type.
  resolved_type = result.type
  if resolved_type == feature_stats_cls.BYTES:
    bytes_proto = _make_bytes_stats_proto(
        basic_stats.bytes_stats, common_stats_proto.tot_num_values)
    bytes_proto.common_stats.CopyFrom(common_stats_proto)
    result.bytes_stats.CopyFrom(bytes_proto)

  # The string/struct/numeric chain below is deliberately a separate `if` and
  # not a continuation of the BYTES check above.
  # TODO(b/187054148): Update to allow FLOAT
  wants_string_stats = (
      resolved_type == feature_stats_cls.STRING or
      top_k_uniques_stats_util.output_categorical_numeric(
          categorical_numeric_types, feature_path, resolved_type))
  if wants_string_stats:
    string_proto = _make_string_stats_proto(
        basic_stats.string_stats, leaf_total_num_values)
    string_proto.common_stats.CopyFrom(common_stats_proto)
    result.string_stats.CopyFrom(string_proto)
  elif resolved_type == feature_stats_cls.STRUCT:
    result.struct_stats.common_stats.CopyFrom(common_stats_proto)
  elif resolved_type in (feature_stats_cls.INT, feature_stats_cls.FLOAT):
    numeric_proto = _make_numeric_stats_proto(
        basic_stats.numeric_stats, leaf_total_num_values,
        num_histogram_buckets, num_quantiles_histogram_buckets, has_weights)
    numeric_proto.common_stats.CopyFrom(common_stats_proto)
    result.num_stats.CopyFrom(numeric_proto)

  num_values_custom_stats = _make_num_values_custom_stats_proto(
      common_partial, num_values_histogram_buckets)
  result.custom_stats.extend(num_values_custom_stats)
  return result