def _extract_graph_data_from_dataset_feature_statistics()

in model_card_toolkit/utils/graphics.py [0:0]


def _extract_graph_data_from_dataset_feature_statistics(
    feature_stats: statistics_pb2.FeatureNameStatistics,
    color: Optional[str] = None) -> Union[_Graph, None]:
  """Generates a _Graph object based on the histograms of feature_stats.

  Each bar in the histogram corresponds to a bucket in histogram.buckets.

  The bar heights are determined by the `sample_count` field of
  histogram.buckets. The bar labels are determined by the `label` field in a
  RankHistogram, or the bucket's `low_value` and `high_value` endpoints
  (formatted with two decimals) in a Histogram.

  The feature name is taken from `feature_stats.name`, falling back to the
  first step of `feature_stats.path`. If neither is available the graph is
  titled and named just 'counts'.

  Args:
    feature_stats: a FeatureNameStatistics proto.
    color: the colors of the barchart. If empty or None, the `color` attribute
      of the returned _Graph is left untouched.

  Returns:
    A _Graph, or None (after logging a warning) if feature_stats has neither
    num_stats with at least one histogram nor string_stats.
  """
  # Guard the index: when both `name` and `path.step` are empty, fall back to
  # an empty name (handled below) instead of raising IndexError.
  path_steps = feature_stats.path.step
  feature_name = feature_stats.name or (path_steps[0] if path_steps else '')

  if feature_stats.HasField('num_stats') and feature_stats.num_stats.histograms:
    # Only generate graph for the first histogram.
    # The second one is QUANTILES graph.
    buckets = feature_stats.num_stats.histograms[0].buckets
    labels = [
        f'{bucket.low_value:.2f}-{bucket.high_value:.2f}' for bucket in buckets
    ]
  elif feature_stats.HasField('string_stats'):
    buckets = feature_stats.string_stats.rank_histogram.buckets
    labels = [bucket.label for bucket in buckets]
  else:
    logging.warning('Did not generate a graph for feature %s: '
                    'FeatureNameStatistics must have string_stats or num_stats',
                    feature_name)
    return None

  # Both kinds of stats produce the same bar chart layout: counts on x,
  # bucket labels on y.
  caption = f'counts | {feature_name}' if feature_name else 'counts'
  graph = _Graph()
  graph.x = [int(bucket.sample_count) for bucket in buckets]
  graph.xlabel = 'counts'
  graph.y = labels
  graph.ylabel = 'buckets'
  graph.title = caption
  graph.name = caption
  if color:
    graph.color = color
  return graph