in tensorflow_data_validation/statistics/stats_impl.py [0:0]
def get_generators(options: stats_options.StatsOptions,
                   in_memory: bool = False
                  ) -> List[stats_generator.StatsGenerator]:
  """Builds the complete list of stats generators requested by `options`.

  Generators are collected in this order: the generators returned by
  `_get_default_generators` (if enabled), the user-supplied custom
  generators, a sampled wrapper around the semantic domain generators (if
  enabled), and finally the generators that depend on `options.schema`.
  Afterwards, every CombinerFeatureStatsGenerator in the list is folded into
  one CombinerFeatureStatsWrapperGenerator appended at the end of the list.

  Args:
    options: A StatsOptions object.
    in_memory: True if the generators will compute statistics in memory,
      False if they will be run with Beam.

  Returns:
    A list of stats generator objects.

  Raises:
    TypeError: If `in_memory` is True and one of the resulting generators
      does not extend CombinerStatsGenerator.
  """
  collected = []

  if options.add_default_generators:
    collected += _get_default_generators(options, in_memory)

  # Custom stats generators supplied by the caller.
  if options.generators:
    collected += options.generators

  if options.enable_semantic_domain_stats:
    # The semantic domain generators get their own wrapper so that the
    # sampling rate applies to them only and leaves every other feature stats
    # generator untouched.
    semantic_wrapper = CombinerFeatureStatsWrapperGenerator(
        [
            image_stats_generator.ImageStatsGenerator(),
            natural_language_domain_inferring_stats_generator
            .NLDomainInferringStatsGenerator(),
            time_stats_generator.TimeStatsGenerator(),
        ],
        sample_rate=options.semantic_domain_stats_sample_rate)
    collected.append(semantic_wrapper)

  schema = options.schema
  if schema is not None:
    if _schema_has_sparse_features(schema):
      sparse_gen = sparse_feature_stats_generator.SparseFeatureStatsGenerator(
          schema)
      collected.append(sparse_gen)

    if _schema_has_natural_language_domains(schema):
      nl_gen = natural_language_stats_generator.NLStatsGenerator(
          schema, options.vocab_paths,
          options.num_histogram_buckets,
          options.num_quantiles_histogram_buckets,
          options.num_rank_histogram_buckets)
      collected.append(nl_gen)

    if schema.weighted_feature:
      weighted_gen = (
          weighted_feature_stats_generator.WeightedFeatureStatsGenerator(
              schema))
      collected.append(weighted_gen)

    if options.label_feature and not in_memory:
      # LiftStatsGenerator is not a CombinerStatsGenerator, so it cannot
      # currently take part in in-memory executions.
      lift_gen = lift_stats_generator.LiftStatsGenerator(
          y_path=types.FeaturePath([options.label_feature]),
          schema=schema,
          example_weight_map=options.example_weight_map,
          output_custom_stats=True)
      collected.append(lift_gen)

  # Split the list into per-feature combiner generators and everything else,
  # then replace the former with a single wrapper placed after the rest.
  per_feature = []
  remaining = []
  for gen in collected:
    if isinstance(gen, stats_generator.CombinerFeatureStatsGenerator):
      per_feature.append(gen)
    else:
      remaining.append(gen)
  if per_feature:
    remaining.append(CombinerFeatureStatsWrapperGenerator(per_feature))
    collected = remaining

  if in_memory:
    # In-memory execution supports combiner-based generators only.
    for gen in collected:
      if isinstance(gen, stats_generator.CombinerStatsGenerator):
        continue
      raise TypeError('Statistics generator used in '
                      'generate_statistics_in_memory must '
                      'extend CombinerStatsGenerator, found object of '
                      'type %s.' % gen.__class__.__name__)

  return collected