in tfx_addons/schema_curation/example/taxi_example_local.py [0:0]
def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     metadata_path: Text) -> pipeline.Pipeline:
    """Assemble the Chicago taxi example pipeline ending in schema curation.

    Args:
      pipeline_name: Name forwarded to the resulting pipeline.
      pipeline_root: Forwarded as the pipeline's `pipeline_root`.
      data_root: Handed to `CsvExampleGen` as its `input_base`.
      metadata_path: Used to build the SQLite metadata connection config.

    Returns:
      A `pipeline.Pipeline` wiring CsvExampleGen -> StatisticsGen ->
      SchemaGen -> SchemaCuration, with caching enabled.
    """
    # Ingestion: pull the CSV data under `data_root` into the pipeline.
    ingest = CsvExampleGen(input_base=data_root)

    # Statistics over the ingested examples (used for visualization and
    # example validation).
    stats = StatisticsGen(examples=ingest.outputs['examples'])

    # Infer a schema from those statistics.
    inferred_schema = SchemaGen(
        statistics=stats.outputs['statistics'],
        infer_feature_shape=True,
    )

    # Curate the inferred schema via the user-defined `schema_fn` expected to
    # live in the module file.
    # NOTE(review): this is a relative path and does not obviously match this
    # example's own directory -- confirm it resolves from the intended
    # working directory.
    curation_module = os.path.join('schemacomponent', 'example', 'module_file.py')
    curated_schema = component.SchemaCuration(
        schema=inferred_schema.outputs['schema'],
        module_file=curation_module,
    )

    connection_config = metadata.sqlite_metadata_connection_config(
        metadata_path)
    stages = [ingest, stats, inferred_schema, curated_schema]

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=stages,
        enable_cache=True,
        metadata_connection_config=connection_config,
    )