# 10_mlops/components/create_dataset.yaml

# Pipeline component definition: runs create_dataset.sh inside the book's
# container image to convert JPEG files to TensorFlow Records.
name: create_dataset
description: Converts JPEG files to TensorFlow Records using Dataflow or Apache Beam

# Values supplied by the pipeline when the component is instantiated.
inputs:
  - name: runner
    type: str
    default: DirectRunner
    description: DirectRunner or DataflowRunner
  - name: project_id
    type: str
    description: Project to bill Dataflow job to
  - name: region
    type: str
    description: Region to run Dataflow job in
  - name: input_csv
    type: GCSPath
    description: Path to CSV file
  - name: output_dir
    type: GCSPath
    description: Top-level directory for TF records
  - name: labels_dict
    type: GCSPath
    description: Dictionary file for class names

# Values this component hands to downstream steps.
outputs:
  - name: tfrecords_topdir
    type: GCSPath
    description: Top-level directory for TF records

implementation:
  container:
    # NOTE(review): `latest` is a mutable tag; consider pinning a version or
    # digest if reproducible pipeline runs matter.
    image: gcr.io/ai-analytics-solutions/practical-ml-vision-book:latest
    command:
      - bash
      - /src/practical-ml-vision-book/10_mlops/components/create_dataset.sh
    args:
      # Two positional arguments come first: the output_dir value, then the
      # path at which the tfrecords_topdir output is to be written.
      # Presumably the script records the former into the latter -- confirm
      # against create_dataset.sh.
      - inputValue: output_dir
      - outputPath: tfrecords_topdir
      # Remaining arguments are flag/value pairs; order is kept as in the
      # original definition. Note output_dir is passed a second time here.
      - '--all_data'
      - inputValue: input_csv
      - '--labels_file'
      - inputValue: labels_dict
      - '--project_id'
      - inputValue: project_id
      - '--output_dir'
      - inputValue: output_dir
      - '--runner'
      - inputValue: runner
      - '--region'
      - inputValue: region