# 10_mlops/components/noop_create_dataset.yaml
# Container component spec (inputs / outputs / implementation.container
# layout, as used by Kubeflow Pipelines) for the "noop" create-dataset step.
#
# Of the declared inputs, only `output_dir` is forwarded to the container
# (see `args` below); `runner`, `project_id`, `region`, `input_csv` and
# `labels_dict` are declared but never passed to the script.
#
# NOTE(review): the description mentions converting JPEGs with
# Dataflow/Beam, yet the component is named "noop" and receives only
# `output_dir` -- presumably the script just hands an existing directory
# through; confirm against noop_create_dataset.sh and reword if so.
name: noop_create_dataset
description: Converts JPEG files to TensorFlow Records using Dataflow or Apache Beam

inputs:
  # NOTE(review): `str` is kept as-is to preserve the interface; the
  # documented Kubeflow type name is `String` -- confirm which is intended.
  - name: runner
    type: str
    default: 'DirectRunner'
    description: 'DirectRunner or DataflowRunner'
  - name: project_id
    type: str
    description: 'Project to bill Dataflow job to'
  - name: region
    type: str
    description: 'Region to run Dataflow job in'
  - name: input_csv
    type: GCSPath
    description: 'Path to CSV file'
  - name: output_dir
    type: GCSPath
    description: 'Top-level directory for TF records'
  - name: labels_dict
    type: GCSPath
    description: 'Dictionary file for class names'

outputs:
  - name: tfrecords_topdir
    type: GCSPath
    description: 'Top-level directory for TF records'

# `container` must be nested under `implementation`, and `image`/`command`/
# `args` under `container`; at column 0 they would be parsed as unrelated
# top-level keys and `implementation` would be null.
implementation:
  container:
    # NOTE(review): `:latest` is a mutable tag; consider pinning a version
    # or digest for reproducible runs.
    image: gcr.io/ai-analytics-solutions/practical-ml-vision-book:latest
    command:
      - bash
      - /src/practical-ml-vision-book/10_mlops/components/noop_create_dataset.sh
    args:
      # First script argument: the value supplied for the `output_dir` input.
      - inputValue: output_dir
      # Second script argument: placeholder for the `tfrecords_topdir`
      # output; presumably resolved by the pipeline runtime to a local file
      # path the script writes the output value to -- verify against the
      # component specification.
      - outputPath: tfrecords_topdir