perfkitbenchmarker/configs/dataflow_template.yaml (54 lines of code) (raw):

#################################
# Worker machine configs
#################################
# Each mapping below carries an anchor (&name) so that a benchmark entry can
# reference it as its worker_group through the matching alias (*name).
# The four specs share the same disk_size and differ only in machine_type.
sixteen_core: &sixteen_core
  vm_spec:
    GCP:
      machine_type: n1-standard-16
  disk_spec:
    GCP:
      disk_size: 300

eight_core: &eight_core
  vm_spec:
    GCP:
      machine_type: n1-standard-8
  disk_spec:
    GCP:
      disk_size: 300

four_core: &four_core
  vm_spec:
    GCP:
      machine_type: n1-standard-4
  disk_spec:
    GCP:
      disk_size: 300

two_core: &two_core
  vm_spec:
    GCP:
      machine_type: n1-standard-2
  disk_spec:
    GCP:
      disk_size: 300

##################################################################
# Benchmark flags specifying Dataflow template and parameters
##################################################################
# Anchored as &myflags and reused by every entry under `benchmarks`.
# <MY_BUCKET> and <MY_PROJECT> are placeholders; presumably they must be
# replaced with real bucket/project names before running (verify locally).
flags: &myflags
  dpb_service_zone: us-central1-a
  dpb_dataflow_timeout: 1800  # 30 minutes
  dpb_dataflow_temp_location: gs://<MY_BUCKET>/temp
  dpb_dataflow_template_gcs_location: gs://dataflow-templates/latest/PubSub_Subscription_to_BigQuery
  dpb_dataflow_template_input_subscription: projects/<MY_PROJECT>/subscriptions/perftest-pubsub-input-sub-1M
  dpb_dataflow_template_output_ptransform: WriteSuccessfulRecords/StreamingInserts/StreamingWriteTables/StreamingWrite/BatchedStreamingWrite.ViaBundleFinalization/ParMultiDo(BatchAndInsertElements)
  dpb_dataflow_template_additional_args:
    - outputTableSpec=<MY_PROJECT>:test_dataset.test_run

#################################
# Benchmark variations to run
#################################
# One entry per worker machine size. Every entry uses a single worker and the
# shared flags above; only the worker_group alias changes between entries.
benchmarks:
  - dpb_dataflow_template_benchmark:
      dpb_service:
        service_type: dataflow_template
        worker_count: 1
        worker_group: *sixteen_core
      flags: *myflags
  - dpb_dataflow_template_benchmark:
      dpb_service:
        service_type: dataflow_template
        worker_count: 1
        worker_group: *eight_core
      flags: *myflags
  - dpb_dataflow_template_benchmark:
      dpb_service:
        service_type: dataflow_template
        worker_count: 1
        worker_group: *four_core
      flags: *myflags
  - dpb_dataflow_template_benchmark:
      dpb_service:
        service_type: dataflow_template
        worker_count: 1
        worker_group: *two_core
      flags: *myflags

#################################
# Alternative benchmark config examples
#################################
# The examples below are intentionally left commented out. Both use the same
# key (dpb_dataflow_template_benchmark), so uncommenting both in the same
# mapping would create a duplicate key; enable at most one at a time.
# *four_core refers to the four_core anchor defined at the top of this file.

# dpb_dataflow_template_benchmark:
#   description: Run Dataflow template across several versions
#   dpb_service:
#     service_type: dataflow_template
#     worker_count: 1
#     worker_group: *four_core
#   flag_matrix: recent_versions
#   flag_matrix_defs:
#     recent_versions:
#       dpb_dataflow_template_gcs_location:
#         - gs://dataflow-templates/2022-07-04-00_RC00/PubSub_Subscription_to_BigQuery
#         - gs://dataflow-templates/2022-06-27-00_RC00/PubSub_Subscription_to_BigQuery
#         - gs://dataflow-templates/2022-06-21-00_RC00/PubSub_Subscription_to_BigQuery
#         - gs://dataflow-templates/2022-06-06-00_RC00/PubSub_Subscription_to_BigQuery
#         - gs://dataflow-templates/2022-05-30-00_RC00/PubSub_Subscription_to_BigQuery

# dpb_dataflow_template_benchmark:
#   description: Run Dataflow template across several input sizes
#   dpb_service:
#     service_type: dataflow_template
#     worker_count: 1
#     worker_group: *four_core
#   flag_matrix: input_sizes
#   flag_matrix_defs:
#     input_sizes:
#       dpb_dataflow_template_input_subscription:
#         - projects/<MY_PROJECT>/subscriptions/perftest-pubsub-input-sub-100k
#         - projects/<MY_PROJECT>/subscriptions/perftest-pubsub-input-sub-1M
#         - projects/<MY_PROJECT>/subscriptions/perftest-pubsub-input-sub-10M