# perfkitbenchmarker/configs/dataflow_template.yaml
#################################
# Worker machine configs
#################################
# Worker group on the GCP n1-standard-16 machine type.
# The &sixteen_core anchor is consumed by the benchmark entries further down
# through the *sixteen_core alias, so the nested mapping must stay attached
# to this key.
sixteen_core: &sixteen_core
  vm_spec:
    GCP:
      machine_type: n1-standard-16
  disk_spec:
    GCP:
      disk_size: 300  # presumably GB; confirm against the PKB disk spec
# Worker group on the GCP n1-standard-8 machine type.
# The &eight_core anchor is consumed by the benchmark entries further down
# through the *eight_core alias, so the nested mapping must stay attached
# to this key.
eight_core: &eight_core
  vm_spec:
    GCP:
      machine_type: n1-standard-8
  disk_spec:
    GCP:
      disk_size: 300  # presumably GB; confirm against the PKB disk spec
# Worker group on the GCP n1-standard-4 machine type.
# The &four_core anchor is consumed by the benchmark entries further down
# through the *four_core alias, so the nested mapping must stay attached
# to this key.
four_core: &four_core
  vm_spec:
    GCP:
      machine_type: n1-standard-4
  disk_spec:
    GCP:
      disk_size: 300  # presumably GB; confirm against the PKB disk spec
# Worker group on the GCP n1-standard-2 machine type.
# The &two_core anchor is consumed by the benchmark entries further down
# through the *two_core alias, so the nested mapping must stay attached
# to this key.
two_core: &two_core
  vm_spec:
    GCP:
      machine_type: n1-standard-2
  disk_spec:
    GCP:
      disk_size: 300  # presumably GB; confirm against the PKB disk spec
##################################################################
# Benchmark flags specifying Dataflow template and parameters
##################################################################
# Shared flag set, reused by every benchmark entry through the *myflags alias.
# Values in angle brackets (<MY_BUCKET>, <MY_PROJECT>) are placeholders and
# need to be replaced with real resource names before running.
flags: &myflags
  dpb_service_zone: us-central1-a
  dpb_dataflow_timeout: 1800  # 30 minutes
  dpb_dataflow_temp_location: gs://<MY_BUCKET>/temp
  dpb_dataflow_template_gcs_location: gs://dataflow-templates/latest/PubSub_Subscription_to_BigQuery
  dpb_dataflow_template_input_subscription: projects/<MY_PROJECT>/subscriptions/perftest-pubsub-input-sub-1M
  dpb_dataflow_template_output_ptransform: WriteSuccessfulRecords/StreamingInserts/StreamingWriteTables/StreamingWrite/BatchedStreamingWrite.ViaBundleFinalization/ParMultiDo(BatchAndInsertElements)
  # Extra key=value arguments, one list item per argument.
  dpb_dataflow_template_additional_args:
  - outputTableSpec=<MY_PROJECT>:test_dataset.test_run
#################################
# Benchmark variations to run
#################################
# Four runs of dpb_dataflow_template_benchmark, one per worker machine size
# (sixteen, eight, four and two cores). Each run uses a single worker and the
# shared *myflags flag set.
benchmarks:
- dpb_dataflow_template_benchmark:
    dpb_service:
      service_type: dataflow_template
      worker_count: 1
      worker_group: *sixteen_core
    flags: *myflags
- dpb_dataflow_template_benchmark:
    dpb_service:
      service_type: dataflow_template
      worker_count: 1
      worker_group: *eight_core
    flags: *myflags
- dpb_dataflow_template_benchmark:
    dpb_service:
      service_type: dataflow_template
      worker_count: 1
      worker_group: *four_core
    flags: *myflags
- dpb_dataflow_template_benchmark:
    dpb_service:
      service_type: dataflow_template
      worker_count: 1
      worker_group: *two_core
    flags: *myflags
#################################
# Alternative benchmark config examples
#################################
# dpb_dataflow_template_benchmark:
#   description: Run Dataflow template across several versions
#   dpb_service:
#     service_type: dataflow_template
#     worker_count: 1
#     worker_group: *four_core
#   flag_matrix: recent_versions
#   flag_matrix_defs:
#     recent_versions:
#       dpb_dataflow_template_gcs_location:
#       - gs://dataflow-templates/2022-07-04-00_RC00/PubSub_Subscription_to_BigQuery
#       - gs://dataflow-templates/2022-06-27-00_RC00/PubSub_Subscription_to_BigQuery
#       - gs://dataflow-templates/2022-06-21-00_RC00/PubSub_Subscription_to_BigQuery
#       - gs://dataflow-templates/2022-06-06-00_RC00/PubSub_Subscription_to_BigQuery
#       - gs://dataflow-templates/2022-05-30-00_RC00/PubSub_Subscription_to_BigQuery
# dpb_dataflow_template_benchmark:
#   description: Run Dataflow template across several input sizes
#   dpb_service:
#     service_type: dataflow_template
#     worker_count: 1
#     worker_group: *four_core
#   flag_matrix: input_sizes
#   flag_matrix_defs:
#     input_sizes:
#       dpb_dataflow_template_input_subscription:
#       - projects/<MY_PROJECT>/subscriptions/perftest-pubsub-input-sub-100k
#       - projects/<MY_PROJECT>/subscriptions/perftest-pubsub-input-sub-1M
#       - projects/<MY_PROJECT>/subscriptions/perftest-pubsub-input-sub-10M