# perfkitbenchmarker/configs/dataflow_wordcount.yaml
#################################
# Worker machine configs
#################################
# Worker group on n1-standard-8 machines; referenced from the benchmark
# entries through the *eight_core alias.
eight_core: &eight_core
  vm_spec:
    GCP:
      machine_type: n1-standard-8
  disk_spec:
    GCP:
      disk_size: 300
# Worker group on n1-standard-4 machines; referenced from the benchmark
# entries through the *four_core alias.
four_core: &four_core
  vm_spec:
    GCP:
      machine_type: n1-standard-4
  disk_spec:
    GCP:
      disk_size: 300
# Worker group on n1-standard-2 machines; referenced from the benchmark
# entries through the *two_core alias.
two_core: &two_core
  vm_spec:
    GCP:
      machine_type: n1-standard-2
  disk_spec:
    GCP:
      disk_size: 300
##################################################################
# Benchmark flags specifying Dataflow template and parameters
##################################################################
# Flag set shared by the benchmark entries through the *myflags alias.
# Replace every <MY_BUCKET> placeholder with your own bucket before running.
# NOTE(review): dpb_wordcount_out_base is given without a gs:// prefix while
# the temp/staging locations include one -- confirm the expected form against
# the flag definitions.
flags: &myflags
  dpb_service_zone: us-central1-a
  dpb_wordcount_out_base: <MY_BUCKET>
  dpb_dataflow_temp_location: gs://<MY_BUCKET>/temp
  dpb_dataflow_staging_location: gs://<MY_BUCKET>/temp
  dpb_job_jarfile: ./word-count-beam/target/word-count-beam-bundled-0.1.jar
  dpb_job_classname: org.apache.beam.examples.WordCount
#################################
# Benchmark variations to run
#################################
# One dpb_wordcount_benchmark entry per worker machine size. Every entry runs
# on the dataflow service with a single worker and the shared *myflags flags;
# only the worker_group alias differs between entries.
benchmarks:
  - dpb_wordcount_benchmark:
      dpb_service:
        service_type: dataflow
        worker_count: 1
        worker_group: *eight_core
      flags: *myflags
  - dpb_wordcount_benchmark:
      dpb_service:
        service_type: dataflow
        worker_count: 1
        worker_group: *four_core
      flags: *myflags
  - dpb_wordcount_benchmark:
      dpb_service:
        service_type: dataflow
        worker_count: 1
        worker_group: *two_core
      flags: *myflags
#################################
# Alternative benchmark config examples
#################################
# dpb_wordcount_benchmark:
#   description: Run word count on dataflow
#   dpb_service:
#     service_type: dataflow
#     worker_count: 1
#     worker_group:
#       vm_spec:
#         GCP:
#           machine_type: n1-standard-4
#       disk_spec:
#         GCP:
#           disk_size: 300
# dpb_wordcount_benchmark:
#   description: Run word count on dataflow
#   dpb_service:
#     service_type: dataflow
#     worker_count: 1
#     worker_group:
#       vm_spec:
#         GCP:
#           machine_type: n1-standard-4
#       disk_spec:
#         GCP:
#           disk_size: 300
#   flag_matrix: cross_runners
#   flag_matrix_defs:
#     cross_runners:
#       dpb_dataflow_runner: [DataflowRunner, DirectRunner]