# utils/dataproc.py
def moz_dataproc_pyspark_runner(
    # SubDAG identity: Airflow names the generated subdag "<parent_dag_name>.<dag_name>"
    parent_dag_name=None,
    dag_name="run_pyspark_on_dataproc",
    default_args=None,
    # Cluster shape: name, worker count, Dataproc image, and network placement
    cluster_name=None,
    num_workers=2,
    image_version="1.4-debian10",
    region="us-west1",
    subnetwork_uri=None,
    internal_ip_only=None,
    # Lifetime caps in seconds: tear down after 3h idle or 6h total
    idle_delete_ttl=10800,
    auto_delete_ttl=21600,
    # Machine types and number of preemptible workers
    master_machine_type="n1-standard-8",
    worker_machine_type="n1-standard-4",
    num_preemptible_workers=0,
    service_account="dataproc-runner-prod@airflow-dataproc.iam.gserviceaccount.com",
    # Optional cluster customization: init actions, metadata, properties, components
    init_actions_uris=None,
    additional_metadata=None,
    additional_properties=None,
    optional_components=None,
    install_component_gateway=True,
    # The PySpark driver script (a GCS URI) and its command-line arguments
    python_driver_code=None,
    py_args=None,
    job_name=None,
    # Airflow connections, GCP project, and GCS buckets for artifacts and scratch
    aws_conn_id=None,
    gcp_conn_id="google_cloud_airflow_dataproc",
    project_id="airflow-dataproc",
    artifact_bucket="moz-fx-data-prod-airflow-dataproc-artifacts",
    storage_bucket="moz-fx-data-prod-dataproc-scratch",
    # Persistent disk type/size (GB) and local SSD count per node
    master_disk_type="pd-standard",
    worker_disk_type="pd-standard",
    master_disk_size=1024,
    worker_disk_size=1024,
    master_num_local_ssds=0,
    worker_num_local_ssds=0,
):
    """Build a SubDAG that creates a Dataproc cluster, runs the given
    PySpark driver on it, and deletes the cluster afterwards."""
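
# Usage sketch (not part of the source file): the runner is typically wrapped in a
# SubDagOperator inside a parent DAG, with dag_name matching the operator's task_id
# so the subdag id resolves to "<parent>.<task_id>". The DAG id, cluster name, driver
# path, and arguments below are hypothetical placeholders.
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.subdag_operator import SubDagOperator

from utils.dataproc import moz_dataproc_pyspark_runner

default_args = {
    "owner": "example@mozilla.com",
    "start_date": datetime(2020, 1, 1),
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}

with DAG("example_parent_dag", default_args=default_args, schedule_interval="@daily") as dag:
    run_job = SubDagOperator(
        task_id="run_pyspark_job",
        subdag=moz_dataproc_pyspark_runner(
            parent_dag_name=dag.dag_id,
            dag_name="run_pyspark_job",  # must match task_id for subdag naming
            default_args=default_args,
            cluster_name="example-cluster-{{ ds_nodash }}",
            python_driver_code="gs://moz-fx-data-prod-airflow-dataproc-artifacts/jobs/example_job.py",
            py_args=["--date", "{{ ds }}"],
            num_workers=4,
        ),
    )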