in utils/dataproc.py [0:0]
def get_dataproc_parameters(conn_id="google_cloud_airflow_dataproc"):
    """
    Gather the Dataproc parameters for the current deploy environment.

    The environment is treated as development only when the
    ``DEPLOY_ENVIRONMENT`` environment variable is exactly ``"dev"``; any
    other value (including unset) selects the production settings.

    In development, the project id and client email come from the
    ``dev_project_id`` / ``dev_client_email`` placeholders below, which must
    be edited to point at a sandbox project before use. The artifact and
    scratch bucket names are derived from that project id, and output is
    written to the artifact bucket.

    The provided connection string should be a Google Cloud connection
    and should either be the production default ("dataproc-runner-prod"), or a
    service key associated with a sandbox account.

    :param conn_id: connection id, passed through unchanged.
    :return: a ``DataprocParameters`` built positionally from
        ``(conn_id, project_id, is_dev, client_email, artifact_bucket,
        storage_bucket, output_bucket)``.
    """
    # Placeholders to be replaced with sandbox values for local development.
    dev_project_id = "replace_me"
    dev_client_email = "replace_me"

    is_dev = os.environ.get("DEPLOY_ENVIRONMENT") == "dev"

    if is_dev:
        # Every dev value hangs off the sandbox project. Previously the
        # project id conditional was inverted, so dev got the production
        # project and production got the "replace_me" placeholder.
        project_id = dev_project_id
        client_email = dev_client_email
        artifact_bucket = f"{project_id}-dataproc-artifacts"
        storage_bucket = f"{project_id}-dataproc-scratch"
        output_bucket = artifact_bucket
    else:
        project_id = "airflow-dataproc"
        client_email = (
            "dataproc-runner-prod@airflow-dataproc.iam.gserviceaccount.com"
        )
        artifact_bucket = "moz-fx-data-prod-airflow-dataproc-artifacts"
        storage_bucket = "moz-fx-data-prod-dataproc-scratch"
        output_bucket = "airflow-dataproc-bq-parquet-exports"

    return DataprocParameters(
        conn_id,
        project_id,
        is_dev,
        client_email,
        artifact_bucket,
        storage_bucket,
        output_bucket,
    )