def get_dataproc_parameters()

in utils/dataproc.py [0:0]


def get_dataproc_parameters(conn_id="google_cloud_airflow_dataproc"):
    """
    Gather the Dataproc parameters for the current deploy environment.

    The environment is treated as development when the ``DEPLOY_ENVIRONMENT``
    environment variable equals ``"dev"``; any other value (or an unset
    variable) selects the production settings.

    The provided connection string should be a Google Cloud connection
    and should either be the production default ("dataproc-runner-prod"), or a
    service key associated with a sandbox account.

    In development, the project id and client email come from the
    ``dev_project_id`` / ``dev_client_email`` placeholders below (edit them to
    point at your sandbox), and the artifact and scratch buckets are derived
    from the dev project id. The output bucket reuses the artifact bucket.

    :param conn_id: connection id passed through unchanged as the first field
        of the returned parameters.
    :return: a ``DataprocParameters`` built positionally from ``conn_id``,
        ``project_id``, ``is_dev``, ``client_email``, ``artifact_bucket``,
        ``storage_bucket`` and ``output_bucket``.
    """
    # Placeholders for sandbox values; only used when is_dev is true.
    dev_project_id = "replace_me"
    dev_client_email = "replace_me"

    is_dev = os.environ.get("DEPLOY_ENVIRONMENT") == "dev"

    # Dev uses the sandbox project; otherwise use the production project,
    # which is the one the production service account below belongs to.
    project_id = dev_project_id if is_dev else "airflow-dataproc"
    client_email = (
        dev_client_email
        if is_dev
        else "dataproc-runner-prod@airflow-dataproc.iam.gserviceaccount.com"
    )
    # Dev bucket names are derived from the (sandbox) project id.
    artifact_bucket = (
        f"{project_id}-dataproc-artifacts"
        if is_dev
        else "moz-fx-data-prod-airflow-dataproc-artifacts"
    )
    storage_bucket = (
        f"{project_id}-dataproc-scratch"
        if is_dev
        else "moz-fx-data-prod-dataproc-scratch"
    )
    # In dev there is no separate export bucket; reuse the artifact bucket.
    output_bucket = artifact_bucket if is_dev else "airflow-dataproc-bq-parquet-exports"
    return DataprocParameters(
        conn_id,
        project_id,
        is_dev,
        client_email,
        artifact_bucket,
        storage_bucket,
        output_bucket,
    )