dags/dap_collector_ppa_prod.py (64 lines of code) (raw):

from datetime import datetime

from airflow import DAG
from airflow.providers.cncf.kubernetes.secret import Secret

from operators.gcp_container_operator import GKEPodOperator
from utils.tags import Tag

# Markdown description of this DAG; handed to the DAG below as ``doc_md``.
DOCS = """
### PPA Prod DAP Collector

#### Description

Runs a Docker image that collects PPA Prod Environment data from a DAP
(Distributed Aggregation Protocol) leader and stores it in BigQuery.

The container is defined in
[docker-etl](https://github.com/mozilla/docker-etl/tree/main/jobs/dap-collector-ppa-prod)

This DAG requires following variables to be defined in Airflow:

* dap_ppa_prod_auth_token
* dap_ppa_prod_hpke_private_key
* dap_ppa_prod_task_config_url
* dap_ppa_prod_ad_config_url

This job is under active development, occasional failures are expected.

#### Owner

bbirdsong@mozilla.com
"""

# Defaults applied to every task in the DAG (passed as ``default_args``).
default_args = {
    # Ownership and notification settings.
    "owner": "bbirdsong@mozilla.com",
    "email": ["ads-eng@mozilla.com", "bbirdsong@mozilla.com"],
    "email_on_failure": True,
    "email_on_retry": False,
    # Scheduling behaviour: no dependency on earlier runs, no retries.
    "depends_on_past": False,
    "retries": 0,
    "start_date": datetime(year=2024, month=6, day=26),
}

# Values forwarded to the collector's --project / --*-table-id options.
project_id = "moz-fx-ads-prod"
ad_table_id = "ppa.measurements"
report_table_id = "ppa.reports"

tags = [
    Tag.ImpactTier.tier_3,
    Tag.Triage.no_triage,
]

# Both credentials are read from the "airflow-gke-secrets" secret and are
# deployed with deploy_type="env", targeting the variable named in
# ``deploy_target``.
auth_token = Secret(
    secret="airflow-gke-secrets",
    key="DAP_PPA_PROD_AUTH_TOKEN",
    deploy_type="env",
    deploy_target="AUTH_TOKEN",
)
hpke_private_key = Secret(
    secret="airflow-gke-secrets",
    key="DAP_PPA_PROD_HPKE_PRIVATE_KEY",
    deploy_type="env",
    deploy_target="HPKE_PRIVATE_KEY",
)

# Entry point executed inside the container image.
collector_command = ["python", "dap_collector_ppa_prod/main.py"]

# Command-line options for the collector. The "{{ ... }}" placeholders are
# template expressions left verbatim for Airflow to render.
# NOTE(review): `.at(0)` presumably pins the interval end to midnight before
# it is formatted by the `ts` filter -- confirm against the pendulum docs.
collector_options = [
    "--date={{ data_interval_end.at(0) | ts }}",
    "--task-config-url={{ var.value.dap_ppa_prod_task_config_url }}",
    "--ad-config-url={{ var.value.dap_ppa_prod_ad_config_url }}",
    "--project",
    project_id,
    "--ad-table-id",
    ad_table_id,
    "--report-table-id",
    report_table_id,
]

with DAG(
    dag_id="dap_collector_ppa_prod",
    schedule_interval="15 0 * * *",  # cron: minute 15 of hour 0, every day
    catchup=False,
    default_args=default_args,
    doc_md=DOCS,
    tags=tags,
) as dag:
    # Single task: run the collector image as a pod on GKE.
    dap_collector = GKEPodOperator(
        task_id="dap_collector_ppa_prod",
        image="gcr.io/moz-fx-data-airflow-prod-88e0/dap-collector-ppa-prod_docker_etl:latest",
        arguments=[*collector_command, *collector_options],
        secrets=[
            hpke_private_key,
            auth_token,
        ],
        gcp_conn_id="google_cloud_airflow_gke",
    )