dags/broken_site_report_ml.py (38 lines of code) (raw):
from datetime import datetime
from airflow import DAG
from operators.gcp_container_operator import GKEPodOperator
from utils.tags import Tag
DOCS = """
### ML classification of broken site reports
#### Description
Runs a Docker image that does the following:
1. Translates incoming broken sites reports to English with ML.TRANSLATE.
2. Classifies translated reports as valid/invalid using [bugbug](https://github.com/mozilla/bugbug).
3. Stores translation and classification results in BQ.
The container is defined in
[docker-etl](https://github.com/mozilla/docker-etl/tree/main/jobs/broken-site-report-ml)
*Triage notes*
As long as the most recent DAG run is successful this job doesn't need to be triaged.
#### Owner
kberezina@mozilla.com
"""
default_args = {
"owner": "kberezina@mozilla.com",
"email": ["kberezina@mozilla.com", "webcompat-internal@mozilla.org"],
"depends_on_past": False,
"start_date": datetime(2023, 12, 21),
"email_on_failure": True,
}
tags = [
Tag.ImpactTier.tier_2,
]
every_fifteen_minutes = "*/15 * * * *"
with DAG(
"broken_site_report_ml",
default_args=default_args,
max_active_runs=1,
doc_md=DOCS,
schedule_interval=every_fifteen_minutes,
tags=tags,
catchup=False,
) as dag:
broken_site_report_ml = GKEPodOperator(
task_id="broken_site_report_ml",
arguments=[
"python",
"broken_site_report_ml/main.py",
"--bq_project_id",
"moz-fx-dev-dschubert-wckb",
"--bq_dataset_id",
"webcompat_user_reports",
],
image="gcr.io/moz-fx-data-airflow-prod-88e0/broken-site-report-ml_docker_etl:latest",
dag=dag,
)