dags/crash_symbolication.py (115 lines of code) (raw):
"""
Generates "Weekly report of modules with missing symbols in crash reports" and sends it to the Stability list.
Generates correlations data for top crashers.
Uses crash report data imported from Socorro.
"""
import datetime
from airflow import DAG
from airflow.operators.subdag import SubDagOperator
from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
from airflow.sensors.external_task import ExternalTaskSensor
from utils.constants import ALLOWED_STATES, FAILED_STATES
from utils.dataproc import get_dataproc_parameters, moz_dataproc_pyspark_runner
from utils.tags import Tag
# Addresses notified by Airflow when a task in this DAG fails or is retried.
_ALERT_RECIPIENTS = [
    "mcastelluccio@mozilla.com",
    "srose@mozilla.com",
    "telemetry-alerts@mozilla.com",
]

# Task-level defaults. They are handed to the DAG itself and also forwarded to
# the Dataproc sub-DAG factories so that every task shares the same policy.
default_args = {
    "owner": "srose@mozilla.com",
    "depends_on_past": False,
    "start_date": datetime.datetime(2020, 11, 26),
    "email": _ALERT_RECIPIENTS,
    "email_on_failure": True,
    "email_on_retry": True,
    # Up to two retries, spaced half an hour apart.
    "retries": 2,
    "retry_delay": datetime.timedelta(minutes=30),
}

# Pinned Python dependencies. They are space-joined and passed to the cluster
# as the ``PIP_PACKAGES`` metadata entry, next to the pip-install init action.
PIP_PACKAGES = [
    "boto3==1.16.20",
    "scipy==1.5.4",
    "google-cloud-storage==2.7.0",
]
tags = [Tag.ImpactTier.tier_3]

# Directory in the python_mozetl repository that holds the PySpark drivers;
# each driver file is named after its task (``<task_name>.py``).
_MOZETL_SYMBOLICATION_URL = (
    "https://raw.githubusercontent.com/mozilla/python_mozetl/main/mozetl/symbolication/"
)


def _symbolication_subdag(
    parent_dag_name,
    task_name,
    run_on_days,
    worker_machine_type,
    dataproc_params,
    extra_properties=None,
):
    """Build the Dataproc PySpark sub-DAG for one symbolication job.

    Both jobs in this file use the same cluster image, init action, pip
    packages, BigQuery connector jar, idle TTL and worker count; only the
    arguments below differ.

    Args:
        parent_dag_name: ``dag_id`` of the DAG that will own the sub-DAG.
        task_name: Underscore-separated job name. It is used verbatim as the
            sub-DAG name and driver file name, and with hyphens instead of
            underscores as the Dataproc job name and cluster-name prefix.
        run_on_days: Day numbers (as strings) forwarded after
            ``--run-on-days``; the driver script decides whether to do work
            for the given ``--date``.
        worker_machine_type: GCE machine type for the cluster workers.
        dataproc_params: Object returned by ``get_dataproc_parameters``; its
            ``conn_id``, ``client_email`` and ``storage_bucket`` are used.
        extra_properties: Optional cluster properties added after the
            ``spark:spark.jars`` entry.

    Returns:
        Whatever ``moz_dataproc_pyspark_runner`` returns (passed as the
        ``subdag`` of a ``SubDagOperator``).
    """
    job_slug = task_name.replace("_", "-")
    return moz_dataproc_pyspark_runner(
        parent_dag_name=parent_dag_name,
        image_version="1.5-debian10",
        dag_name=task_name,
        default_args=default_args,
        # One cluster per logical date; "{{ ds }}" is rendered by Airflow.
        cluster_name=job_slug + "-{{ ds }}",
        job_name=job_slug,
        python_driver_code=_MOZETL_SYMBOLICATION_URL + task_name + ".py",
        # Fresh list/dict objects per call, exactly as when written inline.
        init_actions_uris=[
            "gs://dataproc-initialization-actions/python/pip-install.sh"
        ],
        additional_metadata={"PIP_PACKAGES": " ".join(PIP_PACKAGES)},
        additional_properties={
            "spark:spark.jars": "gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar",
            **(extra_properties or {}),
        },
        py_args=["--run-on-days", *run_on_days, "--date", "{{ ds }}"],
        idle_delete_ttl=14400,
        num_workers=2,
        worker_machine_type=worker_machine_type,
        gcp_conn_id=dataproc_params.conn_id,
        service_account=dataproc_params.client_email,
        storage_bucket=dataproc_params.storage_bucket,
    )


with DAG(
    "crash_symbolication",
    default_args=default_args,
    # dag runs daily but tasks only run on certain days
    schedule_interval="0 5 * * *",
    tags=tags,
    doc_md=__doc__,
) as dag:
    # modules_with_missing_symbols sends results as email, so its cluster
    # needs the SES credentials stored on this Airflow connection.
    # NOTE(review): this lookup runs every time the file is parsed, and the
    # keys are passed on as plain cluster properties below.
    # NOTE(review): client_type is "s3" although the connection is named for
    # SES; presumably only the connection's credentials matter here - confirm.
    ses_aws_conn_id = "aws_data_iam_ses"
    ses_access_key, ses_secret_key, _ = AwsBaseHook(
        aws_conn_id=ses_aws_conn_id, client_type="s3"
    ).get_credentials()

    # Block both jobs until the Socorro crash data has been loaded.
    wait_for_socorro_import = ExternalTaskSensor(
        task_id="wait_for_socorro_import",
        external_dag_id="socorro_import",
        external_task_id="bigquery_load",
        check_existence=True,
        # This DAG is scheduled at 05:00; look at the upstream run whose
        # logical date is five hours earlier.
        execution_delta=datetime.timedelta(hours=5),
        mode="reschedule",
        allowed_states=ALLOWED_STATES,
        failed_states=FAILED_STATES,
        pool="DATA_ENG_EXTERNALTASKSENSOR",
        email_on_retry=False,
    )

    params = get_dataproc_parameters("google_cloud_airflow_dataproc")

    modules_with_missing_symbols = SubDagOperator(
        task_id="modules_with_missing_symbols",
        subdag=_symbolication_subdag(
            parent_dag_name=dag.dag_id,
            task_name="modules_with_missing_symbols",
            run_on_days=["0"],  # run monday
            worker_machine_type="n1-standard-4",
            dataproc_params=params,
            extra_properties={
                "spark-env:AWS_ACCESS_KEY_ID": ses_access_key,
                "spark-env:AWS_SECRET_ACCESS_KEY": ses_secret_key,
            },
        ),
    )

    top_signatures_correlations = SubDagOperator(
        task_id="top_signatures_correlations",
        subdag=_symbolication_subdag(
            parent_dag_name=dag.dag_id,
            task_name="top_signatures_correlations",
            # run monday, wednesday, and friday
            run_on_days=["0", "2", "4"],
            worker_machine_type="n1-standard-8",
            dataproc_params=params,
        ),
    )

    wait_for_socorro_import >> modules_with_missing_symbols
    wait_for_socorro_import >> top_signatures_correlations