dags/looker.py (208 lines of code) (raw):
from datetime import datetime, timedelta
from airflow import DAG
from airflow.models import Variable
from airflow.providers.cncf.kubernetes.secret import Secret
from operators.gcp_container_operator import GKEPodOperator
from utils.tags import Tag
# Fallback tag of the lookml-generator image, meant for the case where the
# `lookml_generator_release_str` Airflow Variable does not provide one.
DEFAULT_LOOKML_GENERATOR_IMAGE_VERSION = "v1.17.0"

# Markdown handed to the DAG as `doc_md`; it holds the triage notes for this job.
DOCS = """\
# Looker
*Triage notes*
As long as the most recent DAG run is successful this job can be considered healthy.
In such case, past DAG failures can be ignored.
- Failure of the `lookml_generator` task may be due to a new Glean app or changes to
`custom-namespaces.yaml`. In these cases, the task will have created a PR in
[looker-spoke-default](https://github.com/mozilla/looker-spoke-default)
with the title "Auto-push from LookML Generator". These PRs will need to be merged
and the task re-run.
"""

# Task-level defaults handed to the DAG as `default_args`.
default_args = {
    # Ownership and scheduling baseline.
    "owner": "ascholtz@mozilla.com",
    "depends_on_past": False,
    "start_date": datetime(2024, 5, 21),
    # Notification switches.
    "email_on_failure": True,
    "email_on_retry": True,
    # Retry policy: two retries, half an hour apart.
    "retries": 2,
    "retry_delay": timedelta(minutes=30),
}
# Tags attached to the DAG through its `tags=` argument: the tier_1 impact-tier tag.
tags = [Tag.ImpactTier.tier_1]
def _env_secret(env_var, key):
    """Build a Secret that exposes `key` of `airflow-gke-secrets` as env var `env_var`."""
    return Secret(
        deploy_type="env",
        deploy_target=env_var,
        secret="airflow-gke-secrets",
        key=key,
    )


# SSH key (base64) for the Looker git repositories.
looker_repos_secret_git_ssh_key_b64 = _env_secret(
    "GIT_SSH_KEY_BASE64",
    "probe_scraper_secret__looker_repos_secret_git_ssh_key_b64",
)

# Looker API credentials, published as LOOKER_API_CLIENT_ID / LOOKER_API_CLIENT_SECRET.
# The prod and staging pairs share env var names, so a pod takes exactly one pair.
looker_api_client_id_prod = _env_secret(
    "LOOKER_API_CLIENT_ID",
    "probe_scraper_secret__looker_api_client_id_prod",
)
looker_api_client_secret_prod = _env_secret(
    "LOOKER_API_CLIENT_SECRET",
    "probe_scraper_secret__looker_api_client_secret_prod",
)
looker_api_client_id_staging = _env_secret(
    "LOOKER_API_CLIENT_ID",
    "probe_scraper_secret__looker_api_client_id_staging",
)
looker_api_client_secret_staging = _env_secret(
    "LOOKER_API_CLIENT_SECRET",
    "probe_scraper_secret__looker_api_client_secret_staging",
)

# The same prod credential keys as above, published under the
# LOOKER_CLIENT_ID / LOOKER_CLIENT_SECRET env var names instead.
looker_client_id_prod = _env_secret(
    "LOOKER_CLIENT_ID",
    "probe_scraper_secret__looker_api_client_id_prod",
)
looker_client_secret_prod = _env_secret(
    "LOOKER_CLIENT_SECRET",
    "probe_scraper_secret__looker_api_client_secret_prod",
)

# GitHub access token, published as GITHUB_ACCESS_TOKEN.
dataops_looker_github_secret_access_token = _env_secret(
    "GITHUB_ACCESS_TOKEN",
    "probe_scraper_secret__dataops_looker_github_secret_access_token",
)
# The "looker" DAG: generate LookML for staging, then for prod, validate the prod
# result with spectacles, and finally delete outdated Looker branches.
with DAG(
    "looker",
    doc_md=DOCS,
    max_active_runs=1,
    default_args=default_args,
    schedule_interval=None,  # no schedule of its own
    tags=tags,
) as dag:
    # Connection and cluster settings shared by every GKEPodOperator below.
    airflow_gke_prod_kwargs = {
        "gcp_conn_id": "google_cloud_airflow_gke",
        "project_id": "moz-fx-data-airflow-gke-prod",
        "location": "us-west1",
        "cluster_name": "workloads-prod-v1",
    }

    # Variable.get raises KeyError for a missing Variable unless a default is
    # supplied (it never returns None on its own), so the fallback version has to
    # be passed as `default_var` for it to ever take effect.
    image_tag = Variable.get(
        "lookml_generator_release_str",
        default_var=DEFAULT_LOOKML_GENERATOR_IMAGE_VERSION,
    )

    # Production LookML generation, using the pinned `image_tag` release.
    lookml_generator_prod = GKEPodOperator(
        owner="ascholtz@mozilla.com",
        email=[
            "ascholtz@mozilla.com",
            "telemetry-alerts@mozilla.com",
        ],
        task_id="lookml_generator",
        name="lookml-generator-1",
        image="gcr.io/moz-fx-data-airflow-prod-88e0/lookml-generator:" + image_tag,
        startup_timeout_seconds=500,
        dag=dag,
        env_vars={
            "HUB_REPO_URL": "git@github.com:mozilla/looker-hub.git",
            "HUB_BRANCH_SOURCE": "base",
            "HUB_BRANCH_PUBLISH": "main",
            "SPOKE_REPO_URL": "git@github.com:mozilla/looker-spoke-default.git",
            "SPOKE_BRANCH_PUBLISH": "main",
            "LOOKER_INSTANCE_URI": "https://mozilla.cloud.looker.com",
            "UPDATE_SPOKE_BRANCHES": "true",
        },
        secrets=[
            looker_repos_secret_git_ssh_key_b64,
            looker_api_client_id_prod,
            looker_api_client_secret_prod,
            dataops_looker_github_secret_access_token,
        ],
        **airflow_gke_prod_kwargs,
    )

    # Staging LookML generation: runs the `latest` image against the staging Looker
    # instance and publishes to the `main-stage` branches.
    lookml_generator_staging = GKEPodOperator(
        owner="ascholtz@mozilla.com",
        email=[
            "ascholtz@mozilla.com",
            "telemetry-alerts@mozilla.com",
        ],
        task_id="lookml_generator_staging",
        name="lookml-generator-staging-1",
        image="gcr.io/moz-fx-data-airflow-prod-88e0/lookml-generator:latest",
        dag=dag,
        env_vars={
            "HUB_REPO_URL": "git@github.com:mozilla/looker-hub.git",
            "HUB_BRANCH_SOURCE": "base",
            "HUB_BRANCH_PUBLISH": "main-stage",
            "SPOKE_REPO_URL": "git@github.com:mozilla/looker-spoke-default.git",
            "SPOKE_BRANCH_PUBLISH": "main-stage",
            "LOOKER_INSTANCE_URI": "https://mozillastaging.cloud.looker.com",
            "UPDATE_SPOKE_BRANCHES": "true",
        },
        secrets=[
            looker_repos_secret_git_ssh_key_b64,
            looker_api_client_id_staging,
            looker_api_client_secret_staging,
            dataops_looker_github_secret_access_token,
        ],
        **airflow_gke_prod_kwargs,
    )

    # Looker folder ids passed to `spectacles content --folders`.
    looker_folders_to_validate = [
        "706",  # KPI metrics
    ]

    # Runs `spectacles content` on the listed folders against the prod instance.
    validate_content_spectacles = GKEPodOperator(
        owner="ascholtz@mozilla.com",
        email=[
            "ascholtz@mozilla.com",
            "telemetry-alerts@mozilla.com",
        ],
        task_id="validate_content_spectacles",
        name="validate-content-spectacles",
        image="gcr.io/moz-fx-data-airflow-prod-88e0/lookml-generator:latest",
        dag=dag,
        cmds=["bash", "-x", "-c"],
        # The adjacent literals concatenate into ONE command string for `bash -c`.
        arguments=[
            "spectacles content --verbose"
            " --project spoke-default"
            " --branch main-validation"  # this branch is a mirror of main, but Looker cannot open production branches (like main) for validation
            " --pin-imports looker-hub:main"
            f" --folders {' '.join(looker_folders_to_validate)}"
        ],
        env_vars={
            "LOOKER_BASE_URL": "https://mozilla.cloud.looker.com",
        },
        secrets=[looker_client_id_prod, looker_client_secret_prod],
        **airflow_gke_prod_kwargs,
    )

    # Runs `spectacles lookml` on the spoke-default project against the prod instance.
    validate_lookml_spoke_default_spectacles = GKEPodOperator(
        owner="ascholtz@mozilla.com",
        email=[
            "ascholtz@mozilla.com",
            "telemetry-alerts@mozilla.com",
        ],
        task_id="validate_lookml_spoke_default_spectacles",
        name="validate-lookml-spoke-default-spectacles",
        image="gcr.io/moz-fx-data-airflow-prod-88e0/lookml-generator:latest",
        dag=dag,
        cmds=["bash", "-x", "-c"],
        # The adjacent literals concatenate into ONE command string for `bash -c`.
        arguments=[
            "spectacles lookml --verbose"
            " --project spoke-default"
            " --branch main-validation"  # this branch is a mirror of main, but Looker cannot open production branches (like main) for validation
            " --remote-reset"
            " --pin-imports looker-hub:main"
        ],
        env_vars={
            "LOOKER_BASE_URL": "https://mozilla.cloud.looker.com",
        },
        secrets=[looker_client_id_prod, looker_client_secret_prod],
        **airflow_gke_prod_kwargs,
    )

    # Invokes the looker-utils `delete-branches` command with a 180-day inactivity
    # threshold.
    # NOTE(review): unlike the other tasks, this one sets no `owner`, `email` or
    # `name` — confirm that relying on the defaults is intended.
    delete_outdated_branches = GKEPodOperator(
        task_id="delete_outdated_branches",
        arguments=[
            "python",
            "-m",
            "looker_utils.main",
            "delete-branches",
            "--inactive_days=180",
        ],
        image="gcr.io/moz-fx-data-airflow-prod-88e0/looker-utils_docker_etl:latest",
        env_vars={
            "LOOKER_INSTANCE_URI": "https://mozilla.cloud.looker.com",
        },
        secrets=[looker_client_id_prod, looker_client_secret_prod],
        **airflow_gke_prod_kwargs,
    )

    # Staging is generated before prod; both validations follow prod, and branch
    # cleanup is downstream of both validations.
    lookml_generator_staging >> lookml_generator_prod
    lookml_generator_prod >> validate_content_spectacles >> delete_outdated_branches
    lookml_generator_prod >> validate_lookml_spoke_default_spectacles >> delete_outdated_branches