# software/components/airflow/release.yaml
---
# Flux HelmRelease for Airflow. The HelmRelease object itself is created in
# the flux-system namespace and is named and labelled "airflow".
# NOTE(review): helm.toolkit.fluxcd.io/v2beta1 is a deprecated API version in
# newer Flux releases -- confirm the cluster's Flux version before migrating.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
name: airflow
namespace: flux-system
labels:
name: airflow
spec:
# The Helm release is named "airflow" and targets the airflow namespace.
targetNamespace: airflow
releaseName: airflow
# Declares a dependency on "airflow-dags" in the airflow namespace.
dependsOn:
- name: airflow-dags
namespace: airflow
# Failure handling: up to 3 retries on install; on upgrade up to 10 retries
# using the rollback strategy.
install:
remediation:
retries: 3
upgrade:
remediation:
retries: 10
strategy: rollback
# Helm tests are enabled for this release.
test:
enable: true
# Reconciliation interval.
interval: 10m0s
# Chart "airflow" from the airflow-community HelmRepository in flux-system.
chart:
spec:
chart: airflow
# Specify the desired chart version if needed
# NOTE(review): no version is pinned here, so the chart version is not fixed
# by this manifest -- consider pinning for reproducible deployments.
# version: 8.5.1
sourceRef:
kind: HelmRepository
name: airflow-community
namespace: flux-system
# Helm values passed to the chart.
values:
# Disabled example: workload-identity pod annotation for the triggerer.
# triggerer:
# podAnnotations:
# azure.workload.identity/use: "true"
# Do not create a service account; reference the existing one named
# workload-identity-sa.
serviceAccount:
create: false
name: workload-identity-sa
# Core Airflow settings: image, executor, pod security context and users.
airflow:
# podAnnotations:
# azure.workload.identity/use: "true"
# Upstream Apache Airflow image, version 2.10.1 on Python 3.12.
image:
repository: apache/airflow
tag: 2.10.1-python3.12
executor: KubernetesExecutor
# UID 1000 with fsGroup 1000.
securityContext:
fsGroup: 1000
runAsUser: 1000
# presumably keeps the users listed below in sync with Airflow -- TODO
# confirm against the chart's documentation for usersUpdate.
usersUpdate: true
# Defines the ADMIN_PASSWORD template variable from key "password" of the
# airflow-secrets secret.
usersTemplates:
ADMIN_PASSWORD:
kind: secret
name: airflow-secrets
key: password
# Single admin account; its password references the template variable above.
# NOTE(review): if this manifest is processed by Flux postBuild variable
# substitution, ${ADMIN_PASSWORD} would be replaced before Helm sees it and
# would need escaping as $${ADMIN_PASSWORD} -- confirm.
users:
- username: "admin" # This is hardcoded
password: ${ADMIN_PASSWORD}
role: Admin
email: admin@example.com
firstName: admin
lastName: admin
# Disabled examples: workload-identity annotation for DB migrations, and
# mounting the airflow-logs-pvc claim at /opt/airflow/logs.
# dbMigrations:
# podAnnotations:
# azure.workload.identity/use: "true"
# extraVolumeMounts:
# ## spec: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#volumemount-v1-core
# - name: logs-volume
# mountPath: /opt/airflow/logs
# extraVolumes:
# ## spec: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#volume-v1-core
# - name: logs-volume
# persistentVolumeClaim:
# claimName: airflow-logs-pvc
# OSDU PIP Packages
# Each package entry below is preceded by the --extra-index-url of the
# package registry that hosts it. Project id -> source repository:
# 148 https://community.opengroup.org/osdu/platform/system/sdks/common-python-sdk
# 668 https://community.opengroup.org/osdu/platform/data-flow/ingestion/osdu-airflow-lib
# 823 https://community.opengroup.org/osdu/platform/data-flow/ingestion/osdu-ingestion-lib
# Version ranges allow any release from 0.27 up to, but excluding, 1.0.0.
extraPipPackages:
# - "apache-airflow-providers-microsoft-azure"
- "--extra-index-url=https://community.opengroup.org/api/v4/projects/148/packages/pypi/simple"
- "osdu-api>=0.27.0,<1.0.0"
- "--extra-index-url=https://community.opengroup.org/api/v4/projects/668/packages/pypi/simple"
- "osdu-airflow>=0.27,<1.0.0"
- "--extra-index-url=https://community.opengroup.org/api/v4/projects/823/packages/pypi/simple"
- "osdu-ingestion>=0.27,<1.0.0"
# Pod template settings: uses the workload-identity-sa service account.
kubernetesPodTemplate:
serviceAccountName: workload-identity-sa
# annotations:
# azure.workload.identity/use: "true"
# Same package list as the one above; keep the two lists in sync.
# NOTE(review): presumably this copy applies to the pod template -- confirm.
extraPipPackages:
# - "apache-airflow-providers-microsoft-azure"
- "--extra-index-url=https://community.opengroup.org/api/v4/projects/148/packages/pypi/simple"
- "osdu-api>=0.27.0,<1.0.0"
- "--extra-index-url=https://community.opengroup.org/api/v4/projects/668/packages/pypi/simple"
- "osdu-airflow>=0.27,<1.0.0"
- "--extra-index-url=https://community.opengroup.org/api/v4/projects/823/packages/pypi/simple"
- "osdu-ingestion>=0.27,<1.0.0"
# Airflow Configuration
# Keys use Airflow's AIRFLOW__<SECTION>__<OPTION> environment-variable form;
# AIRFLOW_VAR_<NAME> keys define Airflow Variables. All values are quoted so
# they are passed as strings. The image configured for this release is
# Airflow 2.10.1, so option names should follow the Airflow 2.x reference.
config:
AIRFLOW__METRICS__USE_PATTERN_MATCH: "True"
AIRFLOW__LOGGING__LOGGING_LEVEL: "INFO"
AIRFLOW__LOGGING__ENABLE_TASK_CONTEXT_LOGGER: "False"
AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: "False"
# NOTE(review): STORE_SERIALIZED_DAGS and STORE_DAG_CODE are Airflow 1.10
# options; they appear to have no effect on Airflow 2.x -- confirm and drop.
AIRFLOW__CORE__STORE_SERIALIZED_DAGS: "True"
AIRFLOW__CORE__STORE_DAG_CODE: "True"
AIRFLOW__CORE__PLUGINS_FOLDER: "/opt/airflow/plugins"
AIRFLOW__CORE__PARALLELISM: "2000"
AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG: "2000"
# Airflow 2.2+ name of the option formerly called DAG_CONCURRENCY.
AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: "2000"
AIRFLOW__CORE__DAG_FILE_PROCESSOR_TIMEOUT: "1500"
# AIRFLOW_VAR_CORE__INGESTION__BATCH_SAVE_SIZE: "500"
# AIRFLOW_VAR_CORE__INGESTION__BATCH_COUNT: "5"
# AIRFLOW_VAR_CORE__INGESTION__BATCH_SAVE_ENABLED: "true"
AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: "60"
# The /airflow suffix matches the web path configured elsewhere in this file.
AIRFLOW__WEBSERVER__BASE_URL: "http://localhost:8080/airflow"
AIRFLOW__WEBSERVER__EXPOSE_CONFIG: "False"
# NOTE(review): AUTHENTICATE, AUTH_BACKEND (airflow.contrib...) and RBAC are
# Airflow 1.10 webserver options; they appear to be ignored by Airflow 2.x,
# where the UI is always RBAC-based -- confirm and drop.
AIRFLOW__WEBSERVER__AUTHENTICATE: "True"
AIRFLOW__WEBSERVER__AUTH_BACKEND: "airflow.contrib.auth.backends.password_auth"
AIRFLOW__WEBSERVER__RBAC: "True"
AIRFLOW__WEBSERVER__ENABLE_PROXY_FIX: "True"
AIRFLOW__WEBSERVER__WORKER_REFRESH_BATCH_SIZE: "0"
AIRFLOW__WEBSERVER__WORKER_CLASS: "sync"
# AIRFLOW__WEBSERVER__WORKERS: "8"
# Airflow 2.3+ name of the option formerly called AUTH_BACKEND (the new
# option accepts a comma-separated list; a single backend is still valid).
AIRFLOW__API__AUTH_BACKENDS: "airflow.api.auth.backend.basic_auth"
AIRFLOW_VAR_CORE__CONFIG__SHOW_SKIPPED_IDS: "True"
# AIRFLOW_VAR_CORE__CONFIG__DATALOAD_CONFIG_PATH: "/opt/airflow/dags/configs/dataload.ini"
# Extra environment variables. Entries named AIRFLOW_VAR_<NAME> follow
# Airflow's convention for defining Variables through the environment.
extraEnv:
# - name: ENVIRONMENT
# value: "dev"
- name: CLOUD_PROVIDER
value: "azure"
# - name: CI_COMMIT_TAG
# value: "v0.12.0"
# - name: BUILD_TAG
# value: "v0.12.0"
# Makes modules under the DAGs folder and /opt/airflow importable.
- name: PYTHONPATH
value: "/opt/airflow/dags:/opt/airflow"
- name: AIRFLOW_VAR_AZURE_ENABLE_MSI
value: "false"
## Begin -- Ingest Manifest DAG variables
## NOTE(review): there is no matching "End" marker further down.
- name: AIRFLOW_VAR_ENTITLEMENTS_MODULE_NAME
value: "entitlements_client"
- name: AIRFLOW_VAR_CORE__CONFIG__DATALOAD_CONFIG_PATH
value: "/opt/airflow/dags/configs/dataload.ini"
- name: AIRFLOW_VAR_ENV_VARS_ENABLED
value: "true"
# Service endpoints: plain-HTTP, cluster-local addresses of services in the
# osdu-core namespace.
- name: AIRFLOW_VAR_CORE__SERVICE__PARTITION__URL
value: "http://partition.osdu-core.svc.cluster.local/api/partition/v1"
- name: AIRFLOW_VAR_CORE__SERVICE__LEGAL__HOST
value: "http://legal.osdu-core.svc.cluster.local/api/legal/v1"
- name: AIRFLOW_VAR_CORE__SERVICE__ENTITLEMENTS__URL
value: "http://entitlements.osdu-core.svc.cluster.local/api/entitlements/v2"
- name: AIRFLOW_VAR_CORE__SERVICE__SCHEMA__URL
value: "http://schema.osdu-core.svc.cluster.local/api/schema-service/v1"
- name: AIRFLOW_VAR_CORE__SERVICE__SEARCH__URL
value: "http://search.osdu-core.svc.cluster.local/api/search/v2"
- name: AIRFLOW_VAR_CORE__SERVICE__SEARCH_WITH_CURSOR__URL
value: "http://search.osdu-core.svc.cluster.local/api/search/v2/query_with_cursor"
- name: AIRFLOW_VAR_CORE__SERVICE__STORAGE__URL
value: "http://storage.osdu-core.svc.cluster.local/api/storage/v2"
- name: AIRFLOW_VAR_CORE__SERVICE__FILE__HOST
value: "http://file.osdu-core.svc.cluster.local/api/file"
- name: AIRFLOW_VAR_CORE__SERVICE__DATASET__URL
value: "http://dataset.osdu-core.svc.cluster.local/api/dataset/v1"
# The workflow and dataset endpoints are each exposed under both a __HOST and
# a __URL variable name, with identical values.
- name: AIRFLOW_VAR_CORE__SERVICE__WORKFLOW__HOST
value: "http://workflow.osdu-core.svc.cluster.local/api/workflow/v1"
- name: AIRFLOW_VAR_CORE__SERVICE__WORKFLOW__URL
value: "http://workflow.osdu-core.svc.cluster.local/api/workflow/v1"
- name: AIRFLOW_VAR_CORE__SERVICE__DATASET__HOST
value: "http://dataset.osdu-core.svc.cluster.local/api/dataset/v1"
# The remaining variables are read from keys of the airflow-variables secret.
- name: AIRFLOW_VAR_AZURE_TENANT_ID
valueFrom:
secretKeyRef:
name: airflow-variables
key: AIRFLOW_VAR_AZURE_TENANT_ID
- name: AIRFLOW_VAR_AZURE_CLIENT_ID
valueFrom:
secretKeyRef:
name: airflow-variables
key: AIRFLOW_VAR_AZURE_CLIENT_ID
- name: AIRFLOW_VAR_AZURE_CLIENT_SECRET
valueFrom:
secretKeyRef:
name: airflow-variables
key: AIRFLOW_VAR_AZURE_CLIENT_SECRET
# NOTE(review): AAD_CLIENT_ID reads the AIRFLOW_VAR_AZURE_CLIENT_ID key, i.e.
# the same value as AIRFLOW_VAR_AZURE_CLIENT_ID above -- confirm this reuse
# is intentional and not a copy/paste slip.
- name: AIRFLOW_VAR_AAD_CLIENT_ID
valueFrom:
secretKeyRef:
name: airflow-variables
key: AIRFLOW_VAR_AZURE_CLIENT_ID
- name: AIRFLOW_VAR_APPINSIGHTS_KEY
valueFrom:
secretKeyRef:
name: airflow-variables
key: AIRFLOW_VAR_APPINSIGHTS_KEY
# DAGs Configuration
# DAGs come from the existing airflow-dags-pvc claim; git-sync is disabled.
dags:
persistence:
enabled: true
existingClaim: airflow-dags-pvc
gitSync:
enabled: false
# Logs are written under /opt/airflow/logs, persisted on the existing
# airflow-logs-pvc claim with ReadWriteMany access.
logs:
path: /opt/airflow/logs
persistence:
enabled: true
existingClaim: airflow-logs-pvc
accessMode: ReadWriteMany
# NOTE(review): metadataSecretName points at airflow-secrets; presumably the
# metadata-database connection secret -- confirm the chart reads this key.
data:
metadataSecretName: airflow-secrets
# Disable Internal PostgreSQL and PgBouncer
pgbouncer:
enabled: false
postgresql:
enabled: false
# External PostgreSQL: database "airflow-db" on the airflow-cluster-rw service
# in the postgresql namespace; the password is read from key "db-password"
# of the airflow-secrets secret.
externalDatabase:
type: postgres
host: "airflow-cluster-rw.postgresql.svc.cluster.local"
port: 5432
user: "dbuser"
passwordSecret: "airflow-secrets"
passwordSecretKey: "db-password"
database: "airflow-db"
# The bundled Redis is disabled.
redis:
enabled: false
# Not needed due to KubernetesExecutor
# (the external Redis settings below are kept but, per the note above, are
# not required with the executor configured in this file)
externalRedis:
host: "airflow-redis-master.airflow.svc.cluster.local"
port: 6379
passwordSecret: "airflow-secrets"
passwordSecretKey: "redis-password"
databaseNumber: 2
# properties: "?ssl_cert_reqs=CERT_REQUIRED"
# Enable StatsD
statsd:
enabled: true
# Flower is disabled.
flower:
enabled: false
# Scheduler: log cleanup is disabled.
scheduler:
logCleanup:
enabled: false
# podAnnotations:
# azure.workload.identity/use: "true"
# Workers are disabled, as is their log cleanup.
workers:
enabled: false
logCleanup:
enabled: false
# podAnnotations:
# azure.workload.identity/use: "true"
# Web UI is enabled under the /airflow path.
web:
enabled: true
path: "/airflow"
# podAnnotations:
# azure.workload.identity/use: "true"
# No Ingress is created by this release.
ingress:
enabled: false
# Use Existing Secrets for Fernet and Webserver Secret Keys
# Both keys are read from the keyvault-secrets secret, under the keys
# AIRFLOW__CORE__FERNET_KEY and AIRFLOW__WEBSERVER__SECRET_KEY respectively.
fernetKey:
enabled: true
existingSecret: keyvault-secrets
existingSecretKey: AIRFLOW__CORE__FERNET_KEY
webserverSecretKey:
enabled: true
existingSecret: keyvault-secrets
existingSecretKey: AIRFLOW__WEBSERVER__SECRET_KEY
# Affinity and Tolerations
# Hard node-affinity rule: only nodes whose "agentpool" label is one of
# poolz1, poolz2 or poolz3 are eligible.
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: agentpool
operator: In
values:
- poolz1
- poolz2
- poolz3
# Spread pods labelled release=airflow across zones with a maximum skew of 1;
# pods that would violate the constraint are not scheduled.
topologySpreadConstraints:
- maxSkew: 1
topologyKey: topology.kubernetes.io/zone
whenUnsatisfiable: DoNotSchedule
labelSelector:
matchLabels:
release: airflow
# Tolerate the NoSchedule taint with key "app" and value "cluster".
tolerations:
- effect: NoSchedule
key: app
value: "cluster"