software/components/airflow/release.yaml:

---
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: airflow
  namespace: flux-system
  labels:
    name: airflow
spec:
  targetNamespace: airflow
  releaseName: airflow
  dependsOn:
    - name: airflow-dags
      namespace: airflow
  install:
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 10
      strategy: rollback
  test:
    enable: true
  interval: 10m0s
  chart:
    spec:
      chart: airflow
      # Specify the desired chart version if needed
      # version: 8.5.1
      sourceRef:
        kind: HelmRepository
        name: airflow-community
        namespace: flux-system
  values:
    # triggerer:
    #   podAnnotations:
    #     azure.workload.identity/use: "true"

    serviceAccount:
      create: false
      name: workload-identity-sa

    airflow:
      # podAnnotations:
      #   azure.workload.identity/use: "true"
      image:
        repository: apache/airflow
        tag: 2.10.1-python3.12
      executor: KubernetesExecutor
      securityContext:
        fsGroup: 1000
        runAsUser: 1000
      usersUpdate: true
      usersTemplates:
        ADMIN_PASSWORD:
          kind: secret
          name: airflow-secrets
          key: password
      users:
        - username: "admin"  # This is hardcoded
          password: ${ADMIN_PASSWORD}
          role: Admin
          email: admin@example.com
          firstName: admin
          lastName: admin
      # dbMigrations:
      #   podAnnotations:
      #     azure.workload.identity/use: "true"
      # extraVolumeMounts:
      #   ## spec: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#volumemount-v1-core
      #   - name: logs-volume
      #     mountPath: /opt/airflow/logs
      # extraVolumes:
      #   ## spec: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#volume-v1-core
      #   - name: logs-volume
      #     persistentVolumeClaim:
      #       claimName: airflow-logs-pvc

      # OSDU PIP Packages
      # 148 https://community.opengroup.org/osdu/platform/system/sdks/common-python-sdk
      # 668 https://community.opengroup.org/osdu/platform/data-flow/ingestion/osdu-airflow-lib
      # 823 https://community.opengroup.org/osdu/platform/data-flow/ingestion/osdu-ingestion-lib
      extraPipPackages:
        # - "apache-airflow-providers-microsoft-azure"
        - "--extra-index-url=https://community.opengroup.org/api/v4/projects/148/packages/pypi/simple"
        - "osdu-api>=0.27.0,<1.0.0"
        - "--extra-index-url=https://community.opengroup.org/api/v4/projects/668/packages/pypi/simple"
        - "osdu-airflow>=0.27,<1.0.0"
        - "--extra-index-url=https://community.opengroup.org/api/v4/projects/823/packages/pypi/simple"
        - "osdu-ingestion>=0.27,<1.0.0"

      kubernetesPodTemplate:
        serviceAccountName: workload-identity-sa
        # annotations:
        #   azure.workload.identity/use: "true"
        extraPipPackages:
          # - "apache-airflow-providers-microsoft-azure"
          - "--extra-index-url=https://community.opengroup.org/api/v4/projects/148/packages/pypi/simple"
          - "osdu-api>=0.27.0,<1.0.0"
          - "--extra-index-url=https://community.opengroup.org/api/v4/projects/668/packages/pypi/simple"
          - "osdu-airflow>=0.27,<1.0.0"
          - "--extra-index-url=https://community.opengroup.org/api/v4/projects/823/packages/pypi/simple"
          - "osdu-ingestion>=0.27,<1.0.0"

      # Airflow Configuration
      config:
        AIRFLOW__METRICS__USE_PATTERN_MATCH: "True"
        AIRFLOW__LOGGING__LOGGING_LEVEL: "INFO"
        AIRFLOW__LOGGING__ENABLE_TASK_CONTEXT_LOGGER: "False"
        AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: "False"
        AIRFLOW__CORE__STORE_SERIALIZED_DAGS: "True"
        AIRFLOW__CORE__STORE_DAG_CODE: "True"
        AIRFLOW__CORE__PLUGINS_FOLDER: "/opt/airflow/plugins"
        AIRFLOW__CORE__PARALLELISM: "2000"
        AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG: "2000"
        AIRFLOW__CORE__DAG_CONCURRENCY: "2000"
        AIRFLOW__CORE__DAG_FILE_PROCESSOR_TIMEOUT: "1500"
        # AIRFLOW_VAR_CORE__INGESTION__BATCH_SAVE_SIZE: "500"
        # AIRFLOW_VAR_CORE__INGESTION__BATCH_COUNT: "5"
        # AIRFLOW_VAR_CORE__INGESTION__BATCH_SAVE_ENABLED: "true"
        AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: "60"
        AIRFLOW__WEBSERVER__BASE_URL: "http://localhost:8080/airflow"
        AIRFLOW__WEBSERVER__EXPOSE_CONFIG: "False"
        AIRFLOW__WEBSERVER__AUTHENTICATE: "True"
        AIRFLOW__WEBSERVER__AUTH_BACKEND: "airflow.contrib.auth.backends.password_auth"
        AIRFLOW__WEBSERVER__RBAC: "True"
        AIRFLOW__WEBSERVER__ENABLE_PROXY_FIX: "True"
        AIRFLOW__WEBSERVER__WORKER_REFRESH_BATCH_SIZE: "0"
        AIRFLOW__WEBSERVER__WORKER_CLASS: "sync"
        # AIRFLOW__WEBSERVER__WORKERS: "8"
        AIRFLOW__API__AUTH_BACKEND: "airflow.api.auth.backend.basic_auth"
        AIRFLOW_VAR_CORE__CONFIG__SHOW_SKIPPED_IDS: "True"
        # AIRFLOW_VAR_CORE__CONFIG__DATALOAD_CONFIG_PATH: "/opt/airflow/dags/configs/dataload.ini"

      extraEnv:
        # - name: ENVIRONMENT
        #   value: "dev"
        - name: CLOUD_PROVIDER
          value: "azure"
        # - name: CI_COMMIT_TAG
        #   value: "v0.12.0"
        # - name: BUILD_TAG
        #   value: "v0.12.0"
        - name: PYTHONPATH
          value: "/opt/airflow/dags:/opt/airflow"
        - name: AIRFLOW_VAR_AZURE_ENABLE_MSI
          value: "false"
        ## Begin -- Ingest Manifest DAG variables
        - name: AIRFLOW_VAR_ENTITLEMENTS_MODULE_NAME
          value: "entitlements_client"
        - name: AIRFLOW_VAR_CORE__CONFIG__DATALOAD_CONFIG_PATH
          value: "/opt/airflow/dags/configs/dataload.ini"
        - name: AIRFLOW_VAR_ENV_VARS_ENABLED
          value: "true"
        - name: AIRFLOW_VAR_CORE__SERVICE__PARTITION__URL
          value: "http://partition.osdu-core.svc.cluster.local/api/partition/v1"
        - name: AIRFLOW_VAR_CORE__SERVICE__LEGAL__HOST
          value: "http://legal.osdu-core.svc.cluster.local/api/legal/v1"
        - name: AIRFLOW_VAR_CORE__SERVICE__ENTITLEMENTS__URL
          value: "http://entitlements.osdu-core.svc.cluster.local/api/entitlements/v2"
        - name: AIRFLOW_VAR_CORE__SERVICE__SCHEMA__URL
          value: "http://schema.osdu-core.svc.cluster.local/api/schema-service/v1"
        - name: AIRFLOW_VAR_CORE__SERVICE__SEARCH__URL
          value: "http://search.osdu-core.svc.cluster.local/api/search/v2"
        - name: AIRFLOW_VAR_CORE__SERVICE__SEARCH_WITH_CURSOR__URL
          value: "http://search.osdu-core.svc.cluster.local/api/search/v2/query_with_cursor"
        - name: AIRFLOW_VAR_CORE__SERVICE__STORAGE__URL
          value: "http://storage.osdu-core.svc.cluster.local/api/storage/v2"
        - name: AIRFLOW_VAR_CORE__SERVICE__FILE__HOST
          value: "http://file.osdu-core.svc.cluster.local/api/file"
        - name: AIRFLOW_VAR_CORE__SERVICE__DATASET__URL
          value: "http://dataset.osdu-core.svc.cluster.local/api/dataset/v1"
        - name: AIRFLOW_VAR_CORE__SERVICE__WORKFLOW__HOST
          value: "http://workflow.osdu-core.svc.cluster.local/api/workflow/v1"
        - name: AIRFLOW_VAR_CORE__SERVICE__WORKFLOW__URL
          value: "http://workflow.osdu-core.svc.cluster.local/api/workflow/v1"
        - name: AIRFLOW_VAR_CORE__SERVICE__DATASET__HOST
          value: "http://dataset.osdu-core.svc.cluster.local/api/dataset/v1"
        - name: AIRFLOW_VAR_AZURE_TENANT_ID
          valueFrom:
            secretKeyRef:
              name: airflow-variables
              key: AIRFLOW_VAR_AZURE_TENANT_ID
        - name: AIRFLOW_VAR_AZURE_CLIENT_ID
          valueFrom:
            secretKeyRef:
              name: airflow-variables
              key: AIRFLOW_VAR_AZURE_CLIENT_ID
        - name: AIRFLOW_VAR_AZURE_CLIENT_SECRET
          valueFrom:
            secretKeyRef:
              name: airflow-variables
              key: AIRFLOW_VAR_AZURE_CLIENT_SECRET
        - name: AIRFLOW_VAR_AAD_CLIENT_ID
          valueFrom:
            secretKeyRef:
              name: airflow-variables
              key: AIRFLOW_VAR_AZURE_CLIENT_ID
        - name: AIRFLOW_VAR_APPINSIGHTS_KEY
          valueFrom:
            secretKeyRef:
              name: airflow-variables
              key: AIRFLOW_VAR_APPINSIGHTS_KEY

    # DAGs Configuration
    dags:
      persistence:
        enabled: true
        existingClaim: airflow-dags-pvc
      gitSync:
        enabled: false

    logs:
      path: /opt/airflow/logs
      persistence:
        enabled: true
        existingClaim: airflow-logs-pvc
        accessMode: ReadWriteMany

    data:
      metadataSecretName: airflow-secrets

    # Disable Internal PostgreSQL and PgBouncer
    pgbouncer:
      enabled: false
    postgresql:
      enabled: false

    externalDatabase:
      type: postgres
      host: "airflow-cluster-rw.postgresql.svc.cluster.local"
      port: 5432
      user: "dbuser"
      passwordSecret: "airflow-secrets"
      passwordSecretKey: "db-password"
      database: "airflow-db"

    redis:
      enabled: false  # Not needed due to KubernetesExecutor
    externalRedis:
      host: "airflow-redis-master.airflow.svc.cluster.local"
      port: 6379
      passwordSecret: "airflow-secrets"
      passwordSecretKey: "redis-password"
      databaseNumber: 2
      # properties: "?ssl_cert_reqs=CERT_REQUIRED"

    # Enable StatsD
    statsd:
      enabled: true

    flower:
      enabled: false

    scheduler:
      logCleanup:
        enabled: false
      # podAnnotations:
      #   azure.workload.identity/use: "true"

    workers:
      enabled: false
      logCleanup:
        enabled: false
      # podAnnotations:
      #   azure.workload.identity/use: "true"

    web:
      enabled: true
      path: "/airflow"
      # podAnnotations:
      #   azure.workload.identity/use: "true"

    ingress:
      enabled: false

    # Use Existing Secrets for Fernet and Webserver Secret Keys
    fernetKey:
      enabled: true
      existingSecret: keyvault-secrets
      existingSecretKey: AIRFLOW__CORE__FERNET_KEY
    webserverSecretKey:
      enabled: true
      existingSecret: keyvault-secrets
      existingSecretKey: AIRFLOW__WEBSERVER__SECRET_KEY

    # Affinity and Tolerations
    affinity:
      nodeAffinity:
        requiredDuringSchedulingIgnoredDuringExecution:
          nodeSelectorTerms:
            - matchExpressions:
                - key: agentpool
                  operator: In
                  values:
                    - poolz1
                    - poolz2
                    - poolz3

    topologySpreadConstraints:
      - maxSkew: 1
        topologyKey: topology.kubernetes.io/zone
        whenUnsatisfiable: DoNotSchedule
        labelSelector:
          matchLabels:
            release: airflow

    tolerations:
      - effect: NoSchedule
        key: app
        value: "cluster"