# datasets/open_targets/pipelines/copy_genetics_data/pipeline.yaml
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
# Pipeline definition for the `copy_genetics_data` DAG: a single
# KubernetesPodOperator task that runs the `copy_bq_datasets` container image
# to transfer the Open Targets Genetics dataset.

# No pipeline-level resources are declared for this pipeline.
resources: null

dag:
  airflow_version: 2
  initialize:
    dag_id: copy_genetics_data
    default_args:
      owner: "Google"
      depends_on_past: false
      start_date: '2022-04-01'
    max_active_runs: 1
    # Cron fields: minute=1, hour=*, day-of-month=1, month=*, day-of-week=*,
    # i.e. minute 1 of every hour on the 1st day of each month.
    # NOTE(review): if a single monthly run is intended, the hour field would
    # need to be pinned (e.g. "1 0 1 * *") - confirm before changing.
    schedule_interval: "1 * 1 * *"
    catchup: false
    default_view: graph

  tasks:
    - operator: "KubernetesPodOperator"
      description: "Transfer Open Targets Genetics Dataset"
      args:
        task_id: "copy_bq_datasets"
        name: "copy_bq_datasets"
        namespace: "composer"
        service_account_name: "datasets"
        image_pull_policy: "Always"
        # Image and project/dataset settings are templated from the
        # `open_targets` JSON variable rather than hard-coded here.
        image: "{{ var.json.open_targets.container_registry.copy_bq_datasets }}"
        # Environment variables passed to the container.
        env_vars:
          SOURCE_PROJECT_ID: "{{ var.json.open_targets.genetics.source_project_id }}"
          TARGET_PROJECT_ID: "{{ var.json.open_targets.genetics.target_project_id }}"
          SERVICE_ACCOUNT: "{{ var.json.open_targets.service_account }}"
          TRANSFER_CONFIG_NAME: "open-targets-genetics"
          SOURCE_DATASET_NAME: "{{ var.json.open_targets.genetics.source_dataset_name }}"
          TARGET_DATASET_NAME: "{{ var.json.open_targets.genetics.target_dataset_name }}"
        # Resource requests for the pod (memory, CPU, ephemeral storage).
        resources:
          request_memory: "128M"
          request_cpu: "200m"
          request_ephemeral_storage: "5G"

  # Task graph: the DAG consists of the single task defined above.
  graph_paths:
    - "copy_bq_datasets"