o2a/mappers/git_mapper.py (69 lines of code) (raw):
# -*- coding: utf-8 -*-
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Maps Git action into Airflow's DAG"""
import shlex
from typing import List, Optional, Set
from xml.etree.ElementTree import Element
from o2a.converter.relation import Relation
from o2a.converter.task import Task
from o2a.mappers.action_mapper import ActionMapper
from o2a.mappers.extensions.prepare_mapper_extension import PrepareMapperExtension
from o2a.o2a_libs.src.o2a_lib.property_utils import PropertySet
from o2a.utils.xml_utils import get_tag_el_text
from o2a.utils.el_utils import normalize_path
# Names of the child tags that GitMapper.on_parse_node looks up on the Oozie action node.
TAG_GIT_URI = "git-uri"
TAG_BRANCH = "branch"
TAG_KEY_PATH = "key-path"
TAG_DESTINATION_URI = "destination-uri"
def prepare_git_command(
    git_uri: str, git_branch: Optional[str], destination_path: str, key_path: Optional[str]
):
    """
    Build the shell command line that invokes the ``git.sh`` helper script.

    The ``--cluster`` and ``--region`` values are emitted verbatim as
    ``{{params.config[...]}}`` placeholders; this function does not resolve them.
    Every caller-supplied value is passed through ``shlex.quote`` before being
    placed on the command line.

    :param git_uri: URI of the repository, passed as ``--git-uri``.
    :param git_branch: Branch name; ``--branch`` is appended only when truthy.
    :param destination_path: Target path, passed as ``--destination-path``.
    :param key_path: Key location; ``--key-path`` is appended only when truthy.
    :return: The complete command as a single space-separated string.
    """
    # NOTE: the two placeholder entries must stay plain (non-f) strings so the
    # double braces are kept literally in the output.
    arguments = [
        "$DAGS_FOLDER/../data/git.sh",
        "--cluster {{params.config['dataproc_cluster']}}",
        "--region {{params.config['gcp_region']}}",
        f"--git-uri {shlex.quote(git_uri)}",
        f"--destination-path {shlex.quote(destination_path)}",
    ]

    # Optional flags follow the mandatory ones, branch first, then key path.
    if git_branch:
        arguments.append(f"--branch {shlex.quote(git_branch)}")
    if key_path:
        arguments.append(f"--key-path {shlex.quote(key_path)}")

    return " ".join(arguments)
class GitMapper(ActionMapper):
    """
    Converts a Git Oozie action to an Airflow task.

    The mapper reads the git URI, branch, destination URI and key path tags
    from the Oozie node and renders them through the ``git.tpl`` template.
    """

    def __init__(self, oozie_node: Element, name: str, props: PropertySet, **kwargs):
        ActionMapper.__init__(self, oozie_node=oozie_node, name=name, props=props, **kwargs)
        # Values below are filled in by on_parse_node; until then they are unset.
        self.git_uri: Optional[str] = None
        self.git_branch: Optional[str] = None
        self.destination_path: Optional[str] = None
        # NOTE(review): key_path_uri is initialised here but never assigned
        # anywhere else in this class - confirm whether it is still needed.
        self.key_path_uri: Optional[str] = None
        self.key_path: Optional[str] = None
        self.prepare_extension: PrepareMapperExtension = PrepareMapperExtension(self)

    def on_parse_node(self):
        """Read the git-specific tags from the Oozie node into the mapper's attributes."""
        super().on_parse_node()
        node = self.oozie_node

        self.git_uri = get_tag_el_text(node, TAG_GIT_URI)
        self.git_branch = get_tag_el_text(node, TAG_BRANCH)

        # The destination path is only overwritten when the tag yields a value.
        destination_uri = get_tag_el_text(node, tag=TAG_DESTINATION_URI)
        if destination_uri:
            self.destination_path = normalize_path(destination_uri, props=self.props, translated=True)

        # The key path, in contrast, is always reassigned (to None when absent).
        key_path_uri = get_tag_el_text(node, tag=TAG_KEY_PATH)
        if key_path_uri:
            self.key_path = normalize_path(key_path_uri, props=self.props, translated=True)
        else:
            self.key_path = None

    def to_tasks_and_relations(self):
        """
        Build the task for this action, preceded by the prepare task when one exists.

        :return: A ``(tasks, relations)`` pair.
        """
        template_params = {
            "git_uri": self.git_uri,
            "git_branch": self.git_branch,
            "destination_path": self.destination_path,
            "key_path": self.key_path,
            "props": self.props,
        }
        git_task = Task(task_id=self.name, template_name="git.tpl", template_params=template_params)

        tasks = [git_task]
        relations: List[Relation] = []

        prepare_task = self.prepare_extension.get_prepare_task()
        if not prepare_task:
            return tasks, relations

        tasks, relations = self.prepend_task(prepare_task, tasks, relations)
        return tasks, relations

    def required_imports(self) -> Set[str]:
        """Return the import statements required by the code generated for this action."""
        bash_import = "from airflow.operators import bash"
        return {bash_import}