o2a/mappers/decision_mapper.py (43 lines of code) (raw):

# -*- coding: utf-8 -*-
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Maps decision node to Airflow's DAG"""
import collections
from typing import Any, Dict, List, Optional, Set, Tuple
from xml.etree.ElementTree import Element

from o2a.converter.task import Task
from o2a.converter.relation import Relation
from o2a.mappers.base_mapper import BaseMapper
from o2a.o2a_libs.src.o2a_lib.property_utils import PropertySet
from o2a.o2a_libs.src.o2a_lib import el_parser


# noinspection PyAbstractClass
class DecisionMapper(BaseMapper):
    """
    Maps an Oozie decision node to a single task rendered from ``decision.tpl``.

    A decision node has multiple paths: its EL predicates are evaluated in
    document order until the first one that is true is found; otherwise the
    default path is taken.

    XML Example:

    <workflow-app name="foo-wf" xmlns="uri:oozie:workflow:0.1">
        ...
        <decision name="mydecision">
            <switch>
                <case to="reconsolidatejob">
                    ${fs:fileSize(secondjobOutputDir) gt 10 * GB}
                </case>
                <case to="rexpandjob">
                    ${fs:fileSize(secondjobOutputDir) lt 100 * MB}
                </case>
                <case to="recomputejob">
                    ${ hadoop:counters('secondjob')[RECORDS][REDUCE_OUT] lt 1000000 }
                </case>
                <default to="end"/>
            </switch>
        </decision>
        ...
    </workflow-app>
    """

    def __init__(
        self,
        oozie_node: Element,
        name: str,
        dag_name: str,
        props: Optional[PropertySet] = None,
        **kwargs: Any,
    ):
        """
        Initialise the mapper and eagerly parse the ``<switch>`` cases.

        :param oozie_node: the ``<decision>`` XML element
        :param name: name of the node; used as the task id
        :param dag_name: name of the DAG, forwarded to ``BaseMapper``
        :param props: property set; an empty ``PropertySet`` is used when falsy
        :param kwargs: extra keyword arguments forwarded to ``BaseMapper``
        """
        super().__init__(
            oozie_node=oozie_node,
            name=name,
            dag_name=dag_name,
            props=props or PropertySet(job_properties={}, config={}),
            **kwargs,
        )
        self._get_cases()

    def _get_cases(self) -> None:
        """
        Populate ``self.case_dict`` and ``self.default_case`` from the node.

        ``case_dict`` maps each translated case predicate to the name of its
        target node, in document order. ``default_case`` is the target of the
        last non-``case`` child, or ``None`` when there is no such child.
        """
        # The <switch> element is taken to be the first child of <decision>.
        switch_node = self.oozie_node[0]
        self.case_dict: Dict[str, str] = collections.OrderedDict()
        self.default_case: Optional[str] = None
        for child in switch_node:
            # Comments / processing instructions carry a non-string tag; they
            # are neither a case nor a default, so skip them.
            if not isinstance(child.tag, str):
                continue
            # Tags may be namespace-qualified ("{uri}case"); compare only the
            # local name so that a namespace or sibling name that merely
            # contains "case" is not mistaken for a <case> element.
            local_name = child.tag.rpartition("}")[2]
            if local_name == "case":
                # NOTE(review): a <case> without text has ``text is None`` and
                # raises AttributeError here - confirm whether an explicit
                # error would be preferable.
                predicate = el_parser.translate(child.text.strip(), quote=False)
                self.case_dict[predicate] = child.attrib["to"]
            else:
                # Any other element is treated as the default transition.
                self.default_case = child.attrib["to"]

    def to_tasks_and_relations(self) -> Tuple[List[Task], List[Relation]]:
        """
        Build the task that represents this decision node.

        :return: a one-element task list (rendered from ``decision.tpl`` with
            the parsed cases and default) and an empty relation list
        """
        tasks = [
            Task(
                task_id=self.name,
                template_name="decision.tpl",
                template_params={
                    "case_dict": self.case_dict,
                    "default_case": self.default_case,
                },
            )
        ]
        relations: List[Relation] = []
        return tasks, relations

    def required_imports(self) -> Set[str]:
        """Return the import statements required by the generated code."""
        return {"from airflow.operators import python", "from airflow.utils import dates"}