o2a/utils/config_extractors.py (55 lines of code) (raw):
# -*- coding: utf-8 -*-
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Extractors for configuration and job-xml nodes"""
from os import path
from typing import Dict, List
import xml.etree.ElementTree as ET
from o2a.converter.constants import HDFS_FOLDER
from o2a.converter.exceptions import ParseException
from o2a.o2a_libs.src.o2a_lib import el_parser
# XML tag names used when reading configuration data.
# NOTE(review): TAG_CONFIGURATION is not referenced in this module's visible code;
# presumably it is imported by callers that locate the ``configuration`` node - confirm.
TAG_CONFIGURATION = "configuration"
# Child elements looked up on the configuration node; each one describes a single property.
TAG_PROPERTY = "property"
# Child of a ``property`` element whose text is used as the property name (dictionary key).
TAG_NAME = "name"
# Child of a ``property`` element whose text is used as the property value.
TAG_VALUE = "value"
# NOTE(review): TAG_JOB_XML is not referenced in this module's visible code; presumably callers
# use it to find the ``job-xml`` nodes passed to extract_properties_from_job_xml_nodes - confirm.
TAG_JOB_XML = "job-xml"
def extract_properties_from_configuration_node(config_node: ET.Element) -> Dict[str, str]:
    """Collects the ``property`` children of a ``configuration`` node into a dictionary.

    Every direct ``property`` child must contain a ``name`` and a ``value`` element, both with
    non-empty text. Each value is passed through ``el_parser.translate`` before it is stored.
    When the same name occurs more than once, the last occurrence wins.

    :param config_node: element whose direct ``property`` children are read
    :return: mapping of property name to translated property value
    :raises ParseException: if a ``property`` has no ``name`` or ``value`` child, or if the text
        of either child is empty
    """
    extracted: Dict[str, str] = {}

    for prop in config_node.findall(TAG_PROPERTY):
        # findtext yields None when the child element is absent and "" when it has no text,
        # which lets the two failure modes be told apart without holding on to the nodes.
        key = prop.findtext(TAG_NAME)
        raw_value = prop.findtext(TAG_VALUE)

        if key is None or raw_value is None:
            raise ParseException(
                'Element "property" should have direct children elements: name, value. One of them does not '
                "exist. Make sure the configuration element is valid."
            )

        if not key:
            raise ParseException(
                'Element "name" should have content, however its value is empty. Make sure the element has '
                "the correct content."
            )

        if not raw_value:
            raise ParseException(
                'Element "value" should have content, however its value is empty. Make sure the element has '
                "the correct content."
            )

        extracted[key] = el_parser.translate(raw_value)

    return extracted
def extract_properties_from_job_xml_nodes(
    job_xml_nodes: List[ET.Element], input_directory_path: str
) -> Dict[str, str]:
    """Extracts configuration properties from the files referenced by ``job-xml`` nodes.

    The text of every node is treated as a file name relative to
    ``<input_directory_path>/<HDFS_FOLDER>``. Each file is parsed as XML and its root element is
    handed to ``extract_properties_from_configuration_node``. Results are merged in the order the
    nodes are given, so a property defined in a later file overrides the same property from an
    earlier one.

    :param job_xml_nodes: ``job-xml`` elements whose text is the name of a configuration file
    :param input_directory_path: directory that contains the ``HDFS_FOLDER`` sub-directory
    :return: merged mapping of property name to property value
    :raises ParseException: if a node has no text, if the root element of a referenced file has
        no child elements, or if a property inside the file is invalid
    :raises OSError: propagated from ``ET.parse`` when a referenced file cannot be opened
    :raises xml.etree.ElementTree.ParseError: propagated from ``ET.parse`` when a referenced file
        is not well-formed XML
    """
    properties_dict: Dict[str, str] = {}

    for job_xml_node in job_xml_nodes:
        file_name = job_xml_node.text
        if not file_name:
            raise ParseException(
                'Element "job-xml" should have content, however its value is empty. Make sure the element '
                "has the correct content."
            )

        file_path = path.join(input_directory_path, HDFS_FOLDER, file_name)
        config_node = ET.parse(file_path).getroot()

        # len() counts direct children. This states the "empty configuration" check explicitly
        # instead of relying on Element truth-value testing, which is deprecated.
        if len(config_node) == 0:
            raise ParseException(
                "A job-xml configuration node is specified in the workflow XML, however its value is empty. "
                "Make sure the path to a configuration file is valid."
            )

        properties_dict.update(extract_properties_from_configuration_node(config_node))

    return properties_dict