in o2a/mappers/spark_mapper.py [0:0]
def _parse_spark_opts(spark_opts_node: ET.Element) -> Dict[str, str]:
    """
    Parse the text of a spark-opts element into a dictionary of "--conf" settings.

    Some examples of the spark-opts element:
    --conf key1=value
    --conf key2="value1 value2"

    The text is split on every occurrence of "--", so anything before the first "--"
    is dropped and a value that itself contains "--" is cut at that point.
    Options other than "--conf" are currently ignored.

    :param spark_opts_node: the spark-opts XML element whose text holds the options
    :return: mapping of configuration key to value, with one pair of matching
        surrounding quotes removed from the value
    :raises ParseException: if the node has no text, or if a "--conf" option is not
        followed by an argument of the form key=value with a non-empty value
    """
    if not spark_opts_node.text:
        raise ParseException(f"Spark opts node has no text: {spark_opts_node}")

    conf: Dict[str, str] = {}
    # The first fragment is whatever precedes the first "--"; it is not an option.
    for raw_opt in spark_opts_node.text.split("--")[1:]:
        # Split into the option name and (optionally) the rest of the fragment.
        spark_opt = raw_opt.strip().split(maxsplit=1)
        # A bare or trailing "--" yields an empty fragment: nothing to parse.
        if not spark_opt:
            continue
        # Can have multiple "--conf" in spark_opts
        if spark_opt[0] == "conf":
            # "--conf" without an argument leaves the value empty, which is rejected
            # below with a ParseException rather than failing on a missing index.
            argument = spark_opt[1] if len(spark_opt) > 1 else ""
            key, _, value = argument.partition("=")
            # Value is required
            if not value:
                raise ParseException(
                    f"Incorrect parameter format. Expected format: key=value. Current value: {spark_opt}"
                )
            # Delete surrounding quotes, but only when the value really is wrapped in
            # a matching pair; otherwise slicing would eat a legitimate last character.
            if len(value) > 2 and value[0] in ("'", '"') and value[-1] == value[0]:
                value = value[1:-1]
            conf[key] = value
        # TODO: parse also other options (like --executor-memory 20G --num-executors 50 and many more)
        #  see: https://oozie.apache.org/docs/5.1.0/DG_SparkActionExtension.html#PySpark_with_Spark_Action
    return conf