# tools/start-job-run-converter/startJobRunConverter.py
import json
import re

# NOTE: SPARK_SUBMIT, SPARK_UNARY_ARGUMENTS, CONVERTER_ARGUMENTS, add_quote,
# convert_matched_var, and normalize_arg_key are defined elsewhere in this file.


def generate_start_job_cmd(spark_cmd_line, start_job_args):
    """Build an `aws emr-containers start-job-run` command from a tokenized
    spark-submit command line and a dict of start-job-run arguments."""
    start_job_cmd = "aws emr-containers start-job-run \\\n"
    # Walk the tokens: skip past the spark-submit binary, consume its options
    # (and their values), and stop at the first non-option token, which is
    # the application entrypoint.
    start_idx, curr_idx = 0, 0
    while curr_idx < len(spark_cmd_line):
        curr_arg = spark_cmd_line[curr_idx].strip()
        if curr_arg:
            if SPARK_SUBMIT in curr_arg:
                start_idx = curr_idx + 1
            elif curr_arg.startswith("-"):
                if curr_arg not in SPARK_UNARY_ARGUMENTS:
                    curr_idx += 1  # the argument is a pair, e.g. --num-executors 50
            else:
                break  # reached the entrypoint
        curr_idx += 1
    spark_submit_parameters = add_quote(spark_cmd_line[start_idx:curr_idx])
    entrypoint_location = spark_cmd_line[curr_idx]
    entrypoint_arguments = add_quote(spark_cmd_line[curr_idx + 1:])
    job_driver = {"sparkSubmitJobDriver": {
        "entryPoint": entrypoint_location,
        "entryPointArguments": entrypoint_arguments,
        "sparkSubmitParameters": " ".join(spark_submit_parameters)
    }}
    # Single-quote the JSON document for the shell and rewrite $VAR / ${VAR}
    # shell-variable references via convert_matched_var.
    res_str = add_quote(json.dumps(job_driver, indent=4), quote="'", guard="\n")
    res_str = re.sub(r"\$\{?[0-9a-zA-Z_]+\}?", convert_matched_var, res_str)
    start_job_args["job_driver"] = res_str + "\n"
    # Emit one "--<key> '<value>' \" line per populated argument, skipping
    # the converter-internal keys.
    for k, v in start_job_args.items():
        if k not in CONVERTER_ARGUMENTS and v:
            start_job_cmd += "--" + normalize_arg_key(k) + " " + add_quote(v, quote="'", guard="\n") + " \\\n"
    # Drop the trailing line continuation ("\\\n") left by the last option.
    return start_job_cmd[:-2] + "\n"