in scripts/glue-etl-job.py [0:0]
def main(config):
    """Create a Glue ETL job, start one run of it, and block until it finishes.

    The S3 output prefix is cleared first via ``s3_delete_prefix``, then a
    uniquely named Glue job is created and started, and its run state is
    polled every 30 seconds until it leaves the in-progress states.

    Args:
        config: Mapping that must provide the keys ``"s3_bucket"``,
            ``"s3_output_prefix"``, ``"glue_role"`` and ``"script_location"``.

    Raises:
        SystemExit: If the job run ends in any state other than
            ``"SUCCEEDED"``; the exit message carries the final state.
    """
    s3_client = boto3.client(service_name='s3')
    # Delete the existing output prefix in case this is a retry attempt.
    s3_delete_prefix(s3_client, config["s3_bucket"], config["s3_output_prefix"])

    glue = boto3.client(service_name='glue')
    # The current timestamp (with the '.' removed) keeps the job name unique.
    job_name = f"a2d2-metadata-etl-{str(time.time()).replace('.','')}"
    job = glue.create_job(
        Name=job_name,
        Role=config["glue_role"],
        GlueVersion='2.0',
        WorkerType='G.2X',
        NumberOfWorkers=11,
        Command={
            'Name': 'glueetl',
            'ScriptLocation': config['script_location'],
            'PythonVersion': '3',
        },
        DefaultArguments={
            '--job-language': 'python',
            '--s3_bucket': config["s3_bucket"],
            '--s3_output_prefix': config["s3_output_prefix"],
        },
    )
    job_run = glue.start_job_run(JobName=job['Name'])

    status = glue.get_job_run(JobName=job['Name'], RunId=job_run['JobRunId'])
    print(str(status))
    run_state = status['JobRun']['JobRunState']

    # Only states in which the run can still make progress keep us polling.
    # "STOPPED" is deliberately NOT listed: it is a terminal state, and
    # polling on it would loop forever instead of reporting the failure.
    # NOTE(review): "WAITING" is treated as a queued, non-terminal state --
    # confirm against the Glue JobRun API reference.
    in_progress_states = ("STARTING", "WAITING", "RUNNING", "STOPPING")
    while run_state in in_progress_states:
        print(f"Glue Job is {run_state}")
        time.sleep(30)
        status = glue.get_job_run(JobName=job['Name'], RunId=job_run['JobRunId'])
        run_state = status['JobRun']['JobRunState']

    if run_state != "SUCCEEDED":
        import sys
        sys.exit(f"Glue job final status: {run_state}")