subscribers/python/export-data-sets/export-data-sets.py
import sys
import time

import boto3

# AWS Data Exchange client shared by the helpers in this script
dx = boto3.client('dataexchange')


def export_revisions(data_set_id, revisions, bucket, key_pattern):
    for i in range(0, len(revisions), 5):
        job_ids = []
        # Trigger 5 concurrent export jobs at a time
        for revision in revisions[i:i + 5]:
            create_job_response = dx.create_job(
                Type='EXPORT_REVISIONS_TO_S3',
                Details={
                    'ExportRevisionsToS3': {
                        'DataSetId': data_set_id,
                        'RevisionDestinations': [{
                            'RevisionId': revision['Id'],
                            'Bucket': bucket,
                            'KeyPattern': key_pattern,
                        }],
                    },
                },
            )
            job_id = create_job_response['Id']
            job_ids.append(job_id)
            # Initiate the job
            print("=> Starting Job:", job_id, "for revision:", revision['Id'])
            dx.start_job(JobId=job_id)
        # Wait for all export jobs in this batch to finish,
        # polling each one for up to an hour
        for job in job_ids:
            max_time = time.time() + 60 * 60  # 1 hour
            while time.time() < max_time:
                response = dx.get_job(JobId=job)
                status = response['State']
                print('STATUS:', job, 'get_job_status: {}'.format(status))
                if status == 'COMPLETED':
                    break
                elif status == 'ERROR':
                    print(response)
                    print('Export failed')
                    sys.exit(1)
                time.sleep(5)
        # Pause briefly before starting the next batch of jobs
        time.sleep(15)
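
A minimal usage sketch for the helper above, assuming the same boto3 'dataexchange' client: the data set ID and bucket below are placeholders, list_data_set_revisions is paginated (a real script would follow NextToken), and '${Asset.Name}' is one of the key-pattern variables Data Exchange substitutes at export time.

# Hypothetical example values; replace with a real data set ID and bucket.
data_set_id = 'REPLACE-WITH-DATA-SET-ID'
revisions = dx.list_data_set_revisions(DataSetId=data_set_id)['Revisions']
export_revisions(data_set_id, revisions, 'REPLACE-WITH-BUCKET', '${Asset.Name}')

Starting the exports five at a time, as the function does, presumably keeps the script within AWS Data Exchange's quota on concurrent in-progress jobs while still parallelizing the work.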