in src_lambda/main.py [0:0]
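# The handler below relies on module-level imports, boto3 clients, and config
# that this excerpt does not show. A minimal sketch of that setup, assuming
# boto3 is used and that the concrete values of `image_batch_limit` and
# `tmp_txt_batch_split_dir` are placeholders, not the project's real values:
import os

import boto3

# S3 resource for writing the batch split files; Batch client for submitting jobs.
s3 = boto3.resource("s3")
aws_batch = boto3.client("batch")

# Assumed values: max image keys per batch split, and the key prefix under
# which the split txt files are written.
image_batch_limit = 100
tmp_txt_batch_split_dir = "tmp/batch-job-splits"
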
def handler(event, context):
    """
    Execution entrypoint for AWS Lambda.
    Reads an S3 bucket, filters for matching files, and collects the paths of
    the matched files; splits those paths into batches and writes each batch
    to a txt file that serves as one batch job split; then triggers one AWS
    Batch job per split, with that txt file as the job's input.
    Environment variables are set by the AWS CDK infra code.
    """
    # Queue, job definition, table, and bucket names are injected by the CDK stack.
    batch_job_region = aws_batch.meta.region_name
    batch_job_queue = os.environ.get("BATCH_JOB_QUEUE")
    batch_job_definition = os.environ.get("BATCH_JOB_DEFINITION")
    dynamodb_table_name = os.environ.get("DYNAMODB_TABLE_NAME")
    bucket_name = os.environ.get("S3_BUCKET_NAME")

    # Split the matching .jpg keys under each requested prefix into batches and
    # persist every batch as a txt file in S3, one object key per line.
    txt_object_keys = []
    for path_prefix in event['Paths']:
        for index, batch in split(bucket_name, path_prefix, 'jpg', image_batch_limit):
            txt_object_key = os.path.join(
                tmp_txt_batch_split_dir,
                "batch-job-split-%s.txt" % str(index)
            )
            txt_object_content = "\n".join(batch)
            s3.Object(bucket_name, txt_object_key).put(Body=txt_object_content)
            txt_object_keys.append(txt_object_key)

    # Submit one AWS Batch job per split file; each job receives its input txt
    # key and the target DynamoDB table via container environment overrides.
    for index, txt_object_key in enumerate(txt_object_keys, 1):
        container_overrides = {
            "environment": [{
                "name": "S3_BUCKET_NAME",
                "value": bucket_name
            }, {
                "name": "S3_OBJECT_KEY",
                "value": txt_object_key
            }, {
                "name": "DYNAMODB_TABLE_REGION",
                "value": batch_job_region
            }, {
                "name": "DYNAMODB_TABLE_NAME",
                "value": dynamodb_table_name
            }]
        }
        batch_job_name = "aws-blog-batch-job-%s-%s" % (str(index), str(len(txt_object_keys)))
        response = aws_batch.submit_job(jobName=batch_job_name,
                                        jobQueue=batch_job_queue,
                                        jobDefinition=batch_job_definition,
                                        containerOverrides=container_overrides)
        print(response)
    return 'Lambda execution finished'
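

# `split` is called by the handler but not defined in this excerpt. A
# hypothetical sketch, assuming it pages through the bucket under
# `path_prefix`, keeps only keys with the given extension, and yields
# (index, batch) chunks of at most `limit` keys each:
def split(bucket_name, path_prefix, extension, limit):
    """Yield (index, batch) tuples of matching S3 keys, `limit` keys per batch."""
    batch, index = [], 0
    for obj in s3.Bucket(bucket_name).objects.filter(Prefix=path_prefix):
        if not obj.key.endswith("." + extension):
            continue
        batch.append(obj.key)
        if len(batch) == limit:
            yield index, batch
            batch, index = [], index + 1
    if batch:
        yield index, batch

# The handler expects an event of the form {"Paths": ["images/2021/", ...]},
# i.e. a list of S3 key prefixes to scan (the prefix values here are illustrative).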