in workflow2_docsplitter/sam-app/functions/docsplitter_function/index.py [0:0]
def lambda_handler(event, context):
    """Entry point for the Document Splitter API.

    Routes on ``event['path']``:
      * ``/``           -- plain-text health/info message.
      * ``/s3_file``    -- split a PDF already stored in S3; its URI is passed
                           in the ``input_pdf_uri`` query-string parameter.
      * ``/local_file`` -- split a PDF uploaded in the (base64-encoded,
                           multipart) request body.

    For the two splitting paths the PDF is classified page-by-page via the
    Comprehend endpoint named by ``endpoint_arn``, split into one PDF per
    class, zipped, and uploaded to ``bucket_name``; the response body is JSON
    containing the S3 URI of the resulting zip.

    Parameters
    ----------
    event : dict
        ALB/API Gateway proxy event. Expected keys: ``path``,
        ``queryStringParameters`` (with ``endpoint_arn`` and ``bucket_name``
        for non-root paths), and ``body`` for ``/local_file``.
    context : object
        Lambda context (unused).

    Returns
    -------
    dict
        Proxy-integration response (statusCode / headers / body).
    """
    path = event['path']

    if path == '/':
        return {
            "statusCode": 200,
            "statusDescription": "200 OK",
            "isBase64Encoded": False,
            "headers": {
                "Content-Type": "text/html"
            },
            "body": "This is the Document Splitter API."
        }

    # BUG FIX: the original code had no fallback for unrecognized paths, so
    # `input_pdf_content` stayed unbound and the handler crashed with
    # UnboundLocalError (surfacing as a 502). Reject unknown paths up front,
    # before touching query parameters or AWS clients.
    if path not in ('/s3_file', '/local_file'):
        return {
            "statusCode": 404,
            "statusDescription": "404 Not Found",
            "isBase64Encoded": False,
            "headers": {
                "Content-Type": "application/json"
            },
            "body": json.dumps({'error': f"Unknown path: {path}"})
        }

    # NOTE(review): queryStringParameters is None when no query string is
    # present, which would raise TypeError below — presumably callers always
    # supply endpoint_arn/bucket_name; verify against the client.
    request_body = event['queryStringParameters']
    endpoint_arn = request_body['endpoint_arn']
    bucket_name = request_body['bucket_name']
    # Timestamp-based id used both for the Comprehend job artifacts and the
    # output object key; second-level resolution assumes no concurrent calls.
    _id = datetime.now().strftime("%Y%m%d%H%M%S")
    s3 = boto3.client('s3')

    if path == '/s3_file':
        # Derive the object key from everything after "<bucket_name>/" in the
        # provided URI (raises IndexError if the URI doesn't contain it).
        input_pdf_uri = request_body['input_pdf_uri']
        input_pdf_key = input_pdf_uri.split(bucket_name + "/", 1)[1]
        s3_response_object = s3.get_object(Bucket=bucket_name, Key=input_pdf_key)
        input_pdf_content = s3_response_object['Body'].read()
    else:  # path == '/local_file'
        # The body is a base64-encoded multipart payload; strip everything up
        # to the first "%PDF" magic marker after the blank line that ends the
        # multipart headers, keeping the marker itself.
        # NOTE(review): this assumes CRLF headers and exactly one PDF part —
        # a proper multipart parser would be more robust; confirm clients.
        encoded_data = event['body']
        decoded_data = base64.standard_b64decode(encoded_data)
        input_pdf_content = b"%PDF" + decoded_data.split(b"\r\n\r\n%PDF", 1)[1]

    # pages_by_class: {class name -> list of page numbers in that class}
    pages_by_class = split_input_pdf_by_class(input_pdf_content, endpoint_arn, _id)
    output_zip_buffer = create_output_pdfs(input_pdf_content, pages_by_class)

    output_key_name = f"workflow2_output_documents_{_id}.zip"
    s3.put_object(Body=output_zip_buffer.getvalue(), Bucket=bucket_name, Key=output_key_name, ContentType='application/zip')
    output_zip_file_s3_uri = f"s3://{bucket_name}/{output_key_name}"

    return {
        "statusCode": 200,
        "statusDescription": "200 OK",
        "isBase64Encoded": False,
        "headers": {
            # BUG FIX: the body below is JSON, but the original header
            # declared text/html.
            "Content-Type": "application/json"
        },
        "body": json.dumps(
            {
                'output_zip_file_s3_uri': output_zip_file_s3_uri
            }
        )
    }