in workflow1_endpointbuilder/sam-app/functions/function3_processobjectsandupdaterows/index.py [0:0]
def process_file(key, datetime_id, bucket_name):
    """Extract the text of one S3 object and record it via update_dynamodb_row.

    The object is downloaded from S3 in full. Keys ending in ".pdf" are
    converted to page images with ``convert_from_bytes``; each image is
    passed to ``call_textract_for_pdf`` and the results are concatenated in
    the order the images are returned. Any other key has its raw bytes
    passed to ``call_textract_for_image``. The resulting text is then handed
    to ``update_dynamodb_row`` together with the class label taken from the
    key.

    Args:
        key: S3 object key. Its second "/"-separated segment is used as the
            class label, so the key must contain at least one "/".
        datetime_id: Passed through unchanged to ``update_dynamodb_row``.
        bucket_name: Name of the S3 bucket that holds the object.

    Raises:
        IndexError: If ``key`` contains no "/" separator.
    """
    # Class label is the second path segment of the key.
    _class = key.split("/")[1]

    s3 = boto3.client('s3')
    dynamodb = boto3.client('dynamodb')
    object_content = s3.get_object(Bucket=bucket_name, Key=key)['Body'].read()

    # NOTE(review): the suffix check is case-sensitive, so a key ending in
    # ".PDF" takes the image branch - confirm that this is intended.
    if key.endswith(".pdf"):
        # One helper call per page image; join once instead of repeated "+=".
        raw_text = "".join(
            call_textract_for_pdf(page)
            for page in convert_from_bytes(object_content)
        )
    else:
        raw_text = call_textract_for_image(object_content)

    update_dynamodb_row(key, _class, raw_text, datetime_id, bucket_name, dynamodb)