in src/blog-extract.py [0:0]
def get_kv_map(obj, bucket):
    """Analyze a document stored in S3 and index the returned blocks.

    Calls ``client.analyze_document`` with ``FeatureTypes=['FORMS']`` on the
    S3 object identified by ``bucket``/``obj`` and sorts the resulting
    blocks into lookup tables.

    NOTE(review): ``client`` is a module-level name defined outside this
    function -- presumably a boto3 Textract client; confirm at the call site.

    Args:
        obj: Name (key) of the S3 object to analyze. Note that it comes
            *before* ``bucket`` in the signature.
        bucket: Name of the S3 bucket that holds ``obj``.

    Returns:
        A 4-tuple ``(key_map, value_map, block_map, line_val)``:

        * ``key_map`` -- ``KEY_VALUE_SET`` blocks whose ``EntityTypes``
          contains ``'KEY'``, keyed by block ``Id``.
        * ``value_map`` -- every other ``KEY_VALUE_SET`` block, keyed by
          block ``Id``.
        * ``block_map`` -- every block in the response, keyed by ``Id``.
        * ``line_val`` -- the ``Text`` of the block that immediately follows
          a ``LINE`` block whose text is exactly ``"PROCEDURE"``. If several
          such lines exist the last one wins; ``""`` if there is none or the
          following block is missing / carries no text.

    Raises:
        Exception: Any error raised by the service call or while reading the
            response is re-raised unchanged after a diagnostic message has
            been printed.
    """
    try:
        # The document is referenced by its S3 location; no image bytes are
        # sent with the request.
        response = client.analyze_document(
            Document={
                'S3Object': {
                    'Bucket': bucket,
                    'Name': obj,
                },
            },
            FeatureTypes=['FORMS'],
        )

        # Get the text blocks
        blocks = response['Blocks']

        # Get key and value maps
        key_map = {}
        value_map = {}
        block_map = {}
        line_val = ""

        for index, block in enumerate(blocks):
            block_id = block['Id']
            block_map[block_id] = block

            block_type = block['BlockType']
            if block_type == "KEY_VALUE_SET":
                if 'KEY' in block['EntityTypes']:
                    key_map[block_id] = block
                else:
                    value_map[block_id] = block
            elif block_type == "LINE" and block.get('Text') == "PROCEDURE":
                # The value for the "PROCEDURE" heading is taken from the
                # block right after it. Guard the lookahead so a heading that
                # is the last block (or is followed by a block without text)
                # no longer raises IndexError / KeyError.
                next_index = index + 1
                if next_index < len(blocks):
                    line_val = blocks[next_index].get('Text', line_val)

        return key_map, value_map, block_map, line_val
    except Exception:
        # Report and propagate; callers still see the original exception.
        print("Failed while parsing the document")
        raise