in sagemaker/ground_truth/post-label/lambda_function.py [0:0]
def lambda_handler(event, context):
    """Build consolidated transcription labels from an annotation file in S3.

    Reads ``labelingJobArn``, ``labelAttributeName`` and ``payload.s3Uri``
    from ``event``, downloads and JSON-decodes the object the URI points to,
    and emits one record for every annotation of every dataset entry in it.

    Args:
        event: Invocation payload; must contain the keys listed above.
        context: Lambda runtime context (not used here).

    Returns:
        A list of dicts, each holding ``datasetObjectId`` and a
        ``consolidatedAnnotation`` whose ``content`` maps the label attribute
        name to ``{"texts": [...], "imageSource": ...}``.

    Raises:
        KeyError: If an expected key is missing from the event or from the
            downloaded annotation data.
    """
    print(f"Received event: {json.dumps(event, indent=2)}")

    # The ARN is not used below, but the lookup is kept so that an event
    # without this key still fails here with KeyError.
    _job_arn = event["labelingJobArn"]
    attribute_name = event["labelAttributeName"]

    # Locate and load the annotation file referenced by the event.
    source_bucket, source_key = split_s3_path(event['payload']['s3Uri'])
    s3_client = boto3.client('s3')
    response = s3_client.get_object(Bucket=source_bucket, Key=source_key)
    dataset_entries = json.loads(response['Body'].read())

    results = []
    for entry in dataset_entries:
        # NOTE(review): one record is appended per annotation, so an entry
        # with several annotations yields several records sharing the same
        # datasetObjectId -- confirm this is what the consumer expects.
        for worker_annotation in entry['annotations']:
            # The annotation content is itself a JSON-encoded string.
            content = json.loads(worker_annotation['annotationData']['content'])

            texts = []
            for polygon in content['transcription']['polygons']:
                text, line_num, word_type = parse_label(polygon["label"])
                texts.append(
                    dict(
                        text=text,
                        line_num=line_num,
                        type=word_type,
                        bb=polygon["vertices"],
                    )
                )

            results.append({
                'datasetObjectId': entry['datasetObjectId'],
                'consolidatedAnnotation': {
                    'content': {
                        attribute_name: {
                            'texts': texts,
                            'imageSource': entry['dataObject'],
                        },
                    },
                },
            })

    print(f"Consolidated labels \n {results}")
    return results