in functions/get_comprehend_files/app.py [0:0]
def _s3_key_from_uri(uri, bucket):
    """Return the object key of ``uri`` with a leading ``s3://<bucket>/`` removed."""
    prefix = 's3://{}/'.format(bucket)
    # Strip only a leading prefix; a blanket str.replace would also rewrite
    # any later occurrence of the same text inside the key itself.
    if uri.startswith(prefix):
        return uri[len(prefix):]
    return uri


def _read_tar_member(archive_bytes, member_name):
    """Return the UTF-8 decoded content of ``member_name`` from an in-memory tar archive."""
    # The context manager closes the TarFile even if extraction or decoding fails.
    with tarfile.open(fileobj=io.BytesIO(archive_bytes)) as archive:
        return archive.extractfile(member_name).read().decode("utf-8")


def lambda_handler(event, context):
    """Decompress the Comprehend result archives named in ``event`` and store them on S3.

    Reads ``event['guid']``, ``event['comprehend']['keyPhrasesFileUri']`` and
    ``event['comprehend']['entitiesFileUri']``. Each URI is fetched from
    ``MAIN_BUCKET_NAME``, the ``output`` member of the tar archive is
    extracted, and its text is written back to the same bucket under
    ``comprehendOutputDecompressed/<guid>/``.

    Args:
        event: Mutable mapping with the keys listed above
            (presumably the state-machine payload -- confirm with the caller).
        context: Lambda context object; unused.

    Returns:
        The same ``event`` object, with a ``comprehendOutputDecompressed``
        entry holding the S3 keys of the two decompressed JSON files.
    """
    guid = event['guid']
    bucket = MAIN_BUCKET_NAME
    comprehend_info = event['comprehend']

    # (result name, event field holding the archive's S3 URI)
    targets = (
        ('keyPhrases', 'keyPhrasesFileUri'),
        ('entities', 'entitiesFileUri'),
    )

    # Phase 1: download and unpack every archive before writing anything,
    # so a failure on either archive leaves no partial output on S3.
    decompressed = {}
    for result_name, uri_field in targets:
        source_key = _s3_key_from_uri(comprehend_info[uri_field], bucket)
        s3_object = s3_cli.get_object(Bucket=bucket, Key=source_key)
        decompressed[result_name] = _read_tar_member(s3_object['Body'].read(), 'output')

    # Phase 2: upload the decompressed .json files and record their keys.
    output_keys = {}
    for result_name, _ in targets:
        destination_key = 'comprehendOutputDecompressed/{}/{}.json'.format(guid, result_name)
        s3_cli.put_object(
            Bucket=bucket,
            Key=destination_key,
            Body=decompressed[result_name],
        )
        output_keys[result_name] = destination_key

    event['comprehendOutputDecompressed'] = output_keys
    return event