def lambda_handler()

in functions/get_comprehend_files/app.py [0:0]


def lambda_handler(event, context):
    """Decompress Comprehend tar.gz result files and store them as JSON on S3.

    Expects ``event`` to carry ``guid`` plus ``comprehend.keyPhrasesFileUri`` and
    ``comprehend.entitiesFileUri`` (S3 URIs pointing into ``MAIN_BUCKET_NAME``).
    The 'output' member of each archive is written back to S3 under
    ``comprehendOutputDecompressed/<guid>/``, and the event is returned enriched
    with a ``comprehendOutputDecompressed`` dict holding the two object keys.

    :param event: Step Functions state (dict) — mutated in place and returned.
    :param context: Lambda context object (unused).
    :return: the input event with ``comprehendOutputDecompressed`` keys added.
    """
    jsonStateMachine = event

    # Getting infos
    guid = jsonStateMachine['guid']
    s3BucketName = MAIN_BUCKET_NAME
    s3UriPrefix = 's3://{}/'.format(s3BucketName)

    # Comprehend job outputs: strip the s3://bucket/ prefix to get plain object keys
    s3KeyPhrasesKey = jsonStateMachine['comprehend']['keyPhrasesFileUri'].replace(s3UriPrefix, '')
    s3EntitiesKey = jsonStateMachine['comprehend']['entitiesFileUri'].replace(s3UriPrefix, '')

    # Reading and decompressing the tar.gz 'output' files
    decompressedKeyPhrasesOutput = _read_comprehend_output(s3BucketName, s3KeyPhrasesKey)
    decompressedEntitiesOutput = _read_comprehend_output(s3BucketName, s3EntitiesKey)

    keyPhrasesOutputKey = 'comprehendOutputDecompressed/{}/keyPhrases.json'.format(guid)
    entitiesOutputKey = 'comprehendOutputDecompressed/{}/entities.json'.format(guid)

    # Saving decompressed .json files on S3
    s3_cli.put_object(
        Bucket=s3BucketName,
        Key=keyPhrasesOutputKey,
        Body=decompressedKeyPhrasesOutput
    )

    s3_cli.put_object(
        Bucket=s3BucketName,
        Key=entitiesOutputKey,
        Body=decompressedEntitiesOutput
    )

    jsonStateMachine['comprehendOutputDecompressed'] = {
        'keyPhrases': keyPhrasesOutputKey,
        'entities': entitiesOutputKey,
    }

    return jsonStateMachine


def _read_comprehend_output(bucket, key):
    """Fetch a Comprehend tar.gz result from S3 and return its 'output' member as UTF-8 text."""
    s3Object = s3_cli.get_object(Bucket=bucket, Key=key)
    archiveData = io.BytesIO(s3Object['Body'].read())
    # Context manager closes the archive deterministically; the original code
    # leaked both tarfile handles (relevant in warm Lambda containers).
    with tarfile.open(fileobj=archiveData) as tar:
        return tar.extractfile('output').read().decode("utf-8")