in src/upload_to_elasticsearch.py [0:0]
def index_episode(es, event, fullEpisodeS3Location):
    """Fetch a transcribed episode's JSON from S3 and index it into Elasticsearch.

    Parameters
    ----------
    es :
        Elasticsearch client used to index the document.
    event : dict
        Episode metadata; this function reads the keys 'podcastUrl',
        'audio_type', 'Episode', 'summary', 'publishTime', 'sourceFeed',
        'audioS3Location' ({'bucket', 'key'}), and optionally 'speakerNames'.
    fullEpisodeS3Location : dict
        {'bucket', 'key'} locating the full-episode JSON (transcript and
        transcript_entities) in S3.
    """
    # Pull the full-episode JSON blob (transcript + entities) from S3 and parse it.
    response = s3_client.get_object(Bucket=fullEpisodeS3Location['bucket'],
                                    Key=fullEpisodeS3Location['key'])
    fullepisode = json.loads(response['Body'].read().decode('utf-8'))

    audio_url = event['podcastUrl']
    # f-string instead of chained '+' concatenation for the S3 URI.
    s3_location = f"s3://{event['audioS3Location']['bucket']}/{event['audioS3Location']['key']}"

    doc = {
        'audio_url': audio_url,
        'audio_type': event['audio_type'],
        'title': event['Episode'],
        'summary': event['summary'],
        'published_time': event['publishTime'],
        'source_feed': event['sourceFeed'],
        'audio_s3_location': s3_location,
        'transcript': fullepisode['transcript'],
        'transcript_entities': fullepisode['transcript_entities']
    }
    # Only attach speaker names when more than one speaker was identified;
    # a single-speaker list carries no diarization value.
    if 'speakerNames' in event and len(event['speakerNames']) > 1:
        doc['speakerNames'] = event['speakerNames']

    logger.info("request")
    logger.debug(json.dumps(doc))
    # Index with the audio URL as the document id so re-processing the same
    # episode overwrites the existing document instead of duplicating it.
    start = time.time()
    res = es.index(index=FULL_EPISODE_INDEX,
                   body=doc, id=audio_url)
    logger.info("response")
    logger.info(json.dumps(res, indent=4))
    logger.info('REQUEST_TIME es_client.index {:10.4f}'.format(time.time() - start))