def index_keywords()

in src/upload_to_elasticsearch.py [0:0]


def index_keywords(es, event, keywordsS3Location):
    """Bulk-index one document per keyword phrase read from a JSON file in S3.

    Each document carries a link into the episode audio positioned
    ``AUDIO_OFFSET`` seconds before the phrase's start time (never below 0).

    Args:
        es: Client object handed to ``helpers.bulk`` (presumably an
            Elasticsearch client; confirm against the caller).
        event: Mapping that provides ``"PodcastName"``, ``"Episode"`` and
            ``"podcastUrl"``.
        keywordsS3Location: Mapping with the ``'bucket'`` and ``'key'`` of the
            keywords JSON object in S3.

    Returns:
        Whatever ``helpers.bulk`` returns. It is logged with ``json.dumps``,
        so it must be JSON-serializable.

    Raises:
        KeyError: If ``AUDIO_OFFSET`` is not set in the environment, or a
            required key is missing from ``event``, ``keywordsS3Location`` or
            a keyword entry.
        ValueError: If ``AUDIO_OFFSET`` or a ``startTime`` is not numeric.
    """
    # Number of seconds before the word's start time at which to place the
    # hyperlink. This gives the listener some context before the word is
    # spoken. Browsers also vary in how accurately they seek, so a margin is
    # needed: 10 seconds is usually good, but occasionally you'll still land
    # after the word was spoken.
    audioOffset = int(os.environ['AUDIO_OFFSET'])

    response = s3_client.get_object(
        Bucket=keywordsS3Location['bucket'],
        Key=keywordsS3Location['key'],
    )
    keywords = json.loads(response['Body'].read().decode('utf-8'))

    # Build one index action per keyword phrase.
    actions = []
    for entry in keywords:
        keyword = entry["text"]
        tags = entry["tags"]
        # Back the start time up so the listener gets some context for the
        # phrase; clamp at 0 so the link never points before the audio begins.
        offset_start = max(float(entry["startTime"]) - audioOffset, 0)
        actions.append({
            "_index": KEYWORDS_INDEX,
            "_type": "_doc",
            "_source": {
                "PodcastName": event["PodcastName"],
                "Episode": event["Episode"],
                # "#t=<seconds>" fragment makes the player seek on load.
                "url": event["podcastUrl"] + "#t=" + str(offset_start),
                "text": keyword,
                "tags": tags,
                "speaker": entry["speaker"],
                "startTime": float(offset_start),
            },
        })

    # Bulk load the documents into the index.
    result = helpers.bulk(es, actions)

    logger.info("indexed keywords to ES")
    logger.info(json.dumps(result, indent=2))
    return result