def lambda_handler()

in src/create_transcribe_vocabulary.py [0:0]


def _format_vocabulary_term(origItem):
    """Rewrite one vocabulary entry into the dash-separated form sent to Transcribe.

    The entry is scanned right to left, one character at a time:

    * an upper-case letter that is not the first character and is not
      preceded by whitespace gets a leading dash;
    * a decimal digit is replaced by its word from ``convertDigitToWord``
      (dash-prefixed unless it is the first character). Digits are handled
      one at a time, so "Route 53" becomes "Route-Five-Three";
    * '.' becomes '-dot';
    * whitespace becomes '-';
    * anything that is not an ASCII letter or a dash is dropped.

    Args:
        origItem: The vocabulary entry, already stripped of surrounding
            whitespace.

    Returns:
        The rewritten term. It is an empty string when no supported
        character remains.
    """
    item = ""

    # Build the term back to front so each character can look at what has
    # already been emitted to its right.
    for k in range(len(origItem) - 1, -1, -1):
        letter = origItem[k]

        if k > 0 and letter.isupper() and not origItem[k - 1].isspace():
            letter = '-' + letter

        # isdecimal() (rather than isdigit()) only matches characters that
        # int() can parse; isdigit() is also true for characters such as
        # superscript digits, for which int() raises ValueError.
        if letter.isdecimal():
            letter = convertDigitToWord[int(letter)]
            if k > 0:
                letter = '-' + letter

        if letter == '.':
            letter = '-dot'

        if letter.isspace():
            letter = '-'

        # Avoid a trailing dash on the term and doubled dashes inside it.
        if not item or item.startswith('-'):
            letter = letter.rstrip('-')

        # Remove any unsupported characters. Parentheses must not appear
        # inside the character class: there they are literal characters and
        # would be kept instead of removed.
        letter = re.sub(r'[^a-zA-Z-]', '', letter)

        item = letter + item

    return item


def lambda_handler(event, context):
    """Create a Transcribe custom vocabulary and store a term mapping in S3.

    Every string in ``event['customVocabulary']`` is split on commas; each
    piece is stripped of surrounding whitespace and rewritten by
    ``_format_vocabulary_term``. The rewritten terms are passed to
    ``transcribe_client.create_vocabulary`` and a JSON object mapping each
    rewritten term back to its original text is written to the bucket named
    by the ``BUCKET_NAME`` environment variable.

    Args:
        event: Dict with a ``customVocabulary`` list of comma-separated
            strings.
        context: Not used by this function.

    Returns:
        A dict with the vocabulary ``status`` (the ``VocabularyState`` of the
        create_vocabulary response), the generated vocabulary ``name`` and
        the ``bucket``/``key`` of the stored mapping.

    Raises:
        KeyError: If ``BUCKET_NAME`` is not set or the event has no
            ``customVocabulary`` entry.
    """
    print("Received event: " + json.dumps(event, indent=2))

    bucket = os.environ['BUCKET_NAME']

    vocabularyTerms = []
    mapping = {}

    # Process each item in the vocabulary
    for row in event['customVocabulary']:
        # The items are comma separated, so split them apart.
        for rawItem in row.split(","):
            # strip removes any leading or trailing whitespace around the word
            origItem = rawItem.strip()
            item = _format_vocabulary_term(origItem)

            # Blank entries (e.g. from "a,,b" or a trailing comma) and entries
            # made only of unsupported characters yield an empty term; skip
            # them instead of sending an empty phrase to Transcribe.
            if not item:
                continue

            mapping[item] = origItem
            vocabularyTerms.append(item)

    vocabularyName = id_generator()

    # Create the vocabulary
    response = transcribe_client.create_vocabulary(
        VocabularyName=vocabularyName,
        LanguageCode='en-US',
        Phrases=vocabularyTerms
    )
    print('created vocabulary:' + vocabularyName)

    # The mapping lets later steps translate a rewritten term back to the
    # text that was originally supplied.
    mappingKey = 'podcasts/vocabularyMapping/' + id_generator() + '.json'
    s3_client.put_object(Body=json.dumps(mapping, indent=2), Bucket=bucket, Key=mappingKey)

    return {
        "status": response['VocabularyState'],
        "name": vocabularyName,
        "mapping": {
            "bucket": bucket,
            "key": mappingKey
        }
    }