def lambda_handler()

in aws-kendra-transcribe-media-search/lambda/indexer/jobcomplete.py [0:0]


def _update_file_status(media_s3url, item, *, duration_secs, transcribe_state,
                        transcribe_secs, sync_state):
    """Re-write the tracking record for a media file with new job states.

    Every field that this handler does not change is copied through from
    ``item`` (the record previously read with ``get_file_status``); only the
    keyword-only arguments vary between call sites.

    Args:
        media_s3url: URI of the media file; passed as the first positional
            argument to ``put_file_status``.
        item: Existing tracking record for the media file.
        duration_secs: Value to store as ``duration_secs`` (``None`` when it
            is not known).
        transcribe_state: New value for ``transcribe_state``.
        transcribe_secs: Value to store as ``transcribe_secs`` (``None`` when
            it is not known).
        sync_state: New value for ``sync_state``.

    Raises:
        KeyError: If ``item`` lacks one of the copied fields.
    """
    put_file_status(
        media_s3url,
        lastModified=item['lastModified'],
        size_bytes=item['size_bytes'],
        duration_secs=duration_secs,
        status=item['status'],
        metadata_url=item['metadata_url'],
        metadata_lastModified=item['metadata_lastModified'],
        transcribeopts_url=item['transcribeopts_url'],
        transcribeopts_lastModified=item['transcribeopts_lastModified'],
        transcribe_job_id=item['transcribe_job_id'],
        transcribe_state=transcribe_state,
        transcribe_secs=transcribe_secs,
        sync_job_id=item['sync_job_id'],
        sync_state=sync_state,
    )


def lambda_handler(event, context):
    """Handle a transcription-job event and index the resulting transcript.

    Looks up the transcription job named in the event, records its outcome in
    the media file's tracking record and, for a job that did not fail, prepares
    the transcript and indexes it with ``put_document``. Afterwards it calls
    ``stop_kendra_sync_job_when_all_done`` -- except when the job's media file
    has no tracking record, in which case the handler returns immediately.

    Args:
        event: Invocation event; ``event['detail']['TranscriptionJobName']``
            must hold the name of the transcription job.
        context: Lambda context object (unused).

    Returns:
        None.

    Raises:
        KeyError: If the event, the job description, or the tracking record is
            missing a field this handler reads.
    """
    logger.info("Received event: %s", json.dumps(event))

    job_name = event['detail']['TranscriptionJobName']
    logger.info("Transcription job name: %s", job_name)

    # get results of Amazon Transcribe job
    logger.info("** Retrieve transcription job **")
    transcription_job = get_transcription_job(job_name)

    if transcription_job is None or 'TranscriptionJob' not in transcription_job:
        logger.error("Unable to retrieve transcription from job.")
    else:
        job = transcription_job['TranscriptionJob']
        job_status = job['TranscriptionJobStatus']
        media_s3url = job['Media']['MediaFileUri']
        item = get_file_status(media_s3url)
        if item is None:
            # No tracking record for this media file: nothing to update, and
            # the final stop-sync step below is skipped as well.
            logger.info("Transcription job for media file not tracked in Indexer Media File table.. possibly this is a job that is not started by MediaSearch indexer")
            return
        if job_status == "FAILED":
            # job failed
            # Fall back to a placeholder so a missing reason cannot prevent
            # the record from being marked FAILED.
            failure_reason = job.get('FailureReason', 'unknown')
            logger.error("Transcribe job failed: %s - Reason %s", job_status, failure_reason)
            _update_file_status(
                media_s3url, item,
                duration_secs=None,
                transcribe_state="FAILED", transcribe_secs=None,
                sync_state="NOT_SYNCED",
            )
        else:
            # job completed
            # NOTE(review): every status other than "FAILED" is treated as
            # completed -- confirm the trigger only delivers terminal states.
            transcript_uri = job['Transcript']['TranscriptFileUri']
            transcribe_secs = get_transcription_job_duration(transcription_job)
            # Update transcribe_state
            _update_file_status(
                media_s3url, item,
                duration_secs=None,
                transcribe_state="DONE", transcribe_secs=transcribe_secs,
                sync_state=item['sync_state'],
            )
            try:
                logger.info("** Process transcription and prepare for indexing **")
                duration_secs, text = prepare_transcript(transcript_uri)
                logger.info("** Index transcription document in Kendra **")
                put_document(dsId=DS_ID, indexId=INDEX_ID, s3url=media_s3url, item=item, text=text)
                # Update sync_state
                _update_file_status(
                    media_s3url, item,
                    duration_secs=duration_secs,
                    transcribe_state="DONE", transcribe_secs=transcribe_secs,
                    sync_state="DONE",
                )
            except Exception as e:
                # Best-effort boundary: record the failure on the tracking
                # record instead of failing the invocation. logger.exception
                # keeps the traceback alongside the original message.
                logger.exception("Exception thrown during indexing: %s", e)
                _update_file_status(
                    media_s3url, item,
                    duration_secs=None,
                    transcribe_state="DONE", transcribe_secs=transcribe_secs,
                    sync_state="FAILED",
                )
    # Finally, stop the sync job if no more transcription jobs are pending.
    stop_kendra_sync_job_when_all_done(dsId=DS_ID, indexId=INDEX_ID)