def lambda_handler()

in source/operators/transcribe/get_transcribe.py [0:0]
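
The handler relies on module-level setup that this extract omits. A minimal sketch of what it appears to assume, based on the names the function references (the helper classes come from the MIE Lambda helper layer; the Config settings are illustrative and the real module may build them from environment configuration):

import json

import boto3
import urllib3
from botocore import config as botocore_config
from MediaInsightsEngineLambdaHelper import DataPlane
from MediaInsightsEngineLambdaHelper import MasExecutionError
from MediaInsightsEngineLambdaHelper import MediaInsightsOperationHelper

# Shared botocore Config and Transcribe client referenced inside lambda_handler.
# Creating the client at module scope lets warm Lambda invocations reuse it.
config = botocore_config.Config()
transcribe = boto3.client("transcribe", config=config)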


def lambda_handler(event, context):
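    """Check the status of the Amazon Transcribe job whose id is passed in the operator metadata.

    If the upstream Mediainfo metadata reports zero audio tracks, Transcribe was
    never run and the workflow completes immediately. While the job is
    IN_PROGRESS the workflow stays in Executing so it can be polled again. On
    COMPLETED, the transcript JSON is downloaded, a plain-text copy is written
    to the dataplane S3 bucket, and the full result is stored as asset metadata.
    A FAILED job, an unknown status, or any error along the way marks the
    workflow as Error and raises MasExecutionError.
    """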
    print("We got this event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    # If Transcribe wasn't run due to silent audio, then we're done
    if "Mediainfo_num_audio_tracks" in event["Input"]["MetaData"] and event["Input"]["MetaData"]["Mediainfo_num_audio_tracks"] == "0":
        operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()
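    # The Transcribe job id and the workflow identifiers are expected in the operator metadata.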
    try:
        job_id = operator_object.metadata["TranscribeJobId"]
        workflow_id = operator_object.workflow_execution_id
        asset_id = operator_object.asset_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(TranscribeError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        response = transcribe.get_transcription_job(
            TranscriptionJobName=job_id
        )
        source_language = response['TranscriptionJob']['LanguageCode']
        print("get_transcription_job response:")
        print(response)
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(TranscribeError=str(e), TranscribeJobId=job_id)
        raise MasExecutionError(operator_object.return_output_object())
    else:
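        # Branch on the Transcribe job status reported by the service.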
        if response["TranscriptionJob"]["TranscriptionJobStatus"] == "IN_PROGRESS":
            operator_object.update_workflow_status("Executing")
            operator_object.add_workflow_metadata(TranscribeJobId=job_id, AssetId=asset_id, WorkflowExecutionId=workflow_id)
            return operator_object.return_output_object()
        elif response["TranscriptionJob"]["TranscriptionJobStatus"] == "FAILED":
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(TranscribeJobId=job_id,
                                                  TranscribeError=str(response["TranscriptionJob"]["FailureReason"]))
            raise MasExecutionError(operator_object.return_output_object())
        elif response["TranscriptionJob"]["TranscriptionJobStatus"] == "COMPLETED":
            transcribe_uri = response["TranscriptionJob"]["Transcript"]["TranscriptFileUri"]
            http = urllib3.PoolManager()
            transcription = http.request('GET', transcribe_uri)
            transcription_data = transcription.data.decode("utf-8")
            transcription_json = json.loads(transcription_data)

            text_only_transcript = ''

            # Concatenate all transcript segments into a single plain-text transcript
            for transcripts in transcription_json["results"]["transcripts"]:
                transcript = transcripts["transcript"]
                text_only_transcript += transcript

            print(text_only_transcript)

            # Write a plain-text copy of the transcript to the dataplane bucket
            # and record its location alongside the full Transcribe output.
            dataplane = DataPlane()
            s3 = boto3.client('s3', config=config)

            transcript_storage_path = dataplane.generate_media_storage_path(asset_id, workflow_id)

            key = transcript_storage_path['S3Key'] + "transcript.txt"
            bucket = transcript_storage_path['S3Bucket']

            s3.put_object(Bucket=bucket, Key=key, Body=text_only_transcript)

            transcription_json["TextTranscriptUri"] = {"S3Bucket": bucket, "S3Key": key}

            # Store the complete Transcribe output as asset metadata in the dataplane.
            metadata_upload = dataplane.store_asset_metadata(asset_id, operator_object.name, workflow_id,
                                                             transcription_json)
            if "Status" not in metadata_upload:
                operator_object.add_workflow_metadata(
                    TranscribeError="Unable to upload metadata for asset: {asset}".format(asset=asset_id),
                    TranscribeJobId=job_id)
                operator_object.update_workflow_status("Error")
                raise MasExecutionError(operator_object.return_output_object())
            else:
                if metadata_upload['Status'] == 'Success':
                    operator_object.add_media_object('Text', metadata_upload['Bucket'], metadata_upload['Key'])
                    # The source language may be user-specified or auto-detected by
                    # Transcribe. Either way, pass it to downstream operators as
                    # workflow metadata.
                    operator_object.add_workflow_metadata(TranscribeJobId=job_id, TranscribeSourceLanguage=source_language)

                    operator_object.update_workflow_status("Complete")
                    return operator_object.return_output_object()
                else:
                    operator_object.add_workflow_metadata(
                        TranscribeError="Unable to upload metadata for asset: {asset}".format(asset=asset_id),
                        TranscribeJobId=job_id)
                    operator_object.update_workflow_status("Error")
                    raise MasExecutionError(operator_object.return_output_object())
        else:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(TranscribeError="Unable to determine status")
            raise MasExecutionError(operator_object.return_output_object())