def TranslateWebCaptions()

in source/operators/captions/webcaptions.py [0:0]
83 lines of code
18 McCabe index (conditional complexity)

    def TranslateWebCaptions(self, inputCaptions, sourceLanguageCode, targetLanguageCodes, terminology_names=[], parallel_data_names=[]):

        marker = self.marker

        try:

            translate_role = os.environ['translateRole']
            asset_id = self.operator_object.asset_id
            workflow_id = self.operator_object.workflow_execution_id
            translate_job_name = "MIE_"+asset_id+"_"+workflow_id

            # Convert WebCaptions to text with marker between caption lines
            inputEntries = map(lambda c: c["caption"], inputCaptions)
            inputDelimited = marker.join(inputEntries)

            transcript_storage_path = dataplane.generate_media_storage_path(asset_id, workflow_id)
            bucket = transcript_storage_path['S3Bucket']
            translation_input_path = transcript_storage_path['S3Key']+"webcaptions_translate_input/"
            translation_input_uri = 's3://'+bucket+"/"+translation_input_path
            translation_output_path = transcript_storage_path['S3Key']+"webcaptions_translate_output/"
            translation_output_uri = 's3://'+bucket+"/"+translation_output_path
            key = translation_input_path+"transcript_with_caption_markers.txt"

            print("put object {} {}".format(bucket, key))
            s3.put_object(Bucket=bucket, Key=key, Body=inputDelimited)

            print("create translate output folder if it doesn't exist")
            dummy_key = translation_output_path+"/"+"foo"
            s3.put_object(Bucket=bucket, Key=dummy_key, Body="foo")

            print("Translate inputs")
            print("translation_input_uri {}".format(translation_input_uri))
            print("translation_output_uri {}".format(translation_output_uri))
            print("translate_role {}".format(translate_role))

            # Kick off a job for each input language
            translate_jobs = []
            for targetLanguageCode in targetLanguageCodes:
                # Avoid translating to the same language as the source language
                if targetLanguageCode == sourceLanguageCode:
                    continue
                print("Starting translation to {}".format(targetLanguageCode))
                # Even though the API takes a list of targets, Translate only supports
                # a list of 1 or less

                # Set the job name to avoid creating the same job multiple time if
                # we retry due to an error.
                singletonTargetList = []
                singletonTargetList.append(targetLanguageCode)
                job_name = "MIE_"+asset_id+"_"+workflow_id+"_"+targetLanguageCode
                print("JobName: {}".format(job_name))

                terminology_name = []
                if len(terminology_names) > 0:
                    # Find a terminology in the list of custom terminologies
                    # that defines translations for targetLanguageCode.
                    # If there happens to be more than one terminology matching targetLanguageCode
                    # then just use the first one in the list.
                    for item in terminology_names:
                        if targetLanguageCode in item['TargetLanguageCodes']:
                            terminology_name.append(item['Name'])
                            break
                    if len(terminology_name) == 0:
                        print("No custom terminology specified.")
                    else:
                        print("Using custom terminology {}".format(terminology_name))

                parallel_data_name = []
                if len(parallel_data_names) > 0:
                    # Find a parallel data set in the list of
                    # that defines translations for targetLanguageCode.
                    # If there happens to be more than one  matching targetLanguageCode
                    # then just use the first one in the list.
                    for item in parallel_data_names:
                        if targetLanguageCode in item['TargetLanguageCodes']:
                            parallel_data_name.append(item['Name'])
                            break
                    if len(parallel_data_name) == 0:
                        print("No parallel data specified.")
                    else:
                        print("Using parallel data_names {}".format(parallel_data_name))

                translation_job_config = {
                    "JobName": job_name,
                    "InputDataConfig": {
                        'S3Uri': translation_input_uri,
                        'ContentType': self.contentType
                    },
                    "OutputDataConfig": {
                        'S3Uri': translation_output_uri
                    },
                    "DataAccessRoleArn": translate_role,
                    "SourceLanguageCode": sourceLanguageCode,
                    "TargetLanguageCodes": singletonTargetList,
                    "TerminologyNames": terminology_name,
                }
                current_region = os.environ['AWS_REGION']
                # Include Parallel Data configuration when running in a region where
                # Active Custom Translation is available. Reference:
                # https://docs.aws.amazon.com/translate/latest/dg/customizing-translations-parallel-data.html
                active_custom_translation_supported_regions = ['us-east-1', 'us-west-2', 'eu-west-1']
                if current_region in active_custom_translation_supported_regions:
                    translation_job_config["ParallelDataNames"] = parallel_data_name

            # Save the delimited transcript text to S3
                response = translate_client.start_text_translation_job(**translation_job_config)
                jobinfo = {
                    "JobId": response["JobId"],
                    "TargetLanguageCode": targetLanguageCode
                }
                translate_jobs.append(jobinfo)

                self.operator_object.add_workflow_metadata(TextTranslateJobPropertiesList=translate_jobs, TranslateSourceLanguage=sourceLanguageCode)
        except Exception as e:
            self.operator_object.update_workflow_status("Error")
            self.operator_object.add_workflow_metadata(TranslateError="Unable to start translation WebCaptions job: {e}".format(e=str(e)))
            raise MasExecutionError(self.operator_object.return_output_object())