source/operators/comprehend/entities/start_entity_detection.py [42:97]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Module-level setup: runs once when the module is loaded, before any
# invocation of lambda_handler.
# NOTE(review): patch_all is presumably the AWS X-Ray SDK helper; its import
# is outside this excerpt -- confirm.
patch_all()

# Client settings arrive as a JSON object in the 'botoConfig' environment
# variable; a missing variable raises KeyError at module load.
mie_config = json.loads(os.environ['botoConfig'])
# NOTE: this rebinds the module-level name `config` from whatever was imported
# under that name (presumably botocore's `config` module -- confirm) to the
# resulting Config object, so the name no longer refers to the import afterwards.
config = config.Config(**mie_config)
# Both clients are built with the same Config object.
comprehend = boto3.client('comprehend', config=config)
s3 = boto3.client('s3', config=config)
# Required environment variables; a missing one raises KeyError at module load.
comprehend_role = os.environ['comprehendRole']
region = os.environ['AWS_REGION']
# JSON content-type header mapping (not used in the visible part of the handler).
headers = {"Content-Type": "application/json"}


def lambda_handler(event, context):
    print("We got this event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    try:
        workflow_id = operator_object.workflow_execution_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(comprehend_error="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        bucket = operator_object.input["Media"]["Text"]["S3Bucket"]
        key = operator_object.input["Media"]["Text"]["S3Key"]
        # If operator_object.input["Media"]["Text"]["S3Key"] is a json file,
        # then we're working with metadata about the text file and need to
        # get the actual transcript text from the TextTranscriptUri field.
        # Otherwise we assume operator_object.input["Media"]["Text"]["S3Key"]
        # contains only the transcript text.
        file_ext = str(key.split('.')[-1])
        if file_ext == "json":
            obj = s3.get_object(
                Bucket=bucket,
                Key=key
            )
            results = obj['Body'].read().decode('utf-8')
            results_json = json.loads(results)
            try:
                uri_data = results_json["TextTranscriptUri"]
            except KeyError:
                raise MasExecutionError("JSON can only be passed in from AWS transcribe")
            else:
                bucket = uri_data['S3Bucket']
                key = uri_data['S3Key']
        uri = "s3://" + bucket + '/' + key
        # Fetch the object's metadata so that an empty input text can be
        # detected from its ContentLength below.
        response = s3.head_object(Bucket=bucket, Key=key)
        # If a KmsKey is specified as an input to this operator, then use that
        # to enable encryption in the Comprehend job.
        kms_key_id = ""
        if "KmsKeyId" in operator_object.configuration:
            kms_key_id = operator_object.configuration["KmsKeyId"]
            print("Found a KMS Key Id. Encryption will be enabled in the Comprehend job.")
        else:
            print("No KMS Key was specified. Encryption will not be enabled in the Comprehend job.")
        if response['ContentLength'] < 1:
            operator_object.update_workflow_status("Complete")
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



source/operators/comprehend/key_phrases/start_key_phrases.py [42:98]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Module-level setup: runs once when the module is loaded, before any
# invocation of lambda_handler.
# NOTE(review): patch_all is presumably the AWS X-Ray SDK helper; its import
# is outside this excerpt -- confirm.
patch_all()

# Client settings arrive as a JSON object in the 'botoConfig' environment
# variable; a missing variable raises KeyError at module load.
mie_config = json.loads(os.environ['botoConfig'])
# NOTE: this rebinds the module-level name `config` from whatever was imported
# under that name (presumably botocore's `config` module -- confirm) to the
# resulting Config object, so the name no longer refers to the import afterwards.
config = config.Config(**mie_config)
# Both clients are built with the same Config object.
comprehend = boto3.client('comprehend', config=config)

s3 = boto3.client('s3', config=config)
# Required environment variables; a missing one raises KeyError at module load.
comprehend_role = os.environ['comprehendRole']
region = os.environ['AWS_REGION']
# JSON content-type header mapping (not used in the visible part of the handler).
headers = {"Content-Type": "application/json"}


def lambda_handler(event, context):
    print("We got this event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    try:
        workflow_id = operator_object.workflow_execution_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(comprehend_error="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        bucket = operator_object.input["Media"]["Text"]["S3Bucket"]
        key = operator_object.input["Media"]["Text"]["S3Key"]
        # If operator_object.input["Media"]["Text"]["S3Key"] is a json file,
        # then we're working with metadata about the text file and need to
        # get the actual transcript text from the TextTranscriptUri field.
        # Otherwise we assume operator_object.input["Media"]["Text"]["S3Key"]
        # contains only the transcript text.
        file_ext = str(key.split('.')[-1])
        if file_ext == "json":
            obj = s3.get_object(
                Bucket=bucket,
                Key=key
            )
            results = obj['Body'].read().decode('utf-8')
            results_json = json.loads(results)
            try:
                uri_data = results_json["TextTranscriptUri"]
            except KeyError:
                raise MasExecutionError("JSON can only be passed in from AWS transcribe")
            else:
                bucket = uri_data['S3Bucket']
                key = uri_data['S3Key']
        uri = "s3://" + bucket + '/' + key
        # Fetch the object's metadata so that an empty input text can be
        # detected from its ContentLength below.
        response = s3.head_object(Bucket=bucket, Key=key)
        # If a KmsKey is specified as an input to this operator, then use that
        # to enable encryption in the Comprehend job.
        kms_key_id = ""
        if "KmsKeyId" in operator_object.configuration:
            kms_key_id = operator_object.configuration["KmsKeyId"]
            print("Found a KMS Key Id. Encryption will be enabled in the Comprehend job.")
        else:
            print("No KMS Key was specified. Encryption will not be enabled in the Comprehend job.")
        if response['ContentLength'] < 1:
            operator_object.update_workflow_status("Complete")
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



