in pca-server/src/pca/pca-aws-sf-process-turn-by-turn.py [0:0]
def extract_nlp(self, segment_list):
    """
    Attach sentiment scores and detected entities to each speech segment, in place.

    Segments whose text is shorter than MIN_SENTIMENT_LENGTH are skipped entirely
    (neither sentiment nor entities are written). For every other segment:

    - In Call Analytics mode (self.api_mode == cf.API_ANALYTICS) the segment's
      existing positive/negative marker selects a fixed 1.0/0.0 score set, which
      is stored in segmentAllSentiments.
    - Otherwise, if there is no Comprehend language code, the fixed neutral score
      set is stored and segmentPositive / segmentNegative are set to 0.0.
    - Otherwise self.comprehend_single_sentiment is called; the segment is marked
      "Negative" or "Positive" when the matching score reaches
      self.min_sentiment_negative / self.min_sentiment_positive (negative is
      tested first), and the returned scores are stored on the segment.

    When a Comprehend language code is set, entities are then detected on the
    text (with PII_PLACEHOLDER replaced by PII_PLACEHOLDER_MASK) and each one is
    handed to self.extract_entities_from_line: first those from
    self.comprehend_single_entity, then - only for language "en" with a custom
    endpoint ARN configured - those from the custom endpoint.

    :param segment_list: iterable of segment objects exposing segmentText and the
                         sentiment attributes written by this method
    :return: None - the segments are updated in place
    """
    comprehend = boto3.client("comprehend")

    # Fixed score sets for when Comprehend cannot be used, or when Call Analytics
    # only supplies a marker. NOTE: every segment given one of these shares the
    # same dict object - they are not copied per segment
    neutral_scores = {'Positive': 0.0, 'Negative': 0.0, 'Neutral': 1.0}
    positive_scores = {'Positive': 1.0, 'Negative': 0.0, 'Neutral': 0.0}
    negative_scores = {'Positive': 0.0, 'Negative': 1.0, 'Neutral': 0.0}

    for segment in segment_list:
        text = segment.segmentText

        # Too short to be worth analysing - leave the segment untouched
        if len(text) < MIN_SENTIMENT_LENGTH:
            continue

        # --- Sentiment ---
        if self.api_mode == cf.API_ANALYTICS:
            # Call Analytics has already marked the turn, so just pick the
            # placeholder scores that agree with that marker
            if segment.segmentIsPositive:
                placeholder_scores = positive_scores
            elif segment.segmentIsNegative:
                placeholder_scores = negative_scores
            else:
                placeholder_scores = neutral_scores
            segment.segmentAllSentiments = placeholder_scores
        elif self.comprehendLanguageCode == "":
            # Standard Transcribe, but no language for Comprehend: all neutral
            segment.segmentAllSentiments = neutral_scores
            segment.segmentPositive = 0.0
            segment.segmentNegative = 0.0
        else:
            # Standard Transcribe with a language: score the text via Comprehend
            # and derive the marker from our thresholds
            response = self.comprehend_single_sentiment(text, comprehend)
            scores = response["SentimentScore"]
            positive_score = scores["Positive"]
            negative_score = scores["Negative"]

            # Negative wins if both thresholds are met; if neither is met the
            # segment's marker fields are left as they were
            if negative_score >= self.min_sentiment_negative:
                segment.segmentSentiment = "Negative"
                segment.segmentIsNegative = True
                segment.segmentSentimentScore = negative_score
            elif positive_score >= self.min_sentiment_positive:
                segment.segmentSentiment = "Positive"
                segment.segmentIsPositive = True
                segment.segmentSentimentScore = positive_score

            # Keep the raw Comprehend scores regardless of the marker
            segment.segmentAllSentiments = scores
            segment.segmentPositive = positive_score
            segment.segmentNegative = negative_score

        # --- Entities (same approach for every Transcribe mode) ---
        # Nothing more to do for this segment without a Comprehend language
        if self.comprehendLanguageCode == "":
            continue

        masked_text = text.replace(PII_PLACEHOLDER, PII_PLACEHOLDER_MASK)
        standard_response = self.comprehend_single_entity(masked_text, comprehend)

        # Standard entities, passed along with the configured entity types
        for entity in standard_response["Entities"]:
            self.extract_entities_from_line(entity, segment, cf.appConfig[cf.CONF_ENTITY_TYPES])

        # Custom entity model, if one is configured. At the time of writing,
        # Custom Entity models in Comprehend are ENGLISH ONLY
        if self.customEntityEndpointARN != "" and self.comprehendLanguageCode == "en":
            custom_response = comprehend.detect_entities(
                Text=masked_text,
                EndpointArn=self.customEntityEndpointARN,
            )
            # An empty type list is passed for custom-model entities
            for entity in custom_response["Entities"]:
                self.extract_entities_from_line(entity, segment, [])