def create_output_conversation_analytics()

in pca-server/src/pca/pca-aws-sf-process-turn-by-turn.py


    def create_output_conversation_analytics(self):
        '''
        Generates conversation-level analytics for this document, including information
        about the call, speaker labels, sentiment trends, and detected entities
        '''
        resultsHeaderInfo = {}

        # Basic information.  Note that we expect the upstream input processing
        # to set the conversation time - if it is not set then we default it
        # to the current processing time.
        resultsHeaderInfo["GUID"] = self.guid
        resultsHeaderInfo["Agent"] = self.agent
        resultsHeaderInfo["ConversationTime"] = self.conversationTime
        resultsHeaderInfo["ConversationLocation"] = self.conversationLocation
        resultsHeaderInfo["ProcessTime"] = str(datetime.now())
        resultsHeaderInfo["LanguageCode"] = self.conversationLanguageCode
        resultsHeaderInfo["Duration"] = str(self.duration)
        if self.conversationTime == "":
            resultsHeaderInfo["ConversationTime"] = resultsHeaderInfo["ProcessTime"]

        # Build up a list of speaker labels from the config; if there are more
        # speakers than configured names then we fall back to generated placeholders
        speakerLabels = []

        # Standard Transcribe - look up display names in config order
        if self.api_mode == cf.API_STANDARD:
            for speaker in range(self.maxSpeakerIndex + 1):
                next_label = {}
                next_label["Speaker"] = KNOWN_SPEAKER_PREFIX + str(speaker)
                try:
                    next_label["DisplayText"] = cf.appConfig[cf.CONF_SPEAKER_NAMES][speaker]
                except (KeyError, IndexError):
                    # No configured name for this speaker - use a generated placeholder
                    next_label["DisplayText"] = UNKNOWN_SPEAKER_PREFIX + str(speaker)
                speakerLabels.append(next_label)
        # Call Analytics is more prescriptive - the speakers are defined in the call results
        elif self.api_mode == cf.API_ANALYTICS:
            for speaker in self.analytics_channel_map:
                next_label = {}
                next_label["Speaker"] = KNOWN_SPEAKER_PREFIX + str(self.analytics_channel_map[speaker])
                next_label["DisplayText"] = speaker.title()
                speakerLabels.append(next_label)
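        # At this point speakerLabels might look like, for example,
        # [{"Speaker": "spk_0", "DisplayText": "Agent"}, ...] - illustrative
        # values only; the real prefix comes from KNOWN_SPEAKER_PREFIX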
        resultsHeaderInfo["SpeakerLabels"] = speakerLabels

        # Sentiment trends - generate one entry per speaker, keyed by speaker label
        sentimentTrends = {}
        for speaker in range(self.maxSpeakerIndex + 1):
            full_name = KNOWN_SPEAKER_PREFIX + str(speaker)
            sentimentTrends[full_name] = self.generate_speaker_sentiment_trend(full_name, speaker)
        resultsHeaderInfo["SentimentTrends"] = sentimentTrends

        # Analytics mode additional metadata
        if self.api_mode == cf.API_ANALYTICS:
            # Speaker and non-talk time
            resultsHeaderInfo["SpeakerTime"] = self.extract_analytics_speaker_time(self.asr_output["ConversationCharacteristics"])
            resultsHeaderInfo["CategoriesDetected"] = self.extract_analytics_categories(self.asr_output["Categories"])
            resultsHeaderInfo["IssuesDetected"] = self.issues_detected
            resultsHeaderInfo["CombinedAnalyticsGraph"] = self.create_combined_tca_graphic()
        # For non-analytics mode, we can simulate some analytics data
        elif self.api_mode == cf.API_STANDARD:
            # Calculate each speaker's talk time from the speech segments (non-talk time cannot be derived this way)
            resultsHeaderInfo["SpeakerTime"] = {}
            for speaker in resultsHeaderInfo["SpeakerLabels"]:
                speaker_label = speaker["Speaker"]
                speaker_time = sum((segment.segmentEndTime - segment.segmentStartTime)
                                   for segment in self.speechSegmentList
                                   if segment.segmentSpeaker == speaker_label)
                resultsHeaderInfo["SpeakerTime"][speaker_label] = {"TotalTimeSecs": speaker_time}

        # Summarise the detected custom entities - name, instance count, and matched values
        customEntityList = []
        for entity, values in self.headerEntityDict.items():
            nextEntity = {}
            nextEntity["Name"] = entity
            nextEntity["Instances"] = len(values)
            nextEntity["Values"] = values
            customEntityList.append(nextEntity)
        resultsHeaderInfo["CustomEntities"] = customEntityList

        # Decide which source information block to add - only Transcribe for now, but held as a list for future sources
        transcribeSourceInfo = {}
        transcribeSourceInfo["TranscribeJobInfo"] = self.create_output_transcribe_job_info()
        sourceInfo = []
        sourceInfo.append(transcribeSourceInfo)
        resultsHeaderInfo["SourceInformation"] = sourceInfo

        # Record the entity recognizer used - either the simple file-based list or a custom endpoint
        if self.simpleEntityMatchingUsed:
            resultsHeaderInfo["EntityRecognizerName"] = cf.appConfig[cf.CONF_ENTITY_FILE]
        elif self.customEntityEndpointName != "":
            resultsHeaderInfo["EntityRecognizerName"] = self.customEntityEndpointName

        return resultsHeaderInfo
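
The method only assembles and returns the header dictionary - persisting it is left to
the caller. A minimal usage sketch, assuming `processor` is an already-populated
instance of the class that owns this method (hypothetical variable name) and an
illustrative output filename:

    import json

    # Build the conversation-level header block and inspect a few fields
    header = processor.create_output_conversation_analytics()
    print(header["LanguageCode"], header["Duration"], len(header["SpeakerLabels"]))

    # Persist it; wrapping it under a "ConversationAnalytics" key mirrors the
    # PCA results format (an assumption - confirm against the consuming writer)
    with open("conversation-analytics.json", "w") as f:
        json.dump({"ConversationAnalytics": header}, f, indent=2, default=str)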