in pca-server/src/pca/pca-aws-sf-process-turn-by-turn.py [0:0]
def extract_analytics_categories(self, categories):
    """
    Build the header-level "CategoriesDetected" block and tag speech segments with category markers.

    For each matched category this records its name, its instance count and the begin/end offsets
    (converted from milliseconds to seconds) of every point of interest.  As a side effect it injects
    markers into the entries of self.speechSegmentList so that a UI can tell on which line of the
    transcript a category should be highlighted:
      - a category instance is appended to segmentCategoriesDetectedPre of the first segment whose
        segmentStartTime is at or after the instance's begin offset
      - "missing" categories (zero instances) have no timestamps, so they are recorded at 0.0 seconds
      - instances that begin after the last segment's start time are appended to
        segmentCategoriesDetectedPost of the final segment
    If there are no speech segments at all then no markers are injected, but the header block is
    still returned.

    @param categories: "Categories" block from the Call Analytics results
    @return: JSON structure for header-level "CategoriesDetected" block
    """
    # Category names keyed by the begin offset (in seconds) at which they should be highlighted
    timed_categories = {}
    categories_detected = []

    # Work around each of the matched categories
    for matched_cat in categories["MatchedCategories"]:
        points_of_interest = categories["MatchedDetails"][matched_cat]["PointsOfInterest"]

        # Map across all of the instance timestamps (if any)
        timestamp_array = []
        for instance in points_of_interest:
            begin_secs = float(instance["BeginOffsetMillis"] / 1000)
            end_secs = float(instance["EndOffsetMillis"] / 1000)
            # Store the timestamps for the header
            timestamp_array.append({"BeginOffsetSecs": begin_secs, "EndOffsetSecs": end_secs})
            # Keep our time-keyed category list up to date
            timed_categories.setdefault(begin_secs, []).append(matched_cat)

        # "Missing" categories have no timestamps, so record them against a time of 0.0 seconds
        if not points_of_interest:
            timed_categories.setdefault(0.0, []).append(matched_cat)

        # Put it all together - the instance count will be 0 for a "not found" type of category
        categories_detected.append({
            "Name": matched_cat,
            "Instances": len(points_of_interest),
            "Timestamps": timestamp_array,
        })

    # If we had some categories then ensure each segment is tagged with them
    if timed_categories:
        segments = self.speechSegmentList

        # Go through each speech segment and see if a category fits here
        for segment in segments:
            if not timed_categories:
                # Everything has been placed, so there is nothing left to look for
                break
            # Collect the matching times first so that we never mutate the dict while iterating it
            due_times = [cat_time for cat_time in timed_categories
                         if cat_time <= segment.segmentStartTime]
            for cat_time in due_times:
                segment.segmentCategoriesDetectedPre += timed_categories.pop(cat_time)

        # If we have any categories left then tag them to the final segment; with no segments
        # at all there is nothing to tag, so skip this rather than fail on an empty list
        if timed_categories and segments:
            final_segment = segments[-1]
            for remaining_categories in timed_categories.values():
                final_segment.segmentCategoriesDetectedPost += remaining_categories

    # Return the header structure for detected categories
    return categories_detected