in pca-server/src/pca/pca-aws-sf-process-turn-by-turn.py [0:0]
def create_simple_entity_entries(self, speech_segments):
    """
    Searches through the speech segments given and updates them with any of the simple entity
    mapping entries that we've found.  It also updates the line-level items.  Both methods simulate
    the same response that we'd generate if this was via Standard or Custom Comprehend Entities.

    The work is done in two passes:

    1. Every key of ``self.simpleEntityMap`` that occurs as a substring of the lower-cased text of
       at least one segment is copied (with its map value) into ``self.matchedSimpleEntities``.
    2. For every key in ``self.matchedSimpleEntities`` the header count is updated once via
       ``self.update_header_entity_count``, and one entry is appended to the segment's
       ``segmentCustomEntities`` list for each non-overlapping occurrence in each segment.

    Empty entity keys are ignored in both passes: an empty string is a substring of every text
    and would otherwise make the repeat-search loop spin forever without advancing.

    :param speech_segments: re-iterable collection of segment objects exposing a ``segmentText``
                            string and a ``segmentCustomEntities`` list
    """
    # Lower-case every segment once up front - both passes match against this text, so there is
    # no need to redo it for each (entity, segment) pair
    loweredSegments = [(segment, segment.segmentText.lower()) for segment in speech_segments]

    # Need to check each of our speech segments for each of our entity blocks
    # NOTE(review): map keys are compared as-is against lower-cased text, so they are presumably
    # stored in lower-case already - confirm against wherever the map is loaded
    for _, turnText in loweredSegments:
        for nextEntity in self.simpleEntityMap:
            # Skip empty keys - "" is "in" every string, so it would always match
            if nextEntity and nextEntity in turnText:
                self.matchedSimpleEntities[nextEntity] = self.simpleEntityMap[nextEntity]

    # Loop through each segment looking for matches in our cut-down entity list
    for entity in self.matchedSimpleEntities:
        # A zero-length entity can never advance the search below, so never process one
        if not entity:
            continue

        # Start by recording this in the header
        entityEntry = self.matchedSimpleEntities[entity]
        self.update_header_entity_count(entityEntry["Type"], entityEntry["Original"])
        entityTextLength = len(entity)

        # Work through each segment
        # TODO Need to check we don't highlight characters in the middle of transcribed word
        # TODO Need to try and handle simple plurals (e.g. type="log" should match "logs")
        for segment, turnText in loweredSegments:
            # If found then add the data in the segment, and keep going until we don't find one
            index = turnText.find(entity)
            while index != -1:
                # Got a match - add this one on, then look for another
                # TODO if entityText is capitalised then use it, otherwise use segment text
                nextSearchFrom = index + entityTextLength
                # Offsets are positions within the lower-cased segment text
                segment.segmentCustomEntities.append(
                    {
                        "Score": 1.0,
                        "Type": entityEntry["Type"],
                        "Text": entityEntry["Original"],  # TODO fix as per the above
                        "BeginOffset": index,
                        "EndOffset": nextSearchFrom,
                    }
                )

                # Now look to see if it's repeated in this segment, resuming after this match
                index = turnText.find(entity, nextSearchFrom)