in source/lambda/apiprocessor/redact.py [0:0]
def processLine(line, entityList=None):
    """Split a line of text into entity and non-entity phrases.

    Walks the entities listed under ``line["comprehend"]["Entities"]`` in
    the order given and emits, via ``parsePhrase``, one phrase for every
    entity span and one for the text around them. ``parsePhrase`` is defined
    elsewhere; it is called as ``parsePhrase(begin, end, text[, type])``
    (presumably half-open ``[begin, end)`` offsets into ``text`` -- confirm
    against its definition).

    Args:
        line: Mapping with a ``"text"`` entry and, optionally, a
            ``"comprehend"`` mapping holding an ``"Entities"`` list. Each
            entity must provide ``"BeginOffset"``, ``"EndOffset"`` and
            ``"Type"``.
        entityList: Optional dict of running counts keyed by entity type,
            with ``"null"`` counting non-entity phrases. It is updated in
            place so a caller can accumulate counts over many lines. When
            omitted, a fresh ``{"null": 0}`` is used for this call.

    Returns:
        ``{"phrases": [...], "entityList": {...}}`` where ``phrases`` holds
        the ``parsePhrase`` results in text order and ``entityList`` is the
        (possibly caller-supplied) counter dict.
    """
    # A dict literal as the default would be shared by every call that
    # omits the argument, leaking counts from one call into the next.
    if entityList is None:
        entityList = {"null": 0}

    def count(key):
        # .get() also tolerates caller-supplied dicts lacking the key.
        entityList[key] = entityList.get(key, 0) + 1

    text = line["text"]
    comprehend = line.get("comprehend") or {}
    entities = comprehend.get("Entities") or []

    phrases = []
    previousEntity = None
    for currentEntity in entities:
        begin = currentEntity["BeginOffset"]
        end = currentEntity["EndOffset"]
        entityType = currentEntity["Type"]

        if previousEntity is None:
            # Leading plain text before the first entity, if any.
            if begin != 0:
                phrases.append(parsePhrase(0, begin, text))
                count("null")
        else:
            # Gap between two consecutive entities (emitted even when the
            # two entities are adjacent, matching the existing behavior).
            phrases.append(parsePhrase(previousEntity["EndOffset"], begin, text))
            count("null")

        phrases.append(parsePhrase(begin, end, text, entityType))
        count(entityType)
        previousEntity = currentEntity

    if previousEntity is None:
        # No entities at all: the whole line is one non-entity phrase.
        phrases.append(parsePhrase(0, len(text), text))
        count("null")
    elif len(text) > previousEntity["EndOffset"]:
        # Trailing plain text after the last entity. The previous check,
        # len(text) - 1 > EndOffset, dropped a single trailing character
        # (e.g. a closing "."), although the slice below treats EndOffset
        # as the first index after the entity.
        phrases.append(parsePhrase(previousEntity["EndOffset"], len(text), text))
        count("null")

    return {
        "phrases": phrases,
        "entityList": entityList
    }