def processLine()

in source/lambda/apiprocessor/redact.py [0:0]


def processLine(line, entityList=None):
  """Split one line of text into entity and non-entity phrases and tally them.

  The line's text is walked left to right using the entity offsets found under
  line["comprehend"]["Entities"] (both keys are optional). Each entity becomes
  one phrase built with its "Type"; each non-empty stretch of text before,
  between, or after entities becomes one untyped phrase counted under the
  "null" key. "EndOffset" is treated as exclusive, i.e. the text following an
  entity starts at its "EndOffset".

  Phrases are built by parsePhrase(begin, end, text[, type]), which is defined
  elsewhere in this file; its return value is stored as-is.

  Args:
    line: dict with a "text" string and, optionally, a "comprehend" dict
      holding an "Entities" list. Each entity must provide "BeginOffset",
      "EndOffset" and "Type". Entities are presumably sorted by offset and
      non-overlapping -- verify against the caller.
    entityList: optional dict of running counts keyed by entity type (plus
      "null" for untyped phrases). When supplied it is updated in place so a
      caller can accumulate counts over several lines. When omitted a fresh
      {"null": 0} is created for this call.

  Returns:
    dict with "phrases" (list of parsePhrase results in text order) and
    "entityList" (the updated counts dict).
  """
  # A literal dict default would be shared by every call that omits the
  # argument, leaking counts from one call into the next.
  if entityList is None:
    entityList = {"null": 0}

  def _count(kind):
    # .get() tolerates caller-supplied dicts that lack the key (incl. "null").
    entityList[kind] = entityList.get(kind, 0) + 1

  text = line["text"]
  entities = line.get("comprehend", {}).get("Entities", [])
  phrases = []
  cursor = 0  # offset just past the most recently emitted entity

  for entity in entities:
    begin = entity["BeginOffset"]
    end = entity["EndOffset"]
    # Emit the plain text in front of this entity only when there is some;
    # adjacent entities must not yield an empty phrase or bump "null".
    if begin > cursor:
      phrases.append(parsePhrase(cursor, begin, text))
      _count("null")
    phrases.append(parsePhrase(begin, end, text, entity["Type"]))
    _count(entity["Type"])
    cursor = end

  if not entities:
    # No entities at all: the whole line is a single untyped phrase.
    phrases.append(parsePhrase(0, len(text), text))
    _count("null")
  elif cursor < len(text):
    # Trailing text after the last entity. Comparing against len(text), not
    # len(text) - 1, keeps a single trailing character such as a final ".".
    phrases.append(parsePhrase(cursor, len(text), text))
    _count("null")

  return {
    "phrases": phrases,
    "entityList": entityList
  }