in src/processors.py [0:0]
def redact(self, input_text, entities_list):
    """Redact the PII entities from the given text.

    Entities whose ``SCORE`` is below
    ``self.redaction_config.confidence_threshold`` are ignored. Every other
    entity is either redacted (when its ``ENTITY_TYPE`` is listed in
    ``self.redaction_config.pii_entity_types``, or ``ALL`` is listed) or
    copied through unchanged. A redacted span becomes ``[<entity type>]`` when
    ``mask_mode`` equals ``REPLACE_WITH_PII_ENTITY_TYPE``; otherwise it becomes
    ``mask_character`` repeated once per redacted character.

    Entities are processed in ascending ``BEGIN_OFFSET`` order regardless of
    the order in which they are supplied. An entity lying entirely inside text
    that has already been emitted is skipped, and a partially overlapping
    entity is clipped to its not-yet-emitted tail, so no character of the
    input is emitted twice and a masked span can never be re-emitted verbatim
    by a later overlapping entity.

    Args:
        input_text: The text to redact.
        entities_list: Iterable of mappings indexed by ``SCORE``,
            ``ENTITY_TYPE``, ``BEGIN_OFFSET`` and ``END_OFFSET``; the offsets
            are used as slice indices into ``input_text``.
            NOTE(review): presumably Amazon Comprehend PII entity dicts -
            confirm against the caller.

    Returns:
        The redacted text.
    """
    config = self.redaction_config
    # Loop invariants: resolve the configuration once, not once per entity.
    threshold = config.confidence_threshold
    pii_entity_types = config.pii_entity_types
    redact_everything = ALL in pii_entity_types
    replace_with_type = config.mask_mode == REPLACE_WITH_PII_ENTITY_TYPE

    doc_parts = []
    cursor = 0  # index of the first character of input_text not yet emitted
    # sorted() is stable, so entities sharing a begin offset keep their
    # relative order and already-sorted input is processed exactly as given.
    for entity in sorted(entities_list, key=lambda item: item[BEGIN_OFFSET]):
        if entity[SCORE] < threshold:
            continue
        entity_type = entity[ENTITY_TYPE]
        begin_offset = entity[BEGIN_OFFSET]
        end_offset = entity[END_OFFSET]
        if begin_offset < cursor:
            # This entity overlaps text that was already emitted (and possibly
            # masked); never emit those characters a second time.
            if end_offset <= cursor:
                continue
            begin_offset = cursor
        # Untouched text between the previous entity and this one.
        doc_parts.append(input_text[cursor:begin_offset])
        if redact_everything or entity_type in pii_entity_types:
            if replace_with_type:
                # Replace with PII entity type
                doc_parts.append(f"[{entity_type}]")
            else:
                # Replace with the mask character, preserving span length
                doc_parts.append(config.mask_character * (end_offset - begin_offset))
        else:
            # Don't redact this entity type
            doc_parts.append(input_text[begin_offset:end_offset])
        cursor = end_offset
    # Remainder after the last entity (the whole text if none was processed).
    doc_parts.append(input_text[cursor:])
    return ''.join(doc_parts)