in athena-udfs-textanalytics/src/main/java/com/amazonaws/athena/udf/textanalytics/TextAnalyticsUDFHandler.java [698:717]
/**
 * Replaces each detected PII entity of a requested type in {@code text} with the
 * placeholder {@code [TYPE]}, where TYPE is the entity's type name.
 *
 * @param piiEntities detected entities; the offset arithmetic assumes the list is
 *                    ordered by occurrence in {@code text} and that spans do not overlap
 * @param text        the original text the entity offsets refer to
 * @param redactTypes comma and/or whitespace separated list of types to redact,
 *                    e.g. "NAME, ADDRESS"; the token "ALL" redacts every entity
 * @return the text with the selected entities replaced by their bracketed type name
 * @throws Exception declared for compatibility with existing callers
 */
private String redactPiiEntityTypes(List<PiiEntity> piiEntities, String text, String redactTypes) throws Exception
{
    // redactTypes contains comma or space separated list of types, e.g. "NAME, ADDRESS"
    List<String> redactTypeList = Arrays.asList(redactTypes.split("[\\s,]+"));
    // Match whole tokens only: a substring test on the raw string would treat
    // "USERNAME" as a request for "NAME", or "IP_ADDRESS" as a request for "ADDRESS".
    boolean redactAll = redactTypeList.contains("ALL");
    StringBuilder result = new StringBuilder(text);
    // Running difference between the edited buffer and the original text, used to
    // translate entity offsets (relative to the original) into buffer positions.
    int deltaLength = 0;
    for (PiiEntity piiEntity : piiEntities) {
        String type = piiEntity.type().toString();
        if (redactAll || redactTypeList.contains(type)) {
            // this is a PII type we need to redact
            // Offset logic assumes piiEntity list is ordered by occurrence in string
            int start = piiEntity.beginOffset() + deltaLength;
            int end = piiEntity.endOffset() + deltaLength;
            String replacement = "[" + type + "]";
            result.replace(start, end, replacement);
            deltaLength += replacement.length() - (end - start);
        }
    }
    return result.toString();
}