in athena-udfs-textanalytics/src/main/java/com/amazonaws/athena/udf/textanalytics/TextAnalyticsUDFHandler.java [501:520]
/**
 * Replaces each detected entity whose type was requested for redaction with a
 * bracketed type tag, e.g. {@code "[PERSON]"}.
 *
 * <p>{@code redactTypes} is tokenised on whitespace and commas, and an entity is
 * redacted only when its type string equals one of the tokens exactly, or when the
 * token {@code ALL} is present. Matching is case-sensitive.
 *
 * @param entities    detected entities; offsets are applied cumulatively, so the list
 *                    is assumed to be ordered by position in {@code text} and
 *                    non-overlapping (NOTE(review): not enforced here - confirm upstream)
 * @param text        the original text the entity offsets refer to
 * @param redactTypes comma and/or whitespace separated list of entity types to redact,
 *                    e.g. {@code "NAME, ADDRESS"}, or {@code "ALL"}
 * @return {@code text} with every selected entity span replaced by {@code "[TYPE]"}
 * @throws Exception declared in the signature; kept for compatibility with existing callers
 */
private String redactEntityTypes(List<Entity> entities, String text, String redactTypes) throws Exception
{
    // redactTypes contains comma or space separated list of types, e.g. "NAME, ADDRESS"
    List<String> redactTypeList = Arrays.asList(redactTypes.split("[\\s,]+"));
    // Evaluate the wildcard once; it does not depend on the entity being inspected.
    boolean redactAll = redactTypeList.contains("ALL");
    String result = text;
    // Running difference between the redacted string's length and the original's,
    // used to translate original offsets into offsets within 'result'.
    int deltaLength = 0;
    for (Entity entity : entities) {
        String type = entity.type().toString();
        // Compare against whole tokens. A substring test on the raw redactTypes string
        // would let a requested type that merely contains another type's name (or the
        // letters "ALL") trigger redaction of entities that were not asked for.
        if (redactAll || redactTypeList.contains(type)) {
            // Offset logic assumes the entity list is ordered by occurrence in the string
            int start = entity.beginOffset() + deltaLength;
            int end = entity.endOffset() + deltaLength;
            int lengthBefore = result.length();
            result = result.substring(0, start) + "[" + type + "]" + result.substring(end);
            deltaLength += result.length() - lengthBefore;
        }
    }
    return result;
}