in athena-udfs-textanalytics/src/main/java/com/amazonaws/athena/udf/textanalytics/TextAnalyticsUDFHandler.java [642:686]
/**
 * Detects PII entities in each element of {@code batch} and merges the per-element
 * results into one output string.
 *
 * <p>One {@code detectPiiEntities} request is issued per element (there is no
 * multi-document batch API for this operation). The output mode is chosen as follows:
 * <ul>
 *   <li>{@code fullResponse == true}: each element's entities are serialized with
 *       {@code toJSON} and combined by {@code mergeEntitiesAll}, which is also given
 *       the start offset of every element;</li>
 *   <li>{@code fullResponse == false} and {@code redactTypes} is the empty string:
 *       entity types and values are extracted with {@code getPiiEntityTypesAndValues}
 *       and combined by {@code mergeEntities};</li>
 *   <li>otherwise: each element is passed through {@code redactPiiEntityTypes} and the
 *       pieces are combined by {@code mergeText}.</li>
 * </ul>
 *
 * @param languageCode language code forwarded to every detection request
 * @param batch        text segments to analyze; presumably consecutive pieces of one
 *                     longer input text (suggested by the cumulative offsets) - confirm
 *                     against the caller
 * @param redactTypes  PII types to redact; the empty string selects "no redaction".
 *                     Must be non-null whenever {@code fullResponse} is {@code false}
 * @param fullResponse whether to return the full entity JSON rather than the
 *                     types/values or redacted-text form
 * @return the merged result for the whole batch
 * @throws Exception declared by the signature; presumably propagated from the client
 *                   call or the helper methods
 */
private String TextSplitBatchDetectPiiEntities(String languageCode, String[] batch, String redactTypes, boolean fullResponse) throws Exception
{
    final int segmentCount = batch.length;
    final String[] perSegment = new String[segmentCount];
    final int[] startOffsets = new int[segmentCount];

    // Total length of all segments handled so far; this is the start offset of the
    // current segment relative to the beginning of the batch.
    int runningLength = 0;
    int idx = 0;
    // One API call per segment (no multidocument batch API available)
    for (String segment : batch) {
        DetectPiiEntitiesRequest request = DetectPiiEntitiesRequest.builder()
                .text(segment)
                .languageCode(languageCode)
                .build();
        List<PiiEntity> found = getComprehendClient().detectPiiEntities(request).entities();

        String outcome;
        if (fullResponse) {
            // JSON structure containing all entity types, scores and offsets
            outcome = this.toJSON(found);
        }
        else if (redactTypes.isEmpty()) {
            // no redaction - JSON string containing the entity types and extracted values
            outcome = getPiiEntityTypesAndValues(found, segment);
        }
        else {
            // redaction - input string with the specified PII types redacted
            outcome = redactPiiEntityTypes(found, segment, redactTypes);
        }

        perSegment[idx] = outcome;
        startOffsets[idx] = runningLength;
        runningLength += segment.length();
        idx++;
    }

    // Collapse the per-segment results into a single output row, using the merge
    // helper that matches the mode selected above.
    if (fullResponse) {
        return mergeEntitiesAll(perSegment, startOffsets);
    }
    if (redactTypes.isEmpty()) {
        return mergeEntities(perSegment);
    }
    return mergeText(perSegment);
}