in athena-udfs-textanalytics/src/main/java/com/amazonaws/athena/udf/textanalytics/TextAnalyticsUDFHandler.java [820:867]
/**
 * Detects key phrases in a text that was split into several segments and merges the
 * per-segment results into one output string.
 *
 * <p>The segments are sent to Comprehend's BatchDetectKeyPhrases API in batches obtained
 * from {@code getBatches(input, this.maxBatchSize)}. For every segment the method records
 * the formatted result and the segment's starting offset, computed as the summed
 * {@code String.length()} of all segments processed before it.
 *
 * <p>NOTE(review): this relies on {@code getBatches} returning the segments in their
 * original order, since results are stored sequentially across batches — confirm
 * against the helper.
 *
 * @param languageCode language code passed through to the BatchDetectKeyPhrases request
 * @param input        the text segments, in document order
 * @param fullResponse if {@code true}, each segment result is rendered with {@code toJSON}
 *                     and merged with {@code mergeEntitiesAll(result, offset)}; otherwise
 *                     each result is rendered with {@code getKeyPhraseValues} and merged
 *                     with {@code mergeEntities(result)}
 * @return the merged result for all segments
 * @throws RuntimeException if the response contains a non-empty error list, or if the
 *                          number of result items differs from the number of segments sent
 * @throws Exception        declared by the signature; presumably propagated from the
 *                          helper methods — verify against their declarations
 */
private String TextSplitBatchDetectKeyPhrases(String languageCode, String[] input, boolean fullResponse) throws Exception
{
    String[] result = new String[input.length];
    int[] offset = new int[input.length];
    int rowNum = 0;
    // Running length of every segment already processed. This must live outside the
    // batch loop: resetting it per batch would make the offsets of segments in the
    // second and later batches relative to their batch instead of to the whole text.
    int cumOffset = 0;
    for (Object[] batch : getBatches(input, this.maxBatchSize)) {
        String[] textArray = (String[]) batch[0];
        // Call batchDetectKeyPhrases API
        BatchDetectKeyPhrasesRequest batchDetectKeyPhrasesRequest = BatchDetectKeyPhrasesRequest.builder()
                .textList(textArray)
                .languageCode(languageCode)
                .build();
        BatchDetectKeyPhrasesResponse batchDetectKeyPhrasesResponse = getComprehendClient().batchDetectKeyPhrases(batchDetectKeyPhrasesRequest);
        // Throw exception if errorList is populated
        List<BatchItemError> batchItemError = batchDetectKeyPhrasesResponse.errorList();
        if (!batchItemError.isEmpty()) {
            throw new RuntimeException("Error: - ErrorList in batchDetectKeyPhrases result: " + batchItemError);
        }
        List<BatchDetectKeyPhrasesItemResult> batchDetectKeyPhrasesItemResult = batchDetectKeyPhrasesResponse.resultList();
        // Results are matched to segments by position, so the counts must agree
        if (batchDetectKeyPhrasesItemResult.size() != textArray.length) {
            throw new RuntimeException("Error: - array size " + textArray.length + " and result item count " + batchDetectKeyPhrasesItemResult.size() + " do not match");
        }
        for (int i = 0; i < batchDetectKeyPhrasesItemResult.size(); i++) {
            List<KeyPhrase> keyPhrases = batchDetectKeyPhrasesItemResult.get(i).keyPhrases();
            if (fullResponse) {
                // full key phrase details rendered as JSON
                result[rowNum] = this.toJSON(keyPhrases);
            }
            else {
                result[rowNum] = getKeyPhraseValues(keyPhrases);
            }
            // Offset of this segment = total length of all preceding segments
            offset[rowNum] = cumOffset;
            cumOffset += textArray[i].length();
            rowNum++;
        }
    }
    // merge results to single output row
    String mergedResult;
    if (fullResponse) {
        mergedResult = mergeEntitiesAll(result, offset);
    }
    else {
        mergedResult = mergeEntities(result);
    }
    return mergedResult;
}