private String TextSplitBatchDetectKeyPhrases()

in athena-udfs-textanalytics/src/main/java/com/amazonaws/athena/udf/textanalytics/TextAnalyticsUDFHandler.java [820:867]


    /**
     * Detects key phrases in every element of {@code input} and merges the per-element
     * results into a single output string.
     *
     * <p>The elements are sent to the Comprehend {@code batchDetectKeyPhrases} API in
     * batches produced by {@code getBatches(input, this.maxBatchSize)}. For each element a
     * starting offset is recorded, computed as the sum of {@code String.length()} of all
     * preceding elements of {@code input}; the offsets are handed to
     * {@code mergeEntitiesAll} when {@code fullResponse} is true.
     *
     * <p>NOTE(review): judging by the method name, {@code input} is presumably the ordered,
     * contiguous segments of one long text that was split upstream - confirm against the
     * caller.
     *
     * @param languageCode language code passed unchanged to every batch request
     * @param input        text elements to analyze, in order
     * @param fullResponse if true, each element's result is {@code toJSON(keyPhrases)} and
     *                     results are merged with {@code mergeEntitiesAll(result, offset)};
     *                     otherwise each result is {@code getKeyPhraseValues(keyPhrases)} and
     *                     results are merged with {@code mergeEntities(result)}
     * @return the merged result string
     * @throws RuntimeException if a batch response has a non-empty error list, or if the
     *                          number of item results differs from the number of texts sent
     * @throws Exception        declared by the signature; presumably propagated from the
     *                          helpers called here - verify against their declarations
     */
    private String TextSplitBatchDetectKeyPhrases(String languageCode, String[] input, boolean fullResponse) throws Exception
    {
        String[] result = new String[input.length];
        int[] offset = new int[input.length];
        int rowNum = 0;
        // Running total of the lengths of all elements processed so far. It must live outside
        // the batch loop: resetting it per batch would make offsets in the second and later
        // batches relative to the batch start instead of the start of the whole sequence.
        int cumOffset = 0;
        // Inputs longer than maxBatchSize are processed as several consecutive batches.
        for (Object[] batch : getBatches(input, this.maxBatchSize)) {
            String[] textArray = (String[]) batch[0];
            // Call batchDetectKeyPhrases API
            BatchDetectKeyPhrasesRequest batchDetectKeyPhrasesRequest = BatchDetectKeyPhrasesRequest.builder()
                .textList(textArray)
                .languageCode(languageCode)
                .build();
            BatchDetectKeyPhrasesResponse batchDetectKeyPhrasesResponse = getComprehendClient().batchDetectKeyPhrases(batchDetectKeyPhrasesRequest);
            // Throw exception if errorList is populated
            List<BatchItemError> batchItemError = batchDetectKeyPhrasesResponse.errorList();
            if (!batchItemError.isEmpty()) {
                throw new RuntimeException("Error:  - ErrorList in batchDetectKeyPhrases result: " + batchItemError);
            }
            List<BatchDetectKeyPhrasesItemResult> batchDetectKeyPhrasesItemResult = batchDetectKeyPhrasesResponse.resultList();
            // Results are consumed positionally below, so the counts must line up exactly.
            if (batchDetectKeyPhrasesItemResult.size() != textArray.length) {
                throw new RuntimeException("Error:  - array size " + textArray.length + " and result item count " + batchDetectKeyPhrasesItemResult.size() + " do not match");
            }
            for (int i = 0; i < batchDetectKeyPhrasesItemResult.size(); i++) {
                List<KeyPhrase> keyPhrases = batchDetectKeyPhrasesItemResult.get(i).keyPhrases();
                if (fullResponse) {
                    // full JSON representation of the detected key phrases
                    result[rowNum] = this.toJSON(keyPhrases);
                }
                else {
                    result[rowNum] = getKeyPhraseValues(keyPhrases);
                }
                // Offset of this element = combined length of every element before it.
                offset[rowNum] = cumOffset;
                cumOffset += textArray[i].length();
                rowNum++;
            }
        }
        // merge results to single output row
        String mergedResult;
        if (fullResponse) {
            mergedResult = mergeEntitiesAll(result, offset);
        }
        else {
            mergedResult = mergeEntities(result);
        }
        return mergedResult;
    }