private String TextSplitBatchDetectEntities()

in athena-udfs-textanalytics/src/main/java/com/amazonaws/athena/udf/textanalytics/TextAnalyticsUDFHandler.java [430:489]


    /**
     * Runs Comprehend batchDetectEntities over every element of {@code input} and merges the
     * per-element results into a single output string.
     *
     * <p>The elements are sent in batches of at most {@code this.maxBatchSize} (via
     * {@code getBatches}). For each element, the result is one of:
     * <ul>
     *   <li>{@code fullResponse == true}: the JSON produced by {@code toJSON(entities)};</li>
     *   <li>{@code redactTypes} is the empty string: the output of {@code getEntityTypesAndValues};</li>
     *   <li>otherwise: the element text as returned by {@code redactEntityTypes}.</li>
     * </ul>
     * Alongside each result, the sum of the {@link String#length()} of all preceding elements is
     * recorded as that element's offset. The offset is accumulated across all batches, so it is
     * relative to the start of {@code input} as a whole. The offsets are passed to
     * {@code mergeEntitiesAll} in full-response mode (presumably to shift entity offsets back to
     * positions in the original unsplit text; confirm against that helper).
     *
     * @param languageCode language code passed to the batchDetectEntities request
     * @param input        text segments to analyze; NOTE(review): assumed to be consecutive pieces
     *                     of one original text, in order, as the method name suggests
     * @param redactTypes  entity types to redact, or {@code ""} for no redaction; not read when
     *                     {@code fullResponse} is true, must be non-null otherwise
     * @param fullResponse when true, return the full entity JSON instead of values or redacted text
     * @return the merged result for all segments
     * @throws RuntimeException if the response contains a non-empty error list, or if the number
     *                          of results differs from the number of texts sent in a batch
     * @throws Exception        declared for compatibility with callers; propagated from helpers
     */
    private String TextSplitBatchDetectEntities(String languageCode, String[] input, String redactTypes, boolean fullResponse) throws Exception
    {
        String[] result = new String[input.length];
        int[] offset = new int[input.length];
        // Decide the output mode once. Short-circuits so redactTypes is only read when needed.
        boolean redact = !fullResponse && !redactTypes.equals("");
        int rowNum = 0;
        // Running offset of the current segment from the start of the whole input. It must live
        // outside the batch loop: resetting it per batch would give segments in the second and
        // later batches offsets relative to their own batch instead of the full text.
        int cumOffset = 0;
        // getBatches splits the input into chunks of at most maxBatchSize segments
        for (Object[] batch : getBatches(input, this.maxBatchSize)) {
            String[] textArray = (String[]) batch[0];
            // Call batchDetectEntities API
            BatchDetectEntitiesRequest batchDetectEntitiesRequest = BatchDetectEntitiesRequest.builder()
                .textList(textArray)
                .languageCode(languageCode)
                .build();
            BatchDetectEntitiesResponse batchDetectEntitiesResponse = getComprehendClient().batchDetectEntities(batchDetectEntitiesRequest);
            // Throw exception if errorList is populated
            List<BatchItemError> batchItemError = batchDetectEntitiesResponse.errorList();
            if (! batchItemError.isEmpty()) {
                throw new RuntimeException("Error:  - ErrorList in batchDetectEntities result: " + batchItemError);
            }
            List<BatchDetectEntitiesItemResult> batchDetectEntitiesItemResult = batchDetectEntitiesResponse.resultList();
            if (batchDetectEntitiesItemResult.size() != textArray.length) {
                throw new RuntimeException("Error:  - array size " + textArray.length + " and result item count " + batchDetectEntitiesItemResult.size() + " do not match");
            }
            for (int i = 0; i < batchDetectEntitiesItemResult.size(); i++) {
                List<Entity> entities = batchDetectEntitiesItemResult.get(i).entities();
                if (fullResponse) {
                    // return JSON structure containing all entity types, scores and offsets
                    result[rowNum] = this.toJSON(entities);
                }
                else if (redact) {
                    // redaction - return input string with specified entity types redacted
                    result[rowNum] = redactEntityTypes(entities, textArray[i], redactTypes);
                }
                else {
                    // no redaction - return JSON string containing the entity types and extracted values
                    result[rowNum] = getEntityTypesAndValues(entities);
                }
                // offset of this segment = total length of all segments that came before it
                offset[rowNum] = cumOffset;
                cumOffset += textArray[i].length();
                rowNum++;
            }
        }
        // merge results to single output row
        if (fullResponse) {
            return mergeEntitiesAll(result, offset);
        }
        if (redact) {
            return mergeText(result);
        }
        return mergeEntities(result);
    }