in athena-udfs-textanalytics/src/main/java/com/amazonaws/athena/udf/textanalytics/TextAnalyticsUDFHandler.java [1397:1491]
/**
 * End-to-end smoke test of the UDF handler: exercises each public UDF method
 * (language detection, sentiment, entities, PII entities, key phrases,
 * translation) with small hard-coded inputs and prints the JSON results to
 * stdout for manual inspection. Also shrinks {@code maxTextBytes} /
 * {@code maxBatchSize} at the end to force the long-text splitting and
 * batching code paths.
 *
 * <p>NOTE(review): this calls live backing services via the handler, so it
 * requires valid credentials/connectivity; expected batching behavior must be
 * confirmed in the logs as the inline comments indicate.
 *
 * @throws Exception propagated from any handler call or helper test
 */
static void functional_tests() throws Exception
{
    TextAnalyticsUDFHandler textAnalyticsUDFHandler = new TextAnalyticsUDFHandler();

    // Pure-local helper tests first (no service calls).
    System.out.println("\nSPLIT LONG TEXT BLOCKS");
    runSplitLongTextTest();
    System.out.println("\nTEXT SPLITTING INTO SENTENCES");
    runSplitBySentenceTests(textAnalyticsUDFHandler);
    System.out.println("\nUTF-8 STRING LENGTH TESTS");
    runStringLengthTests();
    System.out.println("\nMERGE RESULTS TESTS");
    runMergeEntitiesTests();

    String[] text;
    String[] lang;

    System.out.println("\nDETECT DOMINANT LANGUAGE");
    text = new String[]{"I am Bob", "Je m'appelle Bob"};
    // check logs for evidence of 1 batch with 2 items
    System.out.println("detect_dominant_language - 2 rows:" + toJSON(text));
    System.out.println(toJSON(textAnalyticsUDFHandler.detect_dominant_language(text)));
    // BUG FIX: previously concatenated the raw array (printed as
    // [Ljava.lang.String;@hash) instead of its JSON form like every other line.
    System.out.println("detect_dominant_language_all - 2 rows:" + toJSON(text));
    System.out.println(toJSON(textAnalyticsUDFHandler.detect_dominant_language_all(text)));

    System.out.println("\nDETECT SENTIMENT");
    text = new String[]{"I am happy", "She is sad", "ce n'est pas bon", "Je l'aime beaucoup"};
    lang = new String[]{"en", "en", "fr", "fr"};
    // check logs for evidence of 2 batches with 2 items each, grouped by lang
    System.out.println("detect_sentiment - 4 rows: " + toJSON(text));
    System.out.println(toJSON(textAnalyticsUDFHandler.detect_sentiment(text, lang)));
    System.out.println("detect_sentiment_all - 4 rows: " + toJSON(text));
    System.out.println(toJSON(textAnalyticsUDFHandler.detect_sentiment_all(text, lang)));

    System.out.println("\nDETECT / REDACT ENTITIES");
    text = new String[]{"I am Bob, I live in Herndon", "Je suis Bob et j'habite à Herndon", "Soy Bob y vivo en Herndon"};
    lang = new String[]{"en", "fr", "es"};
    System.out.println("detect_entities - 3 rows: " + toJSON(text));
    System.out.println(toJSON(textAnalyticsUDFHandler.detect_entities(text, lang)));
    System.out.println("detect_entities_all - 3 rows: " + toJSON(text));
    System.out.println(toJSON(textAnalyticsUDFHandler.detect_entities_all(text, lang)));
    System.out.println("redact_entities - 3 rows, types ALL: " + toJSON(text));
    System.out.println(toJSON(textAnalyticsUDFHandler.redact_entities(text, lang, makeArray("ALL", 3))));

    System.out.println("\nDETECT / REDACT PII ENTITIES");
    text = new String[]{"I am Bob, I live in Herndon"};
    lang = new String[]{"en"};
    System.out.println("detect_pii_entities - 1 row: " + toJSON(text));
    System.out.println(toJSON(textAnalyticsUDFHandler.detect_pii_entities(text, lang)));
    System.out.println("detect_pii_entities_all - 1 row: " + toJSON(text));
    System.out.println(toJSON(textAnalyticsUDFHandler.detect_pii_entities_all(text, lang)));
    System.out.println("redact_pii_entities - 1 row, types ALL: " + toJSON(text));
    System.out.println(toJSON(textAnalyticsUDFHandler.redact_pii_entities(text, lang, makeArray("ALL", 3))));

    System.out.println("\nDETECT KEY PHRASES");
    text = new String[]{"I really enjoyed the book, Of Mice and Men, by John Steinbeck"};
    lang = new String[]{"en"};
    System.out.println("detect_key_phrases - 1 row: " + toJSON(text));
    System.out.println(toJSON(textAnalyticsUDFHandler.detect_key_phrases(text, lang)));
    System.out.println("detect_key_phrases_all - 1 row: " + toJSON(text));
    System.out.println(toJSON(textAnalyticsUDFHandler.detect_key_phrases_all(text, lang)));

    System.out.println("\nTRANSLATE TEXT");
    text = new String[]{"I am Bob, I live in Herndon", "I love to visit France"};
    String[] sourcelang = new String[]{"en", "en"};
    String[] targetlang = new String[]{"fr", "fr"};
    // NOTE(review): the literal string "null" appears to be the handler's
    // sentinel for "no custom terminology" — confirm against translate_text.
    String[] terminologyNames = new String[]{"null", "null"};
    System.out.println("translate_text - 2 rows: " + toJSON(text));
    System.out.println(toJSON(textAnalyticsUDFHandler.translate_text(text, sourcelang, targetlang, terminologyNames)));

    System.out.println("\nLONG TEXT TESTS");
    // Shrink limits so the short sample text below triggers truncation,
    // splitting, and multi-batch handling inside the handler.
    int textBytes = 60;
    int batchSize = 3;
    textAnalyticsUDFHandler.maxTextBytes = textBytes;
    textAnalyticsUDFHandler.maxBatchSize = batchSize;
    System.out.println("Set max text length to " + textBytes + " bytes, and max batch size to " + batchSize + ", for testing");
    text = new String[]{"I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon."};
    lang = new String[]{"en"};
    System.out.println("detect_sentiment - 1 row: " + toJSON(text));
    System.out.println("check logs for evidence of long text truncated by detect_sentiment.");
    System.out.println(toJSON(textAnalyticsUDFHandler.detect_sentiment(text, lang)));
    text = new String[]{"I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon."};
    System.out.println("detect_entities / redact_entities - 1 row: " + toJSON(text));
    System.out.println("check logs for evidence of long text split into 2 batches w/ max 3 rows per batch.");
    System.out.println(toJSON(textAnalyticsUDFHandler.detect_entities(text, lang)));
    System.out.println(toJSON(textAnalyticsUDFHandler.redact_entities(text, lang, makeArray("ALL", 1))));
    System.out.println("detect_pii_entities / redact_pii_entities - 1 row: " + toJSON(text));
    System.out.println("check logs for evidence of long text split into 3 rows.");
    text = new String[]{"I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon."};
    System.out.println(toJSON(textAnalyticsUDFHandler.detect_pii_entities(text, lang)));
    System.out.println(toJSON(textAnalyticsUDFHandler.redact_pii_entities(text, lang, makeArray("ALL", 1))));
}