static void functional_tests()

in athena-udfs-textanalytics/src/main/java/com/amazonaws/athena/udf/textanalytics/TextAnalyticsUDFHandler.java [1397:1491]


    /**
     * Runs a manual, console-driven smoke test of the handler.
     *
     * <p>First invokes the local helper test routines, then calls each UDF method on a
     * newly constructed {@code TextAnalyticsUDFHandler} with small hard-coded inputs,
     * printing both the input and the JSON-rendered result to standard output.
     * Nothing is asserted: the output (and, where the messages say so, the logs)
     * must be inspected by hand.
     *
     * <p>The final section overwrites {@code maxTextBytes} and {@code maxBatchSize} on the
     * handler with small values and does not restore them afterwards.
     *
     * @throws Exception if any of the invoked helpers or UDF methods throws
     */
    static void functional_tests() throws Exception
    {
        TextAnalyticsUDFHandler handler = new TextAnalyticsUDFHandler();

        System.out.println("\nSPLIT LONG TEXT BLOCKS");
        runSplitLongTextTest();

        System.out.println("\nTEXT SPLITTING INTO SENTENCES");
        runSplitBySentenceTests(handler);

        System.out.println("\nUTF-8 STRING LENGTH TESTS");
        runStringLengthTests();

        System.out.println("\nMERGE RESULTS TESTS");
        runMergeEntitiesTests();

        String[] text;
        String[] lang;

        System.out.println("\nDETECT DOMINANT LANGUAGE");
        text = new String[]{"I am Bob", "Je m'appelle Bob"};
        // check logs for evidence of 1 batch with 2 items
        System.out.println("detect_dominant_language - 2 rows:" + toJSON(text));
        System.out.println(toJSON(handler.detect_dominant_language(text)));
        // Render the input with toJSON: concatenating the array itself would only
        // print its identity string, not the rows.
        System.out.println("detect_dominant_language_all - 2 rows:" + toJSON(text));
        System.out.println(toJSON(handler.detect_dominant_language_all(text)));

        System.out.println("\nDETECT SENTIMENT");
        text = new String[]{"I am happy", "She is sad", "ce n'est pas bon", "Je l'aime beaucoup"};
        lang = new String[]{"en", "en", "fr", "fr"};
        // check logs for evidence of 2 batches with 2 items each, grouped by lang
        System.out.println("detect_sentiment - 4 rows: " + toJSON(text));
        System.out.println(toJSON(handler.detect_sentiment(text, lang)));
        System.out.println("detect_sentiment_all - 4 rows: " + toJSON(text));
        System.out.println(toJSON(handler.detect_sentiment_all(text, lang)));

        System.out.println("\nDETECT / REDACT ENTITIES");
        text = new String[]{"I am Bob, I live in Herndon", "Je suis Bob et j'habite à Herndon", "Soy Bob y vivo en Herndon"};
        lang = new String[]{"en", "fr", "es"};
        System.out.println("detect_entities - 3 rows: " + toJSON(text));
        System.out.println(toJSON(handler.detect_entities(text, lang)));
        System.out.println("detect_entities_all - 3 rows: " + toJSON(text));
        System.out.println(toJSON(handler.detect_entities_all(text, lang)));
        System.out.println("redact_entities - 3 rows, types ALL: " + toJSON(text));
        System.out.println(toJSON(handler.redact_entities(text, lang, makeArray("ALL", 3))));

        System.out.println("\nDETECT / REDACT PII ENTITIES");
        text = new String[]{"I am Bob, I live in Herndon"};
        lang = new String[]{"en"};
        System.out.println("detect_pii_entities - 1 row: " + toJSON(text));
        System.out.println(toJSON(handler.detect_pii_entities(text, lang)));
        System.out.println("detect_pii_entities_all - 1 row: " + toJSON(text));
        System.out.println(toJSON(handler.detect_pii_entities_all(text, lang)));
        System.out.println("redact_pii_entities - 1 row, types ALL: " + toJSON(text));
        // Size the types argument to the single input row, matching the other redact_* calls.
        System.out.println(toJSON(handler.redact_pii_entities(text, lang, makeArray("ALL", 1))));

        System.out.println("\nDETECT KEY PHRASES");
        text = new String[]{"I really enjoyed the book, Of Mice and Men, by John Steinbeck"};
        lang = new String[]{"en"};
        System.out.println("detect_key_phrases - 1 row: " + toJSON(text));
        System.out.println(toJSON(handler.detect_key_phrases(text, lang)));
        System.out.println("detect_key_phrases_all - 1 row: " + toJSON(text));
        System.out.println(toJSON(handler.detect_key_phrases_all(text, lang)));

        System.out.println("\nTRANSLATE TEXT");
        text = new String[]{"I am Bob, I live in Herndon", "I love to visit France"};
        String[] sourcelang = new String[]{"en", "en"};
        String[] targetlang = new String[]{"fr", "fr"};
        // NOTE(review): the literal string "null" looks like a "no terminology" marker
        // understood by translate_text - confirm against that method.
        String[] terminologyNames = new String[]{"null", "null"};
        System.out.println("translate_text - 2 rows: " + toJSON(text));
        System.out.println(toJSON(handler.translate_text(text, sourcelang, targetlang, terminologyNames)));

        System.out.println("\nLONG TEXT TESTS");
        int textBytes = 60;
        int batchSize = 3;
        // Shrink the limits so that a short sample is treated as "long" text.
        handler.maxTextBytes = textBytes;
        handler.maxBatchSize = batchSize;
        System.out.println("Set max text length to " + textBytes + " bytes, and max batch size to " + batchSize + ", for testing");

        // Same sentence repeated six times; longer than the 60-byte limit set above.
        final String longText = "I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon. I am Bob, I live in Herndon.";
        lang = new String[]{"en"};

        text = new String[]{longText};
        System.out.println("detect_sentiment - 1 row: " + toJSON(text));
        System.out.println("check logs for evidence of long text truncated by detect_sentiment.");
        System.out.println(toJSON(handler.detect_sentiment(text, lang)));

        // A fresh input array is built for each group, as in the original sequence.
        text = new String[]{longText};
        System.out.println("detect_entities / redact_entities - 1 row: " + toJSON(text));
        System.out.println("check logs for evidence of long text split into 2 batches w/ max 3 rows per batch.");
        System.out.println(toJSON(handler.detect_entities(text, lang)));
        System.out.println(toJSON(handler.redact_entities(text, lang, makeArray("ALL", 1))));

        // Build the input before logging it so the printed text is the array actually passed below.
        text = new String[]{longText};
        System.out.println("detect_pii_entities / redact_pii_entities - 1 row: " + toJSON(text));
        System.out.println("check logs for evidence of long text split into 3 rows.");
        System.out.println(toJSON(handler.detect_pii_entities(text, lang)));
        System.out.println(toJSON(handler.redact_pii_entities(text, lang, makeArray("ALL", 1))));
    }