static initializer

in oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzerMappings.java [100:261]


    static {
        // BiFunction<T, U, R>
        // renames keys in the input parameters (T) using the old-key -> new-key map (U) and returns the transformed parameters (R)
        BiFunction<Map<String, Object>, Map<String, String>, Map<String, Object>> reKey = (luceneParams, keys) -> {
            keys.forEach((key, value) -> {
                if (luceneParams.containsKey(key)) {
                    luceneParams.put(value, luceneParams.remove(key));
                }
            });
            return luceneParams;
        };
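        // For example (hypothetical input, not part of the original source):
        //   reKey.apply({"minGramSize": "2"}, {"minGramSize": "min_gram"})
        // mutates and returns the parameter map as {"min_gram": "2"}.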

        LUCENE_ELASTIC_TRANSFORMERS = new LinkedHashMap<>();

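        // Word delimiter: Lucene/Solr expresses these options as "0"/"1" strings, so they are
        // converted to booleans before the keys are renamed to their snake_case equivalents.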
        LUCENE_ELASTIC_TRANSFORMERS.put(WordDelimiterFilterFactory.class, luceneParams -> {
            if (luceneParams.containsKey("generateWordParts")) {
                luceneParams.put("generateWordParts", Integer.parseInt(luceneParams.get("generateWordParts").toString()) == 1);
            }
            if (luceneParams.containsKey("generateNumberParts")) {
                luceneParams.put("generateNumberParts", Integer.parseInt(luceneParams.get("generateNumberParts").toString()) == 1);
            }
            if (luceneParams.containsKey("catenateWords")) {
                luceneParams.put("catenateWords", Integer.parseInt(luceneParams.get("catenateWords").toString()) == 1);
            }
            if (luceneParams.containsKey("catenateNumbers")) {
                luceneParams.put("catenateNumbers", Integer.parseInt(luceneParams.get("catenateNumbers").toString()) == 1);
            }
            if (luceneParams.containsKey("catenateAll")) {
                luceneParams.put("catenateAll", Integer.parseInt(luceneParams.get("catenateAll").toString()) == 1);
            }
            if (luceneParams.containsKey("splitOnCaseChange")) {
                luceneParams.put("splitOnCaseChange", Integer.parseInt(luceneParams.get("splitOnCaseChange").toString()) == 1);
            }
            if (luceneParams.containsKey("preserveOriginal")) {
                luceneParams.put("preserveOriginal", Integer.parseInt(luceneParams.get("preserveOriginal").toString()) == 1);
            }
            if (luceneParams.containsKey("splitOnNumerics")) {
                luceneParams.put("splitOnNumerics", Integer.parseInt(luceneParams.get("splitOnNumerics").toString()) == 1);
            }
            if (luceneParams.containsKey("stemEnglishPossessive")) {
                luceneParams.put("stemEnglishPossessive", Integer.parseInt(luceneParams.get("stemEnglishPossessive").toString()) == 1);
            }
            return reKey.apply(luceneParams, Map.of(
                    "generateWordParts", "generate_word_parts",
                    "generateNumberParts", "generate_number_parts",
                    "catenateWords", "catenate_words",
                    "catenateNumbers", "catenate_numbers",
                    "catenateAll", "catenate_all",
                    "splitOnCaseChange", "split_on_case_change",
                    "preserveOriginal", "preserve_original",
                    "splitOnNumerics", "split_on_numerics",
                    "stemEnglishPossessive", "stem_english_possessive",
                    "protectedTokens", "protected_words"
            ));
        });

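        // The transformers below mostly just rename camelCase Lucene parameter names to the
        // snake_case names expected on the Elasticsearch side.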
        LUCENE_ELASTIC_TRANSFORMERS.put(ShingleFilterFactory.class, luceneParams ->
                reKey.apply(luceneParams, Map.of(
                        "minShingleSize", "min_shingle_size",
                        "maxShingleSize", "max_shingle_size",
                        "outputUnigrams", "output_unigrams",
                        "outputUnigramsIfNoShingles", "output_unigrams_if_no_shingles",
                        "tokenSeparator", "token_separator",
                        "fillerToken", "filler_token"
                ))
        );

        LUCENE_ELASTIC_TRANSFORMERS.put(PatternCaptureGroupFilterFactory.class, luceneParams ->
                reKey.apply(luceneParams, Map.of("pattern", "patterns"))
        );

        LUCENE_ELASTIC_TRANSFORMERS.put(MinHashFilterFactory.class, luceneParams ->
                reKey.apply(luceneParams, Map.of(
                        "hashCount", "hash_count",
                        "bucketCount", "bucket_count",
                        "hashSetSize", "hash_set_size",
                        "withRotation", "with_rotation"
                ))
        );

        LUCENE_ELASTIC_TRANSFORMERS.put(LimitTokenCountFilterFactory.class, luceneParams ->
                reKey.apply(luceneParams, Map.of(
                        "maxTokenCount", "max_token_count",
                        "consumeAllTokens", "consume_all_tokens"
                ))
        );

        LUCENE_ELASTIC_TRANSFORMERS.put(KeepWordFilterFactory.class, luceneParams ->
                reKey.apply(luceneParams, Map.of(
                        "words", "keep_words",
                        "ignoreCase", "keep_words_case"
                ))
        );

        LUCENE_ELASTIC_TRANSFORMERS.put(ElisionFilterFactory.class, luceneParams ->
                reKey.apply(luceneParams, Map.of("ignoreCase", "articles_case"))
        );

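        // Edge n-gram: the Lucene "side" parameter is dropped; only the gram size bounds are mapped.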
        LUCENE_ELASTIC_TRANSFORMERS.put(EdgeNGramFilterFactory.class, luceneParams -> {
            luceneParams.remove("side");
            return reKey.apply(luceneParams, Map.of(
                    "minGramSize", "min_gram",
                    "maxGramSize", "max_gram"
            ));
        });

        LUCENE_ELASTIC_TRANSFORMERS.put(NGramFilterFactory.class, luceneParams ->
                reKey.apply(luceneParams, Map.of(
                        "minGramSize", "min_gram",
                        "maxGramSize", "max_gram"
                ))
        );

        LUCENE_ELASTIC_TRANSFORMERS.put(DelimitedPayloadTokenFilterFactory.class, luceneParams ->
                reKey.apply(luceneParams, Map.of("encoder", "encoding"))
        );

        LUCENE_ELASTIC_TRANSFORMERS.put(CommonGramsFilterFactory.class, luceneParams ->
                reKey.apply(luceneParams, Map.of("words", "common_words"))
        );

        LUCENE_ELASTIC_TRANSFORMERS.put(MappingCharFilterFactory.class, luceneParams ->
                reKey.apply(luceneParams, Map.of("mapping", "mappings"))
        );

        LUCENE_ELASTIC_TRANSFORMERS.put(SynonymFilterFactory.class, luceneParams ->
                reKey.apply(luceneParams, Map.of("tokenizerFactory", "tokenizer"))
        );

        LUCENE_ELASTIC_TRANSFORMERS.put(KeywordMarkerFilterFactory.class, luceneParams ->
                reKey.apply(luceneParams, Map.of("protected", "keywords"))
        );

        LUCENE_ELASTIC_TRANSFORMERS.put(ASCIIFoldingFilterFactory.class, luceneParams ->
                reKey.apply(luceneParams, Map.of("preserveOriginal", "preserve_original"))
        );

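        // CJK bigram: script flags that are explicitly disabled on the Lucene side are collected
        // into Elasticsearch's "ignored_scripts" list.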
        LUCENE_ELASTIC_TRANSFORMERS.put(CJKBigramFilterFactory.class, luceneParams -> {
            List<String> ignored = new ArrayList<>();
            if (!Boolean.parseBoolean(luceneParams.getOrDefault("hal", true).toString())) {
                ignored.add("hal");
            }
            if (!Boolean.parseBoolean(luceneParams.getOrDefault("hangul", true).toString())) {
                ignored.add("hangul");
            }
            if (!Boolean.parseBoolean(luceneParams.getOrDefault("hiragana", true).toString())) {
                ignored.add("hiragana");
            }
            if (!Boolean.parseBoolean(luceneParams.getOrDefault("katakana", true).toString())) {
                ignored.add("katakana");
            }
            if (!ignored.isEmpty()) {
                luceneParams.put("ignored_scripts", ignored);
            }
            return reKey.apply(luceneParams, Map.of("outputUnigrams", "output_unigrams"));
        });

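        // Word-list based filters (e.g. stop words): "enablePositionIncrements" is dropped and
        // the word list / ignore-case parameters are renamed.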
        LUCENE_ELASTIC_TRANSFORMERS.put(AbstractWordsFileFilterFactory.class, luceneParams -> {
            luceneParams.remove("enablePositionIncrements");
            return reKey.apply(luceneParams, Map.of("words", "stopwords", "ignoreCase", "ignore_case"));
        });
    }
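
A minimal usage sketch (not from the Oak sources), assuming the values of LUCENE_ELASTIC_TRANSFORMERS are Function<Map<String, Object>, Map<String, Object>> and that java.util.HashMap is imported; the declared field type and the real call sites are outside this excerpt:

        // Hypothetical caller: convert Lucene NGram filter parameters to their Elasticsearch names.
        Map<String, Object> luceneParams = new HashMap<>(Map.of(
                "minGramSize", "2",
                "maxGramSize", "5"
        ));
        Map<String, Object> elasticParams =
                LUCENE_ELASTIC_TRANSFORMERS.get(NGramFilterFactory.class).apply(luceneParams);
        // elasticParams now contains {"min_gram": "2", "max_gram": "5"}.
        // An abstract key such as AbstractWordsFileFilterFactory would presumably need an
        // isAssignableFrom lookup rather than a direct get().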