public StringIndexerModelData map()

in flink-ml-lib/src/main/java/org/apache/flink/ml/feature/stringindexer/StringIndexer.java [239:293]


        /**
         * Builds the model data from per-column string counts.
         *
         * <p>For every column, the distinct strings are arranged according to {@code
         * stringOrderType}. The sorts are stable, so strings that compare equal keep the
         * iteration order of the input map. With the frequency-descending order only, a column
         * holding more than {@code maxIndexNum} distinct strings is cut down to its first {@code
         * maxIndexNum - 1} strings.
         *
         * @param value one map per column, from a string to the count stored for it
         * @return model data wrapping the ordered strings of each column
         * @throws UnsupportedOperationException if {@code stringOrderType} is not one of the
         *     handled order types
         */
        public StringIndexerModelData map(Map<String, Long>[] value) {
            String[][] orderedByColumn = new String[value.length][];

            for (int col = 0; col < value.length; col++) {
                Map<String, Long> counts = value[col];
                ArrayList<Tuple2<String, Long>> entries = new ArrayList<>(counts.size());
                for (Map.Entry<String, Long> e : counts.entrySet()) {
                    entries.add(Tuple2.of(e.getKey(), e.getValue()));
                }

                // Number of leading entries that end up in the output for this column.
                int numKept = entries.size();

                switch (stringOrderType) {
                    case ALPHABET_ASC_ORDER:
                        entries.sort((a, b) -> a.f0.compareTo(b.f0));
                        break;
                    case ALPHABET_DESC_ORDER:
                        // Swapping the operands yields the descending order.
                        entries.sort((a, b) -> b.f0.compareTo(a.f0));
                        break;
                    case FREQUENCY_ASC_ORDER:
                        entries.sort((a, b) -> a.f1.compareTo(b.f1));
                        break;
                    case FREQUENCY_DESC_ORDER:
                        entries.sort((a, b) -> b.f1.compareTo(a.f1));
                        if (numKept > maxIndexNum) {
                            // Reserves the last index for infrequent element.
                            numKept = Math.max(maxIndexNum - 1, 0);
                        }
                        break;
                    case ARBITRARY_ORDER:
                        // Keeps whatever order the map iteration produced.
                        break;
                    default:
                        throw new UnsupportedOperationException(
                                "Unsupported " + STRING_ORDER_TYPE + " type: " + stringOrderType + ".");
                }

                String[] ordered = new String[numKept];
                for (int k = 0; k < numKept; k++) {
                    ordered[k] = entries.get(k).f0;
                }
                orderedByColumn[col] = ordered;
            }

            return new StringIndexerModelData(orderedByColumn);
        }