in flink-ml-lib/src/main/java/org/apache/flink/ml/feature/vectorindexer/VectorIndexer.java [262:286]
/**
 * Builds the model data by assigning a category index to each value of every column that
 * qualifies as categorical.
 *
 * <p>A column qualifies when its entry is non-null and contains at most {@code maxCategories}
 * values; columns that do not qualify are omitted from the result. For a qualifying column the
 * values are sorted ascending and, if {@code 0.0} is present, it is moved to the front so that
 * it receives index 0. Each value is then mapped to its position in that ordering.
 *
 * @param distinctDoubles per-column value lists, indexed by column position; null entries are
 *     skipped. Presumably each list holds the distinct values observed for that column
 *     (confirm against the caller).
 * @return model data wrapping a map from column index to a (value -> category index) map.
 */
public VectorIndexerModelData map(List<Double>[] distinctDoubles) {
    Map<Integer, Map<Double, Integer>> indexByColumn = new HashMap<>();
    for (int column = 0; column < distinctDoubles.length; column++) {
        List<Double> candidates = distinctDoubles[column];
        // Skip columns with no data or with too many values to be treated as categorical.
        if (candidates == null || candidates.size() > maxCategories) {
            continue;
        }

        // Unbox into a primitive array so the values can be sorted and searched.
        double[] ordered = new double[candidates.size()];
        int next = 0;
        for (Double candidate : candidates) {
            ordered[next++] = candidate;
        }
        Arrays.sort(ordered);

        // If 0 exists, rotate it to the front: shift everything before it one slot right.
        // Note: the search key is positive zero; Arrays.binarySearch on doubles does not
        // treat -0.0 as a match for 0.0.
        int zeroAt = Arrays.binarySearch(ordered, 0.0);
        if (zeroAt > 0) {
            System.arraycopy(ordered, 0, ordered, 1, zeroAt);
            ordered[0] = 0.0;
        }

        // The position in the (zero-first) ordering becomes the category index.
        Map<Double, Integer> indexByValue = new HashMap<>(ordered.length);
        int rank = 0;
        for (double value : ordered) {
            indexByValue.put(value, rank++);
        }
        indexByColumn.put(column, indexByValue);
    }
    return new VectorIndexerModelData(indexByColumn);
}