in geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/FuzzyStringMatchScorer.java [67:93]
/**
 * Computes the Sørensen–Dice coefficient between the token sets of two strings.
 *
 * <p>Each string is split on single spaces and commas; tokens that are empty or
 * whitespace-only are discarded. The score is
 * {@code 2 * |A ∩ B| / (|A| + |B|)} where {@code A} and {@code B} are the sets of
 * distinct tokens, so two strings with the same tokens always score {@code 1.0},
 * regardless of repeated tokens.
 *
 * @param s1     the first string; {@code null} or empty yields {@code 0}
 * @param s2     the second string; {@code null} or empty yields {@code 0}
 * @param nGrams currently unused by this implementation (tokens are whole words);
 *               retained so existing callers keep compiling
 * @return a value in {@code [0, 1]}; {@code 0} when either input contributes no tokens
 */
public double getDiceCoefficient(String s1, String s2, int nGrams) {
  if (s1 == null || s2 == null || s1.isEmpty() || s2.isEmpty()) {
    return 0d;
  }
  Set<String> s1Tokens = splitIntoDistinctTokens(s1);
  Set<String> s2Tokens = splitIntoDistinctTokens(s2);

  int totalTokens = s1Tokens.size() + s2Tokens.size();
  if (totalTokens == 0) {
    // Inputs made only of separators/whitespace: avoid 0/0, which would yield NaN.
    return 0d;
  }

  Set<String> overlap = new HashSet<>(s1Tokens);
  overlap.retainAll(s2Tokens);
  return (2d * overlap.size()) / totalTokens;
}

/** Splits {@code text} on spaces and commas and returns its distinct non-blank tokens. */
private static Set<String> splitIntoDistinctTokens(String text) {
  Set<String> tokens = new HashSet<>();
  for (String token : text.split("[ ,]")) {
    if (!token.trim().isEmpty()) {
      tokens.add(token);
    }
  }
  return tokens;
}