in opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java [445:473]
/**
 * Scores whether two names look like variants of each other in which words may have been
 * merged together (e.g. {@code "FooBar"} versus {@code "foo bar"}).
 *
 * <p>Both names are normalized first: a space is inserted wherever a lower-case ASCII letter is
 * directly followed by an upper-case one (a suspected word merge), and the result is
 * lower-cased. The normalized names are then compared in this order:
 * <ol>
 *   <li>one normalized name contains the other (this includes equality, prefix and suffix):
 *       returns 2;</li>
 *   <li>after removing all spaces, one name is a prefix or suffix of the other (this includes
 *       equality): returns 1;</li>
 *   <li>otherwise the value returned by {@code stringDistanceMeasurer.measureStringDistance}
 *       for the normalized names decides: above 0.95 returns 2, above 0.70 returns 1.</li>
 * </ol>
 *
 * <p>The containment and prefix/suffix checks are symmetric, so swapping the arguments does
 * not change their outcome. An empty name is contained in every string and therefore
 * scores 2 against any other name.
 *
 * @param name1 first name to compare; {@code null} yields 0
 * @param name2 second name to compare; {@code null} yields 0
 * @return 2 for a strong match, 1 for a weaker match, 0 for no match
 */
public int getAttemptedNameMerge(String name1, String name2)
{
    if (name1 == null || name2 == null)
    {
        return 0;
    }

    String first = splitMergedWordsAndLowerCase(name1);
    String second = splitMergedWordsAndLowerCase(name2);

    // contains() already covers equals/startsWith/endsWith; checking both directions keeps the
    // result independent of argument order.
    if (first.contains(second) || second.contains(first))
    {
        return 2;
    }

    // Compare with all spaces removed, so "foo bar" and "foobar" are treated as the same words.
    // A prefix/suffix match on the space-free forms also covers every case where the spaced
    // form of one name is a prefix/suffix of (or equal to) the space-free form of the other.
    String firstJoined = first.replace(" ", "");
    String secondJoined = second.replace(" ", "");
    if (firstJoined.startsWith(secondJoined) || firstJoined.endsWith(secondJoined)
        || secondJoined.startsWith(firstJoined) || secondJoined.endsWith(firstJoined))
    {
        return 1;
    }

    // Fall back to the configured string distance; evaluate it once and reuse the value.
    double distance = stringDistanceMeasurer.measureStringDistance(first, second);
    if (distance > 0.95)
    {
        return 2;
    }
    if (distance > 0.70)
    {
        return 1;
    }
    return 0;
}

/**
 * Inserts a space at every lower-case/upper-case ASCII letter boundary and lower-cases the
 * result. Other characters, including a literal {@code '&'}, are left untouched.
 */
private static String splitMergedWordsAndLowerCase(String name)
{
    // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
    return name.replaceAll("([a-z])([A-Z])", "$1 $2").toLowerCase(java.util.Locale.ROOT);
}