in opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java [133:162]
/**
 * Decides whether two short titles should be treated as similar.
 *
 * <p>The check runs in three stages:
 * <ol>
 *   <li>Both names are normalized by deleting apostrophes and turning hyphens into spaces.
 *       If either normalized name is a prefix or a suffix of the other, the names are
 *       accepted immediately.</li>
 *   <li>Otherwise, if both original names are longer than 12 characters, they are
 *       rejected without any further lookup.</li>
 *   <li>Otherwise the decision is delegated to
 *       {@code areNamesSemanticallyCloseInWebSearchSpace(name1, name2, 0.8f, false)}.</li>
 * </ol>
 *
 * <p>NOTE(review): a name that is empty after normalization (for example {@code ""} or
 * {@code "'"}) is a prefix of every string, so it matches any other name in stage 1 --
 * confirm this is intended.
 *
 * @param name1 first title; must not be {@code null} (a {@code null} triggers a
 *              {@link NullPointerException} during normalization)
 * @param name2 second title; must not be {@code null}
 * @return {@code true} when the names are considered similar, {@code false} otherwise
 */
private Boolean attemptShortTitlesSimilarityInWebSpace(String name1, String name2)
{
  // Delimiter normalization: drop apostrophes and turn hyphens into spaces.
  // Both replacements act on single characters, so any prefix/suffix relation that holds
  // between partially normalized variants of the two names also holds between the fully
  // normalized forms. Comparing only these two strings is therefore equivalent to
  // enumerating every combination of raw / apostrophe-free / hyphen-free variants.
  String normalized1 = name1.replace("'", "").replace("-", " ");
  String normalized2 = name2.replace("'", "").replace("-", " ");

  boolean fuzzyAffix = normalized1.startsWith(normalized2)
      || normalized2.startsWith(normalized1)
      || normalized1.endsWith(normalized2)
      || normalized2.endsWith(normalized1);
  if (fuzzyAffix)
  {
    LOG.info("Found fuzzy substring of name1 and name2");
    return true;
  }

  // Only short names go on to the delegated comparison; two long names with no
  // prefix/suffix overlap are rejected outright.
  if (name1.length() > 12 && name2.length() > 12)
  {
    return false;
  }

  // presumably 0.8f is a similarity threshold and false disables an optional mode of the
  // helper -- TODO confirm against areNamesSemanticallyCloseInWebSearchSpace
  return areNamesSemanticallyCloseInWebSearchSpace(name1, name2, 0.8f, false).isDecision();
}