in opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java [418:443]
/**
 * Applies the "special groups" rule to a pair of names.
 * <p>
 * Both names are lower-cased and tokenized with {@code TextProcessor.fastTokenize}.
 * If either token list contains one of the trigger words (orchestra, symphony, band,
 * trio, soleil, disney, lang), the rule succeeds only when the two names consist of the
 * same words once the entries of {@code ENGLISH_PREPOSITIONS} are disregarded. If no
 * trigger word is present the rule does not apply and is reported as succeeded.
 *
 * @param name1 first name to compare; must not be {@code null}
 * @param name2 second name to compare; must not be {@code null}
 * @return {@code true} if the rule does not apply or every non-preposition token occurs
 *         in both names; {@code false} if a trigger word is present and the names differ
 *         in at least one non-preposition token
 */
private boolean succeededSpecialGroupsSymphoniesRule(String name1, String name2)
{
  // Locale.ROOT keeps lower-casing independent of the JVM default locale; with e.g. a
  // Turkish default, "DISNEY".toLowerCase() would not equal "disney".
  List<String> name1Tokens = TextProcessor.fastTokenize(name1.toLowerCase(java.util.Locale.ROOT), false);
  List<String> name2Tokens = TextProcessor.fastTokenize(name2.toLowerCase(java.util.Locale.ROOT), false);

  // "lang" covers the special group 'lang lang'
  String[] triggerWords = { "orchestra", "symphony", "band", "trio", "soleil", "disney", "lang" };
  boolean isSpecialGroup = false;
  for (String trigger : triggerWords)
  {
    if (name1Tokens.contains(trigger) || name2Tokens.contains(trigger))
    {
      isSpecialGroup = true;
      break;
    }
  }
  if (!isSpecialGroup)
  {
    return true;
  }

  // For special groups all words should be the same: collect the tokens that occur in
  // only one of the names. Copies are used so the tokenizer's lists are left untouched.
  List<String> unmatchedTokens = new ArrayList<>(name1Tokens);
  unmatchedTokens.removeAll(name2Tokens);
  List<String> onlyInName2 = new ArrayList<>(name2Tokens);
  onlyInName2.removeAll(name1Tokens);
  unmatchedTokens.addAll(onlyInName2);

  // Differences that are merely prepositions do not count as a mismatch.
  unmatchedTokens.removeAll(Arrays.asList(ENGLISH_PREPOSITIONS));
  return unmatchedTokens.isEmpty();
}