in opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java [240:316]
public static List<String> getStringMatchFeatures(MentionContext mention, DiscourseEntity entity) {
  // Compares the given mention with every mention already held by the entity and
  // returns string-level features. Features found per entity mention are collected
  // in a set (so each appears once); the summary features "sameHead",
  // "modsMatch"/"nonTheModsMatch"/"modsMisMatch" and "titleMatch" are appended after them.

  // Summary flags, updated while walking the entity's mentions.
  boolean headSeen = false;
  boolean allModsFound = false;
  boolean allButTheFound = false;
  boolean headIsTitle = false;

  Parse[] mentionTokens = mention.getTokenParses();
  Set<String> mentionModifiers = constructModifierSet(mentionTokens, mention.getHeadTokenIndex());
  String mentionHead = mention.getHeadTokenText().toLowerCase();

  Set<String> collected = new HashSet<>();

  Iterator<MentionContext> candidates = entity.getMentions();
  while (candidates.hasNext()) {
    MentionContext candidate = candidates.next();

    // Exactly one of: exact-match feature, "cmix", or (possibly) "substring".
    String exact = getExactMatchFeature(candidate, mention);
    if (exact != null) {
      collected.add(exact);
    }
    else if (candidate.getParse().isCoordinatedNounPhrase()
        && !mention.getParse().isCoordinatedNounPhrase()) {
      // The entity mention is a coordinated noun phrase but the new mention is not.
      collected.add("cmix");
    }
    else {
      String strippedMention = stripNp(mention);
      String strippedCandidate = stripNp(candidate);
      if (strippedMention != null && strippedCandidate != null
          && isSubstring(strippedMention, strippedCandidate)) {
        collected.add("substring");
      }
    }

    Parse[] candidateTokens = candidate.getTokenParses();
    int candidateHeadIndex = candidate.getHeadTokenIndex();
    String candidateHead = candidate.getHeadTokenText().toLowerCase();

    // Lexical similarity: identical (lower-cased) head tokens.
    if (mentionHead.equals(candidateHead)) {
      headSeen = true;
      collected.add("hds=" + mentionHead);
      // Re-run the modifier comparison unless an earlier same-head mention already
      // left both flags set. The flags are reset here and then reflect THIS entity
      // mention, replacing whatever an earlier partial match recorded.
      // NOTE(review): this makes the nonTheModsMatch outcome depend on the order in
      // which the entity's mentions are iterated - confirm that is intended.
      if (!(allModsFound && allButTheFound)) {
        allModsFound = true;
        allButTheFound = true;
        Set<String> candidateModifiers = constructModifierSet(candidateTokens, candidateHeadIndex);
        for (String modifier : mentionModifiers) {
          if (candidateModifiers.contains(modifier)) {
            continue;
          }
          // A modifier of the new mention is missing from this entity mention.
          allModsFound = false;
          if (!modifier.equals("the")) {
            allButTheFound = false;
            collected.add("mmw=" + modifier);
          }
        }
      }
    }

    // The mention's head occurs in the set built from this entity mention's tokens
    // and its non-descriptor start index.
    if (constructModifierSet(candidateTokens, candidate.getNonDescriptorStart())
        .contains(mentionHead)) {
      headIsTitle = true;
    }
  }

  // Per-mention features first (in the set's iteration order), then the summary features.
  List<String> features = new ArrayList<>(collected);
  if (headSeen) {
    features.add("sameHead");
    String modifierVerdict;
    if (allModsFound) {
      modifierVerdict = "modsMatch";
    }
    else if (allButTheFound) {
      modifierVerdict = "nonTheModsMatch";
    }
    else {
      modifierVerdict = "modsMisMatch";
    }
    features.add(modifierVerdict);
  }
  if (headIsTitle) {
    features.add("titleMatch");
  }
  return features;
}