in opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java [145:195]
/**
 * Adds a "CNP" mention to {@code entities} for each conjunct found in the given noun phrase.
 * <p>
 * Nothing is collected when any syntactic child of {@code np} has the type "UCP" or "NX".
 * Otherwise the tokens of {@code np} are walked right to left, starting at the token just
 * before the index reported by {@code headFinder.getHeadIndex(np)}. The tokens "and" / "or"
 * (when {@code isPartOfName} is false for them) and, once a conjunction has been accepted,
 * "," act as separators: the tokens between a separator and the end of the current conjunct
 * become one mention. A conjunction only yields a mention when the token preceding it has a
 * syntactic type starting with "NN"; otherwise the scan stops.
 *
 * @param np the noun phrase whose tokens are scanned.
 * @param entities the list that receives the created mentions.
 */
private void collectCoordinatedNounPhraseMentions(Parse np, List<Mention> entities) {
  logger.trace("collectCoordNp: {}", np);

  // Noun phrases that contain a UCP or NX child are left alone.
  for (Parse child : np.getSyntacticChildren()) {
    if (child.getSyntacticType().equals("UCP") || child.getSyntacticType().equals("NX")) {
      return;
    }
  }

  List<Parse> tokens = np.getTokens();
  boolean conjunctionSeen = false;
  // Index of the right-most token of the conjunct that is currently being scanned.
  int conjunctEnd = headFinder.getHeadIndex(np);

  for (int pos = conjunctEnd - 1; pos >= 0; pos--) {
    Parse current = tokens.get(pos);
    String text = current.toString();

    boolean isConjunction = (text.equals("and") || text.equals("or")) && !isPartOfName(current);
    if (isConjunction) {
      if (conjunctEnd != pos) {
        // Only a conjunction that directly follows a noun-like token splits the phrase.
        if (pos < 1 || !tokens.get(pos - 1).getSyntacticType().startsWith("NN")) {
          break;
        }
        addConjunctMention(tokens, pos + 1, conjunctEnd, current, entities);
        logger.debug("Adding extent for conjunction in: {} preceded by {}",
            np, tokens.get(pos - 1).getSyntacticType());
        conjunctionSeen = true;
      }
      // The next conjunct ends right before this separator.
      conjunctEnd = pos - 1;
      continue;
    }

    // Commas and the phrase start only matter after a conjunction has been accepted.
    if (!conjunctionSeen) {
      continue;
    }

    if (text.equals(",")) {
      if (conjunctEnd != pos) {
        addConjunctMention(tokens, pos + 1, conjunctEnd, current, entities);
        logger.debug("Adding extent for comma in: {}", np);
      }
      conjunctEnd = pos - 1;
    }
    else if (pos == 0 && conjunctEnd >= 0) {
      // First token reached: the left-most conjunct runs from here to conjunctEnd.
      addConjunctMention(tokens, pos, conjunctEnd, current, entities);
      logger.debug("Adding extent for start coord in: {}", np);
    }
  }
}

/**
 * Creates a "CNP" mention that spans from the start of the token at {@code first} to the end
 * of the token at {@code last} and appends it to {@code entities}.
 *
 * @param tokens the tokens of the noun phrase being processed.
 * @param first index of the token whose span start opens the mention.
 * @param last index of the token whose span end closes the mention.
 * @param anchor the token that supplies the entity id and the parse of the mention.
 * @param entities the list that receives the new mention.
 */
private void addConjunctMention(List<Parse> tokens, int first, int last, Parse anchor,
    List<Mention> entities) {
  int start = tokens.get(first).getSpan().getStart();
  int end = tokens.get(last).getSpan().getEnd();
  Span extent = new Span(start, end);
  entities.add(new Mention(extent, extent, anchor.getEntityId(), anchor, "CNP"));
}