in opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java [357:406]
/**
 * Tests whether {@code ch} can be laid over the beginning or the end of this chunk.
 *
 * <p>The candidate is compared position by position against this chunk twice: first with
 * its first element aligned to this chunk's first element, then with its last element
 * aligned to this chunk's last element. An alignment is accepted when, at every compared
 * position, both of the following hold:
 * <ul>
 *   <li>the POS tags are equal or the lemmas are equal (or both);</li>
 *   <li>this chunk does not hold the lemma {@code "*"} where {@code ch} holds a different
 *       lemma.</li>
 * </ul>
 *
 * <p>NOTE(review): positions whose POS tags match but whose lemmas differ are accepted as
 * long as this chunk's lemma is not {@code "*"}; confirm that this is intended.
 *
 * @param ch the candidate sub-chunk; dereferenced immediately, so it must not be
 *           {@code null}
 * @return {@code true} if either alignment is accepted; {@code false} otherwise,
 *         including whenever {@code ch} has more lemmas than this chunk
 */
public Boolean isASubChunk(ParseTreeChunk ch) {
  List<String> lems = ch.getLemmas();
  List<String> poss = ch.POSs;
  if (this.lemmas.size() < lems.size()) {
    return false; // a sub-chunk cannot be longer than the chunk it is compared with
  }
  // Try the start-anchored alignment first; fall back to the end-anchored one.
  return matchesAtAlignment(lems, poss, false) || matchesAtAlignment(lems, poss, true);
}

/**
 * Checks one alignment of a candidate's lemmas/POS tags against this chunk.
 *
 * @param lems        lemmas of the candidate; the caller guarantees there are no more of
 *                    them than this chunk has lemmas
 * @param poss        POS tags of the candidate
 * @param anchorAtEnd {@code false} to align the first elements of the lists,
 *                    {@code true} to align their last elements
 * @return {@code true} if no compared position rules the alignment out
 */
private boolean matchesAtAlignment(List<String> lems, List<String> poss, boolean anchorAtEnd) {
  int count = lems.size();
  // Every list is shifted by its own length so the end-anchored case indexes exactly the
  // elements an independent reversal of each list would pair up; no copies are needed.
  // presumably POS and lemma lists always have equal length - verify against the class.
  int ownLemmaShift = anchorAtEnd ? this.lemmas.size() - count : 0;
  int ownPosShift = anchorAtEnd ? this.POSs.size() - count : 0;
  int otherPosShift = anchorAtEnd ? poss.size() - count : 0;

  boolean starAgainstOtherLemma = false;
  for (int j = 0; j < count; j++) {
    boolean posDiffers = !this.POSs.get(ownPosShift + j).equals(poss.get(otherPosShift + j));
    String ownLemma = this.lemmas.get(ownLemmaShift + j);
    boolean lemmaDiffers = !ownLemma.equals(lems.get(j));

    // Neither the POS tag nor the lemma agrees: the chunks are not comparable here.
    if (posDiffers && lemmaDiffers) {
      return false;
    }
    // this => "*", ch => a different lemma (e.g. "run"): rejects this alignment.
    // Scanning continues so the remaining positions are still examined.
    if (lemmaDiffers && ownLemma.equals("*")) {
      starAgainstOtherLemma = true;
    }
  }
  return !starAgainstOtherLemma;
}