in ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/history/HistoryAttributeClassifier.java [168:346]
/**
 * Computes the boolean feature map for one annotation.
 *
 * <p>Every name in {@code FeatureIndex} is first mapped to {@code false}. The
 * following entries are then switched on when their condition holds:
 * <ul>
 *   <li>{@code IN_HIST_SECTION} - result of {@code isInHistSection} for the
 *       sentence that starts the line containing {@code arg}. Only evaluated
 *       when a covering sentence and a {@code DocumentAnnotation} exist and the
 *       covering sentence does not begin at offset 0.</li>
 *   <li>{@code SUBSUMED_ANNOT} - an {@code EntityMention} or
 *       {@code EventMention} covers {@code arg} and has a different nominal
 *       head node.</li>
 *   <li>{@code SUBSUMED_CHUNK} - one of the inspected neighbouring chunks
 *       spans {@code arg} and has a different nominal head node.</li>
 *   <li>{@code POSTCOORD_NMOD} - the dependency relation of {@code arg}'s
 *       nominal head matches the modifier-relation pattern below.</li>
 *   <li>{@code DISCUSSION_DEPPATH} - some node on the path from the head node
 *       to the top satisfies {@code isDiscussionContext}.</li>
 * </ul>
 * The annotation and chunk checks are skipped entirely when no sentence
 * covers {@code arg}.
 *
 * @param jCas the CAS holding sentences, mentions, chunks and dependency nodes
 * @param arg  the annotation to compute features for
 * @return a map from feature name to its boolean value
 */
public static HashMap<String, Boolean> extract(JCas jCas,
        Annotation arg) {
    HashMap<String, Boolean> vfeat = new HashMap<String, Boolean>();
    for (String feat : FeatureIndex) {
        vfeat.put(feat, false);
    }

    // Find the first sentence (in index iteration order) whose span covers arg.
    Sentence sEntity = null;
    Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);
    for (Sentence s : sentences) {
        if (s.getBegin() <= arg.getBegin() && s.getEnd() >= arg.getEnd()) {
            sEntity = s;
            break;
        }
    }

    // Use the first DocumentAnnotation, if any, as the source of the document text.
    DocumentAnnotation docAnnot = null;
    Collection<DocumentAnnotation> docAnnots =
            JCasUtil.select(jCas, DocumentAnnotation.class);
    if (!docAnnots.isEmpty()) {
        docAnnot = docAnnots.iterator().next();
    }

    if (sEntity != null) {
        // 0) history-section feature: evaluated on the sentence that opens the
        // line the covering sentence sits on, not necessarily on the covering
        // sentence itself.
        if (docAnnot != null && sEntity.getBegin() > 0) {
            String doctext = docAnnot.getCoveredText();
            // TODO: make it so you don't sort every time for same sentence.
            ArrayList<Sentence> sentList = new ArrayList<Sentence>(sentences);
            Collections.sort(sentList, new AnnotLocationComparator());
            Sentence lineStart = findLineStartSentence(sentList, sEntity, doctext);
            vfeat.put(IN_HIST_SECTION, isInHistSection(lineStart));
        }

        // 2) some other identified annotation subsumes this one?
        // Get all IdentifiedAnnotations covering the boundaries of arg.
        List<IdentifiedAnnotation> lsmentions = JCasUtil.selectCovering(jCas,
                IdentifiedAnnotation.class, arg.getBegin(),
                arg.getEnd());
        Collections.sort(lsmentions, new AnnotLocationComparator());
        for (IdentifiedAnnotation annot : lsmentions) {
            if (annot.getBegin() > arg.getBegin()) {
                // annot starts after arg; the loop relies on the sort order so
                // that no later candidate can cover arg either.
                break;
            }
            // Here annot begins at or before arg begins.
            if (annot.getEnd() < arg.getEnd()) {
                // annot ends before arg does, so it does not cover arg.
                continue;
            } else if (!DependencyUtility.equalCoverage(
                    DependencyUtility.getNominalHeadNode(jCas, annot),
                    DependencyUtility.getNominalHeadNode(jCas, arg))) {
                // annot spans arg and has a different head node; it only counts
                // when it is an EntityMention or an EventMention.
                if ((annot instanceof EntityMention) || (annot instanceof EventMention)) {
                    vfeat.put(SUBSUMED_ANNOT, true);
                    break; // no reason to keep checking
                }
            }
        }

        // 3) some chunk subsumes this?
        // Candidates are up to 5 chunks preceding arg followed by up to 5 chunks
        // following it, in that order.
        List<Chunk> lschunks = JCasUtil.selectPreceding(jCas, Chunk.class, arg, 5);
        lschunks.addAll(JCasUtil.selectFollowing(jCas, Chunk.class, arg, 5));
        for (Chunk chunk : lschunks) {
            if (chunk.getBegin() > arg.getBegin()) {
                break;
            }
            if (chunk.getEnd() < arg.getEnd()) {
                continue;
            } else if (!DependencyUtility.equalCoverage(
                    DependencyUtility.getNominalHeadNode(jCas, chunk),
                    DependencyUtility.getNominalHeadNode(jCas, arg))) {
                // the chunk spans arg but is headed by a different node
                vfeat.put(SUBSUMED_CHUNK, true);
            }
        }
    }

    List<ConllDependencyNode> depnodes = JCasUtil.selectCovered(jCas, ConllDependencyNode.class, arg);
    if (!depnodes.isEmpty()) {
        ConllDependencyNode depnode = DependencyUtility.getNominalHeadNode(depnodes);
        // NOTE(review): the null guards are defensive; it is not visible from
        // here whether the head lookup or getDeprel() can actually return null.
        if (depnode != null) {
            // 1) check if the head node of the entity mention is really just part of a larger noun phrase
            String deprel = depnode.getDeprel();
            if (deprel != null
                    && deprel.matches("(NMOD|amod|nmod|det|predet|nn|poss|possessive|infmod|partmod|rcmod)")) {
                vfeat.put(POSTCOORD_NMOD, true);
            }
            // 4) search dependency paths for discussion context
            for (ConllDependencyNode dn : DependencyUtility.getPathToTop(jCas, depnode)) {
                if (isDiscussionContext(dn)) {
                    vfeat.put(DISCUSSION_DEPPATH, true);
                }
            }
        }
    }
    return vfeat;
}

/**
 * Walks backwards from {@code sentence} through the position-sorted sentence
 * list and returns the sentence that opens the line {@code sentence} is on:
 * the nearest sentence (possibly {@code sentence} itself) that is separated
 * from its predecessor by text containing a newline, or the first sentence in
 * the list when no such gap is found.
 */
private static Sentence findLineStartSentence(List<Sentence> sortedSentences,
        Sentence sentence, String doctext) {
    Sentence lineStart = sentence;
    for (int i = sortedSentences.indexOf(sentence); i > 0; i--) {
        Sentence previous = sortedSentences.get(i - 1);
        String tweenSents = textBetween(doctext, previous.getEnd(), lineStart.getBegin());
        if (tweenSents.indexOf("\n") != -1) {
            // a newline separates lineStart from the sentence before it
            break;
        }
        // no newline in the gap: the previous sentence is on the same line
        lineStart = previous;
    }
    return lineStart;
}

/**
 * Returns {@code text.substring(from, to)}, or the empty string when the
 * offsets do not describe a valid range of {@code text} (for example when two
 * sentences overlap so that {@code from > to}).
 */
private static String textBetween(String text, int from, int to) {
    if (text == null || from < 0 || to > text.length() || from > to) {
        return "";
    }
    return text.substring(from, to);
}