in ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/classifier/WindowedHistoryAttributeClassifier.java [149:315]
/**
 * Builds the boolean feature map for a single annotation.
 * <p>
 * Every name in {@code FeatureIndex} is first mapped to {@code false}; the
 * following entries are then switched on when their condition holds:
 * <ul>
 * <li>{@code IN_HIST_SECTION} - result of {@code isInHistSection} for the
 * sentence that begins the text line containing {@code sentence}. That
 * sentence is found by walking backwards through {@code sentences} until a
 * newline separates two neighbouring sentences or the first sentence is
 * reached. Only computed when {@code sentence} is non-null, a
 * {@code DocumentAnnotation} exists and {@code sentence} begins after
 * offset 0.</li>
 * <li>{@code SUBSUMED_ANNOT} - an {@code EntityMention} or
 * {@code EventMention} covers {@code arg} and has a different nominal head
 * node.</li>
 * <li>{@code SUBSUMED_CHUNK} - one of the (up to) 5 preceding / 5 following
 * {@code Chunk}s spans {@code arg} and has a different nominal head
 * node.</li>
 * <li>{@code POSTCOORD_NMOD} - the dependency relation of {@code arg}'s
 * nominal head node is one of the modifier relations listed below.</li>
 * <li>{@code DISCUSSION_DEPPATH} - some node on the path from that head node
 * to the top satisfies {@code isDiscussionContext}.</li>
 * </ul>
 * The two "subsumed" features are only evaluated when {@code sentence} is
 * non-null.
 *
 * @param jCas          CAS holding the document and its annotations
 * @param sentences     sentences of the document, in the order that
 *                      {@code sentenceIndex} refers to
 * @param sentence      sentence containing {@code arg}; may be {@code null}
 * @param sentenceIndex index of {@code sentence} within {@code sentences}
 * @param arg           annotation for which features are extracted
 * @return map from feature name to its boolean value
 */
public static HashMap<String, Boolean> extract( JCas jCas,
                                                final List<Sentence> sentences,
                                                final Sentence sentence,
                                                final int sentenceIndex,
                                                Annotation arg ) {
   final SpanComparator spanComparator = new SpanComparator();
   final HashMap<String, Boolean> vfeat = new HashMap<>();
   for ( String feat : FeatureIndex ) {
      vfeat.put( feat, false );
   }

   // Use the first DocumentAnnotation, if any, as the source of the document text.
   final Collection<DocumentAnnotation> docAnnots = JCasUtil.select( jCas, DocumentAnnotation.class );
   DocumentAnnotation docAnnot = null;
   if ( !docAnnots.isEmpty() ) {
      docAnnot = docAnnots.iterator().next();
   }

   if ( sentence != null ) {
      // 1) is the line that contains this sentence a history section?
      if ( docAnnot != null ) {
         final String doctext = docAnnot.getCoveredText();
         if ( sentence.getBegin() > 0 ) {
            // Walk backwards until a newline sits between a sentence and its
            // predecessor, or until the first sentence is reached. The sentence
            // we stop on is the one that begins the line.
            Sentence lineStart = sentence;
            int index = sentenceIndex;
            while ( index > 0 ) {
               final Sentence previous = sentences.get( index - 1 );
               String between;
               try {
                  between = doctext.substring( previous.getEnd(), lineStart.getBegin() );
               } catch ( IndexOutOfBoundsException ignored ) {
                  // Offsets that do not form a valid range (e.g. overlapping
                  // sentences) are treated as "no text in between".
                  between = "";
               }
               if ( between.indexOf( '\n' ) != -1 ) {
                  // lineStart is preceded by a newline, so it begins the line.
                  break;
               }
               // No newline: the previous sentence is on the same line. When this
               // was the first sentence the loop ends and the first sentence
               // itself is the one that gets checked.
               lineStart = previous;
               index--;
            }
            vfeat.put( IN_HIST_SECTION, isInHistSection( lineStart ) );
         }
      }

      // 2) does some other identified annotation subsume this one?
      // Candidates are all IdentifiedAnnotations covering the span of arg.
      final List<IdentifiedAnnotation> lsmentions = JCasUtil.selectCovering( jCas,
            IdentifiedAnnotation.class, arg.getBegin(),
            arg.getEnd() );
      Collections.sort( lsmentions, spanComparator );
      for ( IdentifiedAnnotation annot : lsmentions ) {
         if ( annot.getBegin() > arg.getBegin() ) {
            // annot starts after arg. NOTE(review): stopping here assumes
            // spanComparator orders by begin offset - confirm.
            break;
         }
         // here: annot begins at or before arg begins
         if ( annot.getEnd() < arg.getEnd() ) {
            // annot ends before arg does, so it cannot contain arg
            continue;
         } else if ( !DependencyUtility.equalCoverage(
               DependencyUtility.getNominalHeadNode( jCas, annot ),
               DependencyUtility.getNominalHeadNode( jCas, arg ) ) ) {
            // here: annot spans arg and the two have different head nodes;
            // only entity / event mentions count as subsuming annotations.
            if ( (annot instanceof EntityMention) || (annot instanceof EventMention) ) {
               vfeat.put( SUBSUMED_ANNOT, true );
               break; // no reason to keep checking
            }
         }
      }

      // 3) does some chunk subsume this one?
      final List<Chunk> lschunks = JCasUtil.selectPreceding( jCas, Chunk.class, arg, 5 );
      lschunks.addAll( JCasUtil.selectFollowing( jCas, Chunk.class, arg, 5 ) );
      for ( Chunk chunk : lschunks ) {
         if ( chunk.getBegin() > arg.getBegin() ) {
            break;
         }
         if ( chunk.getEnd() < arg.getEnd() ) {
            continue;
         } else if ( !DependencyUtility.equalCoverage(
               DependencyUtility.getNominalHeadNode( jCas, chunk ),
               DependencyUtility.getNominalHeadNode( jCas, arg ) ) ) {
            // the chunk spans arg and has a different head node
            vfeat.put( SUBSUMED_CHUNK, true );
         }
      }
   }

   final List<ConllDependencyNode> depnodes = JCasUtil.selectCovered( jCas, ConllDependencyNode.class, arg );
   if ( !depnodes.isEmpty() ) {
      // NOTE(review): depnode and its deprel are dereferenced without a null
      // check - confirm that getNominalHeadNode / getDeprel cannot return null here.
      final ConllDependencyNode depnode = DependencyUtility.getNominalHeadNode( depnodes );
      // 4) is the head node of the mention really just part of a larger noun phrase?
      if ( depnode.getDeprel().matches( "(NMOD|amod|nmod|det|predet|nn|poss|possessive|infmod|partmod|rcmod)" ) ) {
         vfeat.put( POSTCOORD_NMOD, true );
      }
      // 5) search the dependency path for discussion context
      for ( ConllDependencyNode dn : DependencyUtility.getPathToTop( jCas, depnode ) ) {
         if ( isDiscussionContext( dn ) ) {
            vfeat.put( DISCUSSION_DEPPATH, true );
         }
      }
   }
   return vfeat;
}