public static HashMap extract()

in ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/classifier/WindowedHistoryAttributeClassifier.java [149:315]
114 lines of code
29 McCabe index (conditional complexity)

   /**
    * Builds the boolean feature map for one annotation.
    * <p>
    * Every key in {@code FeatureIndex} is first set to {@code false}; individual
    * features are then switched on as follows:
    * <ol>
    * <li>{@code IN_HIST_SECTION} - result of {@code isInHistSection} for the sentence that
    * starts the line containing {@code sentence}. Only computed when {@code sentence} is
    * non-null, a {@code DocumentAnnotation} exists and the sentence does not begin at
    * offset 0.</li>
    * <li>{@code SUBSUMED_ANNOT} - an {@code EntityMention} or {@code EventMention} covers
    * {@code arg} and has a different nominal head node. Only computed when
    * {@code sentence} is non-null.</li>
    * <li>{@code SUBSUMED_CHUNK} - a nearby {@code Chunk} covers {@code arg} and has a
    * different nominal head node. Only computed when {@code sentence} is non-null.</li>
    * <li>{@code POSTCOORD_NMOD} - the dependency relation of the head node of {@code arg}
    * is one of a fixed list of modifier relations.</li>
    * <li>{@code DISCUSSION_DEPPATH} - some node on the dependency path from the head node
    * of {@code arg} to the top satisfies {@code isDiscussionContext}.</li>
    * </ol>
    *
    * @param jCas          CAS that is queried for annotations
    * @param sentences     sentences of the document; indexed by {@code sentenceIndex}
    * @param sentence      sentence containing {@code arg}; may be {@code null}, in which case
    *                      features 1-3 keep their default {@code false}
    * @param sentenceIndex index of {@code sentence} within {@code sentences}
    * @param arg           annotation for which the features are computed
    * @return map from feature name to feature value
    */
   public static HashMap<String, Boolean> extract( JCas jCas,
                                                   final List<Sentence> sentences,
                                                   final Sentence sentence,
                                                   final int sentenceIndex,
                                                   Annotation arg ) {
      HashMap<String, Boolean> vfeat = new HashMap<>();
      for ( String feat : FeatureIndex ) {
         vfeat.put( feat, false );
      }

      if ( sentence != null ) {

         // 1) is the line that contains this sentence in a history section?
         // Only the first DocumentAnnotation is consulted for the document text.
         Collection<DocumentAnnotation> docAnnots =
               JCasUtil.select( jCas, DocumentAnnotation.class );
         if ( !docAnnots.isEmpty() && sentence.getBegin() > 0 ) {
            String doctext = docAnnots.iterator().next().getCoveredText();
            vfeat.put( IN_HIST_SECTION,
                  isLineStartInHistSection( doctext, sentences, sentence, sentenceIndex ) );
         }

         // 2) some other identified annotation subsumes this one?

         // Get all IdentifiedAnnotations covering the boundaries of the
         // annotation
         List<IdentifiedAnnotation> lsmentions = JCasUtil.selectCovering( jCas,
               IdentifiedAnnotation.class, arg.getBegin(),
               arg.getEnd() );

         Collections.sort( lsmentions, new SpanComparator() );

         // NB: arg is annotation input to this method. annot is current
         // lsmentions in loop
         for ( IdentifiedAnnotation annot : lsmentions ) {
            if ( annot.getBegin() > arg.getBegin() ) {
               // annot starts after our arg, so if ordered correctly(?)
               // then I break b/c I won't find any more that cover arg
               break;
            }

            // INVARIANT: annot begins at or before arg begins
            if ( annot.getEnd() < arg.getEnd() ) {
               // annot ends before arg ends, so it cannot cover arg
               continue;
            } else if ( !DependencyUtility.equalCoverage(
                  DependencyUtility.getNominalHeadNode( jCas, annot ),
                  DependencyUtility.getNominalHeadNode( jCas, arg ) ) ) {
               // INVARIANT: annot begins at or before arg begins
               // INVARIANT: annot ends at or after arg ends
               // INVARIANT: ergo, arg falls within bounds of annot
               // now verify that annot is an EventMention or EntityMention
               if ( (annot instanceof EntityMention) || (annot instanceof EventMention) ) {
                  // annot has boundaries at or exceeding those of arg,
                  // a different head node, and is either an EntityMention
                  // or an EventMention
                  vfeat.put( SUBSUMED_ANNOT, true );
                  break; // no reason to keep checking
               }
            }
         }

         // 3) some chunk subsumes this?
         // NOTE(review): candidates are the 5 chunks preceding and the 5 chunks following
         // arg. Whether such chunks can ever span arg depends on the uimaFIT
         // selectPreceding/selectFollowing semantics - confirm; if they cannot, this
         // feature is never set.
         List<Chunk> lschunks = JCasUtil.selectPreceding( jCas, Chunk.class, arg, 5 );
         lschunks.addAll( JCasUtil.selectFollowing( jCas, Chunk.class, arg, 5 ) );
         for ( Chunk chunk : lschunks ) {
            if ( chunk.getBegin() > arg.getBegin() ) {
               break;
            }
            if ( chunk.getEnd() < arg.getEnd() ) {
               continue;
            } else if ( !DependencyUtility.equalCoverage(
                  DependencyUtility.getNominalHeadNode( jCas, chunk ),
                  DependencyUtility.getNominalHeadNode( jCas, arg ) ) ) {
               // the case that chunk is a superset
               vfeat.put( SUBSUMED_CHUNK, true );
            }
         }
      }


      List<ConllDependencyNode> depnodes = JCasUtil.selectCovered( jCas, ConllDependencyNode.class, arg );
      if ( !depnodes.isEmpty() ) {
         ConllDependencyNode depnode = DependencyUtility.getNominalHeadNode( depnodes );

         // 1) check if the head node of the entity mention is really just part of a larger noun phrase
         if ( depnode.getDeprel().matches( "(NMOD|amod|nmod|det|predet|nn|poss|possessive|infmod|partmod|rcmod)" ) ) {
            vfeat.put( POSTCOORD_NMOD, true );
         }

         // 4) search dependency paths for discussion context
         for ( ConllDependencyNode dn : DependencyUtility.getPathToTop( jCas, depnode ) ) {
            if ( isDiscussionContext( dn ) ) {
               vfeat.put( DISCUSSION_DEPPATH, true );
            }
         }
      }
      return vfeat;
   }

   /**
    * Finds the sentence that starts the line containing {@code sentence} and reports
    * whether {@code isInHistSection} holds for it.
    * <p>
    * Starting at {@code sentence}, the sentence list is walked backwards for as long as
    * the text between a sentence and its predecessor contains no newline. The walk stops
    * at the first sentence that is preceded by a newline, or at the first sentence of the
    * list. A {@code sentenceIndex} of zero or less is treated as the first sentence.
    * <p>
    * Earlier revisions stopped one sentence short when the walk reached the start of the
    * list without seeing a newline (the branch that tested the first sentence was
    * unreachable); the first sentence is now tested in that case, matching the handling
    * of a sentence at index 1.
    *
    * @param doctext       text of the document the sentence offsets refer to
    * @param sentences     sentences of the document
    * @param sentence      sentence to start from
    * @param sentenceIndex index of {@code sentence} within {@code sentences}
    * @return value of {@code isInHistSection} for the line-starting sentence
    */
   private static boolean isLineStartInHistSection( final String doctext,
                                                    final List<Sentence> sentences,
                                                    final Sentence sentence,
                                                    final int sentenceIndex ) {
      Sentence lineStart = sentence;
      for ( int i = sentenceIndex; i > 0; i-- ) {
         final Sentence previous = sentences.get( i - 1 );
         if ( hasNewlineBetween( doctext, previous.getEnd(), lineStart.getBegin() ) ) {
            // there is a newline between lineStart and the prior sentence
            break;
         }
         lineStart = previous;
      }
      return isInHistSection( lineStart );
   }

   /**
    * Tells whether {@code text} contains a newline character in the range
    * {@code [from, to)}.
    * <p>
    * Offsets for which {@code text.substring( from, to )} would throw (negative start,
    * end past the text, start after end) yield {@code false}; such a gap is treated as
    * containing no newline.
    *
    * @param text text to inspect
    * @param from inclusive start offset
    * @param to   exclusive end offset
    * @return {@code true} if a {@code '\n'} lies within the range
    */
   private static boolean hasNewlineBetween( final String text, final int from, final int to ) {
      if ( from < 0 || to > text.length() || from > to ) {
         return false;
      }
      final int newline = text.indexOf( '\n', from );
      return newline != -1 && newline < to;
   }