public void process()

in ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java [247:415]


  public void process(JCas jCas, Segment segment) throws AnalysisEngineProcessException {
    PredicateArgumentExtractor predicateArgumentExtractor = new PredicateArgumentExtractor(jCas);

    // Create features for tokens that end UMLS (or other) entities
    Multimap<BaseToken, Feature> endOfEntityFeatures = HashMultimap.create();
    for (IdentifiedAnnotation entity : JCasUtil.select(jCas, IdentifiedAnnotation.class)) {
      if (!entity.getClass().equals(EventMention.class)) {
        List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, entity);
        if (tokens.size() > 0){
        	BaseToken lastToken = tokens.get(tokens.size() - 1);
            String value = String.format("%s_%s", entity.getClass().getSimpleName(), entity.getTypeID());
            endOfEntityFeatures.put(lastToken, new Feature("EndOf", value));
        }
      }
    }
    Random rand = new Random();
    
    //TRY SMOTE algorithm here to generate more minority class samples
    SMOTEplus smote = new SMOTEplus((int)Math.ceil(this.smoteNumOfNeighbors));
        
    // classify tokens within each sentence
    for (Sentence sentence : JCasUtil.selectCovered(jCas, Sentence.class, segment)) {
      List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);

      // during training, the list of all outcomes for the tokens
      List<String> outcomes;
      List<Double> confidenceScores= new ArrayList<>();
      if (this.isTraining()) {
        List<EventMention> events = Lists.newArrayList();
        for (EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, sentence)) {
          if (event.getClass().equals(EventMention.class)) {
            events.add(event);
          }
        }
        outcomes = this.eventChunking.createOutcomes(jCas, tokens, events);
      }
      // during prediction, the list of outcomes predicted so far
      else {
        outcomes = new ArrayList<String>();
      }

      // get BIO entity tags for each entity type
      int[] entityTypeIDs = new int[] {
          CONST.NE_TYPE_ID_ANATOMICAL_SITE,
          CONST.NE_TYPE_ID_DISORDER,
          CONST.NE_TYPE_ID_DRUG,
          CONST.NE_TYPE_ID_FINDING,
          CONST.NE_TYPE_ID_PROCEDURE,
          CONST.NE_TYPE_ID_UNKNOWN };
      List<IdentifiedAnnotation> entities;
      if (this.isTraining()) {
        entities = Lists.newArrayList();
        for (IdentifiedAnnotation entity : JCasUtil.selectCovered(jCas, IdentifiedAnnotation.class, sentence)) {
          if (!entity.getClass().equals(EventMention.class)) {
            entities.add(entity);
          }
        }
      } else {
        entities = JCasUtil.selectCovered(jCas, IdentifiedAnnotation.class, sentence);
      }
      
      List<ChunkingExtractor> chunkingExtractors = Lists.newArrayList(); 
      for (int typeID : entityTypeIDs) {
        Predicate<IdentifiedAnnotation> hasTypeID = hasEntityType(typeID);
        List<IdentifiedAnnotation> subEntities = Lists.newArrayList(Iterables.filter(entities, hasTypeID));
        chunkingExtractors.add(new ChunkingExtractor("EntityTag", this.entityChunking, jCas, tokens, subEntities));
      }
      
      // add extractor for phase chunks
      List<Chunk> chunks = JCasUtil.selectCovered(jCas, Chunk.class, sentence);
      chunkingExtractors.add(new ChunkingExtractor("PhraseTag", this.phraseChunking, jCas, tokens, chunks));

      // extract features for all tokens
      int tokenIndex = -1;
      int nChunkLabelsBefore = 2;
      int nChunkLabelsAfter = 2;
      int nPreviousClassifications = 2;

      for (BaseToken token : tokens) {
        ++tokenIndex;

        List<Feature> features = new ArrayList<Feature>();

        // features from previous classifications
        for (int i = nPreviousClassifications; i > 0; --i) {
          int index = tokenIndex - i;
          String previousOutcome = index < 0 ? "O" : outcomes.get(index);
          features.add(new Feature("PreviousOutcome_" + i, previousOutcome));
        }
        
        // features from token attributes
        features.addAll(this.tokenFeatureExtractor.extract(jCas, token));

        // features from surrounding tokens
        features.addAll(this.contextFeatureExtractor.extractWithin(jCas, token, sentence));
        
        // features from ends of entities
        features.addAll(endOfEntityFeatures.get(token));

        // features from surrounding entity, phrase, etc. chunk-labels
        for (ChunkingExtractor extractor : chunkingExtractors) {
          features.addAll(extractor.extract(tokenIndex, nChunkLabelsBefore, nChunkLabelsAfter));
        }
        
        // features from semantic roles
        features.addAll(predicateArgumentExtractor.extract(token));

        // apply feature selection, if necessary
        if (this.featureSelection != null) {
          features = this.featureSelection.transform(features);
        }

        // if training, write to data file
        if (this.isTraining()) {
          String outcome = outcomes.get(tokenIndex);
          // if it is an "O" down-sample it
          if (outcome.equals("O")) {
        	  if (rand.nextDouble() <= this.probabilityOfKeepingANegativeExample){
        		  this.dataWriter.write(new Instance<String>(outcome, features));
        	  }		  
          }else{//for minority instances:
        	  Instance<String> minorityInst = new Instance<String>(outcome, features);
        	  this.dataWriter.write(minorityInst);
        	  smote.addInstance(minorityInst);//add minority instances to SMOTE algorithm
          }
        }

        // if predicting, add prediction to outcomes
        else {
//          outcomes.add(this.classifier.classify(features));
          
          Map.Entry<String, Double> maxEntry = null;
          for( Map.Entry<String, Double> entry: this.classifier.score(features).entrySet() ){
          	if(maxEntry == null || entry.getValue().compareTo(maxEntry.getValue()) > 0){
          		maxEntry = entry;
          	}
          }
          
          outcomes.add(maxEntry.getKey());
          confidenceScores.add(maxEntry.getValue());
        }
      }

      // during prediction, convert chunk labels to events and add them to the CAS
      if (!this.isTraining()) {
        List<EventMention> createdEvents = this.eventChunking.createChunks(jCas, tokens, outcomes);
        int mentionidx =0;
        for(EventMention mention : createdEvents){
          mention.setConfidence(confidenceScores.get(mentionidx).floatValue());
          mentionidx++;
          if(mention.getEvent() == null){
            Event event = new Event(jCas);
            EventProperties props = new EventProperties(jCas);
            props.addToIndexes();
            event.setProperties(props);
            mention.setEvent(event);
            event.addToIndexes();
          }
        }
      }
    }
    if(this.isTraining() && this.smoteNumOfNeighbors >= 1){ //add synthetic instances to datawriter, if smote is selected
    	Iterable<Instance<String>> syntheticInsts = smote.populateMinorityClass();
    	for( Instance<String> sytheticInst: syntheticInsts){
    		this.dataWriter.write(sytheticInst);
    	}
    }
    
  }