public void process()

in ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetectorAnnotatorBIO.java [99:206]


  /**
   * Classifies every character of every {@link Segment} in the CAS as beginning ("B"),
   * inside ("I") or outside ("O") a sentence.
   *
   * <p>In training mode the label of each character is derived from the {@link Sentence}
   * annotations covered by the segment, and one {@link Instance} per character is written
   * to the data writer. Otherwise the classifier is consulted and a sentence is created
   * (via {@code makeSentence}) each time a run of "B"/"I" characters ends.
   *
   * @param jcas the CAS whose segments are processed
   * @throws AnalysisEngineProcessException if writing a training instance or classifying fails
   */
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    LOGGER.info( "Processing ..." );
    String uri=null;
    try{
      uri = ViewUriUtil.getURI(jcas).toString();
      LOGGER.info(String.format("Processing file with uri %s", uri));
    }catch(CASRuntimeException e){
      LOGGER.debug("No uri found, probably not a big deal unless this is an evaluation.");
    }

    // The feature configuration is checked once here instead of once per character.
    final boolean usePositionFeats = featConfig == FEAT_CONFIG.LINE_POS
        || featConfig == FEAT_CONFIG.CHAR_POS
        || featConfig == FEAT_CONFIG.CHAR_SHAPE_POS;
    final boolean useCharWindowFeats = featConfig == FEAT_CONFIG.CHAR
        || featConfig == FEAT_CONFIG.CHAR_POS
        || featConfig == FEAT_CONFIG.CHAR_SHAPE
        || featConfig == FEAT_CONFIG.CHAR_SHAPE_POS;

    if(usePositionFeats){
      buildDocEndlineModel(jcas);
    }

    for(Segment seg : JCasUtil.select(jcas, Segment.class)){
      // keep track of next sentence during training
      List<Sentence> sents = JCasUtil.selectCovered(jcas, Sentence.class, seg);
      int sentInd = 0;
      Sentence nextSent = sents.isEmpty() ? null : sents.get(sentInd++);
      // CAS offset at which the sentence currently being predicted started
      int startInd = 0;

      // Iterate over every character in the Segment and classify it as Begin, Inside, or Outside a Sentence
      String prevOutcome = "O";
      String segText = seg.getCoveredText();
      final int segBegin = seg.getBegin();
      for(int ind = 0; ind < segText.length(); ind++){
        List<Feature> feats = new ArrayList<>();

        char curChar = segText.charAt(ind);

        // Start collecting features:
        feats.add(new Feature("PrevOutcome", prevOutcome));

        // all systems get to know about the current char they're classifying (i.e. is this a period)
        feats.addAll(getCharFeatures(curChar, "Character"));

        if(useCharWindowFeats){
          // features for the characters in a window around the current one, clipped to the segment
          for(int window = -WINDOW_SIZE; window <= WINDOW_SIZE; window++){
            if(ind+window >= 0 && ind+window < segText.length()){
              char conChar = segText.charAt(ind+window);
              feats.addAll(getCharFeatures(conChar, "CharOffset_"+window));
            }
          }
        }

        String nextToken = getNextToken(segText, ind);
        String prevToken = getPrevToken(segText, ind);
        feats.addAll(getTokenFeatures(prevToken, nextToken, "Token"));

        if(usePositionFeats){
          feats.addAll(getPositionFeatures(curChar, ind, segText, nextToken));
        }

        String outcome;
        // offset of the current character in the CAS (ind is relative to the segment)
        int casInd = segBegin + ind;
        if(this.isTraining()){
          // Sentence end offsets are treated as exclusive (the prediction branch below passes
          // the first outside character as a sentence end), so a sentence has been passed as
          // soon as casInd reaches its end, not one character later.
          while(nextSent != null && nextSent.getEnd() <= casInd && sentInd < sents.size()){
            nextSent = sents.get(sentInd++);
          }
          if(nextSent == null){
            // no sentences in this segment
            outcome = "O";
          }else if(casInd < nextSent.getBegin()){
            // current index is prior to next sentence
            outcome = "O";
          }else if(casInd >= nextSent.getEnd()){
            // current index is past the last sentence of the segment (nothing left to advance to)
            outcome = "O";
          }else if(prevOutcome.equals("O")){
            // current index is in sentence but just after a character that was out of the sentence
            outcome = "B";
          }else{
            // current index is in the middle of a sentence
            outcome = "I";
          }
          this.dataWriter.write(new Instance<String>(outcome, feats));
        }else{
          if(!prevOutcome.equals("O") && Character.isLetterOrDigit(curChar)){
            // a letter or digit directly after an in-sentence character never ends the sentence
            outcome = "I";
          }else{
            outcome = this.classifier.classify(feats);
            // This shouldn't be necessary, but if the learning algorithm fails, we need to correct it so
            // that our accounting works. Only a B or O can follow an O, if classifier predicts "I", switch
            // it to a "B".
            if(outcome.equals("I") && prevOutcome.equals("O")){
              outcome = "B";
            }
            boolean prevInSentence = prevOutcome.equals("I") || prevOutcome.equals("B");
            if(prevInSentence && (outcome.equals("O") || outcome.equals("B"))){
              // Just ended a sentence. A "B" directly after "B"/"I" also ends the open
              // sentence; without this its text would be dropped when startInd is reset below.
              emitTrimmedSentence(jcas, segText, segBegin, startInd, casInd);
            }
            if(outcome.equals("B")){
              startInd = casInd;
            }
          }
        }
        prevOutcome = outcome;
      }
      // One final sentence at the end of the segment if we were in the middle of one when we ran out of characters.
      if(!this.isTraining() && !prevOutcome.equals("O")){
        // segment ended with a sentence; trim it the same way as sentences closed inside the loop
        emitTrimmedSentence(jcas, segText, segBegin, startInd, seg.getEnd());
      }
    }
  }

  /**
   * Creates a sentence over {@code [begin, end)} after moving {@code end} left past any
   * trailing whitespace; creates nothing if the span is empty after trimming.
   *
   * @param jcas     the CAS the sentence is added to
   * @param segText  covered text of the segment containing the span
   * @param segBegin CAS offset of the first character of {@code segText}
   * @param begin    CAS offset of the first character of the sentence
   * @param end      CAS offset one past the last candidate character of the sentence
   */
  private void emitTrimmedSentence(JCas jcas, String segText, int segBegin, int begin, int end)
      throws AnalysisEngineProcessException {
    int trimmedEnd = end;
    while(trimmedEnd > begin && Character.isWhitespace(segText.charAt(trimmedEnd - segBegin - 1))){
      trimmedEnd--;
    }
    if(trimmedEnd > begin){
      makeSentence(jcas, begin, trimmedEnd);
    }
  }