protected synchronized IStatus run()

in caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/NameFinderJob.java [105:218]


  /**
   * Runs the name finder over every sentence and stores the detected names
   * in {@code nameList}.
   *
   * <p>The name finder is created on first use from {@code modelPath} and
   * {@code modelTypes}. For each sentence the tokens covered by it are
   * collected, tokens covered by an already verified name are passed to the
   * name finder as restrictions, and every name found is converted into a
   * {@link PotentialAnnotation} with offsets taken from the token spans.
   *
   * @param monitor the progress monitor supplied by the caller; it is not
   *     used by this implementation
   * @return a {@code CANCEL} status carrying the {@link IOException} if the
   *     name finder could not be created, otherwise an {@code OK} status
   */
  protected synchronized IStatus run(IProgressMonitor monitor) {

    // TODO: Check if model path changed, compared to last run, if so reload
    // TODO: Check if the model itself changed, compared to last run, if so reload
    if (nameFinder == null) {
      try {
        nameFinder = new MultiModelNameFinder(modelPath, modelTypes);
      } catch (IOException e) {
        // Attach the exception to the status so the cause and stack trace
        // are not lost; severity stays CANCEL as before.
        return new Status(IStatus.CANCEL, OpenNLPPlugin.ID, e.getMessage(), e);
      }
    }

    // From here on nameFinder is non-null: it either existed already or was
    // just created, otherwise the method returned above.
    nameFinder.clearAdaptiveData();

    nameList = new ArrayList<>();

    // TODO: Name tokens, should be for the entire text,
    // not just the prev sentences ...
    Set<String> nameTokens = new HashSet<>();

    for (Span sentence : sentences) {

      // Collect the tokens covered by this sentence together with their text
      List<Span> sentenceTokens = new ArrayList<>();
      List<String> sentenceTokenTexts = new ArrayList<>();

      for (Span token : tokens) {
        if (sentence.contains(token)) {
          sentenceTokens.add(token);
          sentenceTokenTexts.add(token.getCoveredText(text).toString());
        }
      }

      String[] tokenStrings = sentenceTokenTexts.toArray(new String[0]);

      // Maps a token index within the sentence to the outcome the name
      // finder is restricted to for that token
      Map<Integer, String> verifiedNameTokens = new HashMap<>();

      // Note: This is slow!
      // iterate over names, to find token indexes
      if (verifiedNames != null) {
        for (Span verifiedName : verifiedNames) {
          boolean isStart = true;

          for (int i = 0; i < sentenceTokens.size(); i++) {
            if (!verifiedName.contains(sentenceTokens.get(i))) {
              continue;
            }

            // Need better mechanism here, first token in entity should be start!
            String outcome;
            if (isStart) {
              outcome = NameFinderME.START;
              isStart = false;
            } else {
              outcome = NameFinderME.CONTINUE;
            }

            // TODO: Overlapping names are dangerous here!

            // TODO: We could use type information here ...
            // as part of the outcome!
            verifiedNameTokens.put(i, verifiedName.getType() + "-" + outcome);

            String tokenText = tokenStrings[i];
            StringPattern pattern = StringPattern.recognize(tokenText);

            // A token is only remembered as a name token if it passes all
            // of the enabled filters
            boolean useToken = true;

            if (ignoreShortTokens && tokenText.length() < 4) {
              useToken = false;
            } else if (onlyConsiderAllLetterTokens && !pattern.isAllLetter()) {
              useToken = false;
            } else if (onlyConsiderInitialLetterTokens && !pattern.isInitialCapitalLetter()) {
              useToken = false;
            }

            if (useToken) {
              nameTokens.add(verifiedName.getType() + "-" + tokenText);
            }
          }
        }
      }

      nameFinder.setRestriction(verifiedNameTokens);
      nameFinder.setNameOnlyTokens(nameTokens);

      // TODO: Use multiple name finders here ....
      ConfidenceSpan[] names = nameFinder.find(tokenStrings);

      for (ConfidenceSpan name : names) {

        // Name spans are token indexes within the sentence; translate them
        // to character offsets via the token spans (end index is exclusive)
        int beginIndex = sentenceTokens.get(name.getStart()).getStart();
        int endIndex = sentenceTokens.get(name.getEnd() - 1).getEnd();

        String coveredText = text.substring(beginIndex, endIndex);

        nameList.add(new PotentialAnnotation(beginIndex, endIndex, coveredText,
                name.getConfidence(), name.getType()));
      }
    }

    // TODO: If there is a problem return an error status,
    // and calling client can fetch error message via method call
    // Use OpenNLPPlugin to log errors ...
    return new Status(IStatus.OK, OpenNLPPlugin.ID, "OK");
  }