void runNameFinder()

in caseditor-opennlp-plugin/src/main/java/org/apache/opennlp/caseditor/namefinder/EntityContentProvider.java [341:530]


  /**
   * Gathers the name finder input from the CAS and the type system preference
   * store, configures the {@code nameFinder} job with it and schedules the job.
   * <p>
   * The method reads the configured name, sentence and token type names and the
   * model paths from the preference store, rebuilds {@code confirmedEntities}
   * from the name annotations currently in the CAS, and collects the sentence
   * and token spans which are handed to the job.
   * <p>
   * If a required preference is missing, a configured type does not exist in
   * the type system, or the CAS contains no sentence or no token, a message is
   * shown via {@code nameFinderView.setMessage(...)} and the method returns
   * without touching the job data and without scheduling the job. If the CAS
   * has no document text the method returns silently.
   */
  void runNameFinder() {

    // TODO: Check if sentences do overlap
    // TODO: Check if tokens do overlap
    // TODO: Check that tokens do not intersect with sentence span

    IPreferenceStore store = editor.getCasDocumentProvider()
        .getTypeSystemPreferenceStore(editor.getEditorInput());

    // TODO: All preferences should be retrieved when the name finder executed!
    // Just move it down the run method ...
    nameTypeNames = store.getString(OpenNLPPreferenceConstants.NAME_TYPE).split(",");

    // String.split drops trailing empty strings, a value like "," therefore
    // results in an empty array which must be rejected as well.
    if (nameTypeNames.length == 0) {
      nameFinderView.setMessage("Name type name(s) must be set!");
      return;
    }

    for (int i = 0; i < nameTypeNames.length; i++) {
      nameTypeNames[i] = nameTypeNames[i].trim();

      if (nameTypeNames[i].isEmpty()) {
        nameFinderView.setMessage("Name type name(s) must be set!");
        return;
      }
    }

    CAS cas = input.getCAS();

    confirmedEntities.clear();

    // Spans of the existing name annotations, only passed on to the name finder
    // if the confirmed name detection is enabled (see below).
    List<Span> nameSpans = new ArrayList<>();

    for (String nameTypeName : nameTypeNames) {
      Type nameType = cas.getTypeSystem().getType(nameTypeName);

      if (nameType == null) {
        nameFinderView.setMessage("Name type " + nameTypeName + " does not exist in type system!");
        return;
      }

      FSIndex<AnnotationFS> nameAnnotations = cas.getAnnotationIndex(nameType);

      for (AnnotationFS nameAnnotation : nameAnnotations) {
        String annotationTypeName = nameAnnotation.getType().getName();

        // TODO: Entity must have a type ...
        PotentialAnnotation entity = new PotentialAnnotation(nameAnnotation.getBegin(),
            nameAnnotation.getEnd(), nameAnnotation.getCoveredText(), null,
            annotationTypeName);
        confirmedEntities.add(entity); // TODO: This needs to go into a second list!

        nameSpans.add(new Span(nameAnnotation.getBegin(), nameAnnotation.getEnd(),
            annotationTypeName));
      }
    }

    // NOTE(review): a new listener instance is registered on every invocation,
    // also when the run is aborted below. This looks like listeners accumulate
    // on the job over time - confirm and register the listener only once.
    nameFinder.addJobChangeListener(new NameFinderJobListener());

    String sentenceTypeName = store.getString(OpenNLPPreferenceConstants.SENTENCE_TYPE);

    if (sentenceTypeName.isEmpty()) {
      nameFinderView.setMessage("Sentence type is not set!");
      return;
    }

    String[] modelPathes =
        store.getString(OpenNLPPreferenceConstants.NAME_FINDER_MODEL_PATH).split(",");

    // Same split corner case as for the name type names above.
    if (modelPathes.length == 0) {
      nameFinderView.setMessage("Model path is not set!");
      return;
    }

    for (int i = 0; i < modelPathes.length; i++) {
      modelPathes[i] = modelPathes[i].trim();

      if (modelPathes[i].isEmpty()) {
        nameFinderView.setMessage("Model path is not set!");
        return;
      }
    }

    String additionalSentenceTypes =
        store.getString(OpenNLPPreferenceConstants.ADDITIONAL_SENTENCE_TYPE);

    String text = cas.getDocumentText();

    // Without a document text there is nothing to run the name finder on.
    if (text == null) {
      return;
    }

    Type[] sentenceTypes = UIMAUtil.splitTypes(
        sentenceTypeName + "," + additionalSentenceTypes, ',', cas.getTypeSystem());

    if (sentenceTypes == null) {
      nameFinderView.setMessage("Sentence type does not exist in type system!");
      return;
    }

    String tokenName = store.getString(OpenNLPPreferenceConstants.TOKEN_TYPE);

    if (tokenName.isEmpty()) {
      nameFinderView.setMessage("Token type name is not set!");
      return;
    }

    Type tokenType = cas.getTypeSystem().getType(tokenName);

    if (tokenType == null) {
      nameFinderView.setMessage("Token type does not exist in type system!");
      return;
    }

    List<Span> sentences = new ArrayList<>();
    List<Span> tokens = new ArrayList<>();

    // The token index does not depend on the sentence, fetch it only once.
    FSIndex<AnnotationFS> allTokens = cas.getAnnotationIndex(tokenType);

    for (Iterator<AnnotationFS> sentenceIterator =
        UIMAUtil.createMultiTypeIterator(cas, sentenceTypes);
        sentenceIterator.hasNext();) {

      AnnotationFS sentenceAnnotation = sentenceIterator.next();

      // TODO: Add code to detect overlapping sentences ... not allowed!

      sentences.add(new Span(sentenceAnnotation.getBegin(), sentenceAnnotation.getEnd()));

      // Performance Note:
      // The following code has O(n^2) complexity, can be optimized
      // by using a token iterate over all tokens and manual weaving.

      ContainingConstraint containingConstraint =
          new ContainingConstraint(sentenceAnnotation);

      Iterator<AnnotationFS> containingTokens = cas.createFilteredIterator(
          allTokens.iterator(), containingConstraint);

      while (containingTokens.hasNext()) {
        AnnotationFS token = containingTokens.next();

        tokens.add(new Span(token.getBegin(), token.getEnd()));
      }
    }

    // Validate everything before the name finder is modified, otherwise an
    // aborted run leaves the job with a partially updated configuration.
    if (sentences.isEmpty()) {
      nameFinderView.setMessage("CAS must at least contain one sentence!");
      return;
    }

    if (tokens.isEmpty()) {
      nameFinderView.setMessage("CAS must at least contain one token within a sentence!");
      return;
    }

    // Bug: Changing the data of the name finder will cause an issue if it is already running!

    nameFinder.setText(text);
    nameFinder.setSentences(sentences.toArray(new Span[0]));
    nameFinder.setTokens(tokens.toArray(new Span[0]));
    nameFinder.setModelPath(modelPathes, nameTypeNames);

    // Only switch the flag if it is not set yet.
    if (!nameFinder.isSystem()) {
      nameFinder.setSystem(true);
    }

    boolean isRecallBoostingEnabled =
        store.getBoolean(OpenNLPPreferenceConstants.ENABLE_CONFIRMED_NAME_DETECTION);

    if (isRecallBoostingEnabled) {
      nameFinder.setVerifiedNames(nameSpans.toArray(new Span[0]));
    }
    else {
      nameFinder.setVerifiedNames(null);
    }

    nameFinder.setIgnoreShortTokens(store.getBoolean(
        OpenNLPPreferenceConstants.IGNORE_SHORT_TOKENS));

    nameFinder.setOnlyConsiderAllLetterTokens(store.getBoolean(
        OpenNLPPreferenceConstants.ONLY_CONSIDER_ALL_LETTER_TOKENS));

    nameFinder.setOnlyConsiderInitialCapitalLetterTokens(store.getBoolean(
        OpenNLPPreferenceConstants.ONLY_CONSIDER_INITIAL_CAPITAL_TOKENS));

    nameFinder.schedule();
  }