public void build()

in modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java [42:102]


  /**
   * Iteratively grows a named-entity model from a seed list of known entities.
   *
   * <p>Each iteration:
   * <ol>
   *   <li>annotates every sentence that contains a known entity (substring match),</li>
   *   <li>writes the annotated sentences and builds a model from them,</li>
   *   <li>runs the freshly built model over the sentences accepted by the validator and
   *       feeds every entity the validator accepts back into the known-entity list and
   *       the annotated-sentence set.</li>
   * </ol>
   * After the last iteration the annotated sentences are written and the model is built
   * once more so that the entities found in the final pass are included.
   *
   * <p>If either the sentences or the known entities are empty at the start of an
   * iteration, a message is printed and the method returns immediately without writing
   * or building anything further.
   *
   * @param sentenceProvider    source of the raw sentences
   * @param knownEntityProvider source (and sink) of known entity strings and their type
   * @param validator           decides which sentences are processed and which detected
   *                            entities are accepted
   * @param modelable           performs annotation, tokenization, persistence and model building
   * @param iterations          number of annotate/train/detect rounds to run; when this is
   *                            not positive only the final write and build are performed
   */
  public void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider,
          ModelGenerationValidator validator, Modelable modelable, int iterations) {
    for (int iteration = 0; iteration < iterations; iteration++) {
      System.out.println("ITERATION: " + iteration);
      System.out.println("\tPerforming Known Entity Annotation");
      System.out.println("\t\tknown size: " + knownEntityProvider.getKnownEntities().size());
      System.out.println("\t\treading data....: ");

      // Fail fast: with no sentences or no known entities the annotation pass below cannot
      // produce anything, so bail out before walking the sentences x entities product.
      // NOTE(review): assumes the provider getters are free of order-dependent side effects.
      if (sentenceProvider.getSentences().isEmpty()) {
        System.out.println("No sentences in file");
        return;
      }
      if (knownEntityProvider.getKnownEntities().isEmpty()) {
        System.out.println("No known entities in file");
        return;
      }

      annotateKnownEntities(sentenceProvider, knownEntityProvider, modelable);

      System.out.println("\t\twriting annotated sentences....: ");
      modelable.writeAnnotatedSentences();
      System.out.println("\t\tbuilding model.... ");
      modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
      System.out.println("\t\tmodel building complete.... ");
      NameFinderME nameFinder = new NameFinderME(modelable.getModel());
      System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());
      System.out.println("\tPerforming NER with new model");
      System.out.println("\t\tPrinting NER Results. Add undesired results to the blacklist file and start over");

      extractNewEntities(sentenceProvider, knownEntityProvider, validator, modelable, nameFinder);

      System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());
      System.out.println("\t\tknown size: " + knownEntityProvider.getKnownEntities().size());
    }
    // Persist and train once more so annotations added by the last detection pass are used.
    modelable.writeAnnotatedSentences();
    modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
  }

  /**
   * Annotates every sentence that contains a known entity and records the result on
   * {@code modelable}. A sentence containing several known entities is annotated and
   * added once per matching entity.
   */
  private void annotateKnownEntities(SentenceProvider sentenceProvider,
          KnownEntityProvider knownEntityProvider, Modelable modelable) {
    for (String sentence : sentenceProvider.getSentences()) {
      for (String knownEntity : knownEntityProvider.getKnownEntities()) {
        if (!sentence.contains(knownEntity)) {
          continue;
        }
        //if the same sentence has multiple hits should they be annotated separately?
        modelable.addAnnotatedSentence(
                modelable.annotate(sentence, knownEntity, knownEntityProvider.getKnownEntitiesType()));
      }
    }
  }

  /**
   * Runs {@code nameFinder} over every sentence the validator accepts, prints each detected
   * entity, and registers the ones the validator accepts as new known entities together
   * with a matching annotated sentence.
   */
  private void extractNewEntities(SentenceProvider sentenceProvider,
          KnownEntityProvider knownEntityProvider, ModelGenerationValidator validator,
          Modelable modelable, NameFinderME nameFinder) {
    for (String sentence : sentenceProvider.getSentences()) {
      if (!validator.validSentence(sentence)) {
        continue;
      }
      String[] tokens = modelable.tokenizeSentenceToWords(sentence);

      Span[] spans = nameFinder.find(tokens);
      // Reset the finder's adaptive data after every sentence, as the original loop did.
      nameFinder.clearAdaptiveData();

      for (String namedEntity : Span.spansToStrings(spans, tokens)) {
        System.out.println("\t\t" + namedEntity);
        if (validator.validNamedEntity(namedEntity)) {
          knownEntityProvider.addKnownEntity(namedEntity);
          modelable.addAnnotatedSentence(
                  modelable.annotate(sentence, namedEntity, knownEntityProvider.getKnownEntitiesType()));
        } else {
          System.out.println("\t\t" + namedEntity + "...already blacklisted");
        }
      }
    }
  }