in modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java [42:102]
/**
 * Runs the iterative annotate / train / harvest loop that grows a name finder model.
 *
 * <p>Each iteration:
 * <ol>
 *   <li>annotates every sentence that contains a known entity (plain substring match),</li>
 *   <li>writes the annotated sentences and builds a model from them,</li>
 *   <li>runs a {@code NameFinderME} created from that model over every sentence the validator
 *       accepts, and</li>
 *   <li>registers each found name the validator accepts as a new known entity and annotates the
 *       sentence with it.</li>
 * </ol>
 * After the last iteration the annotated sentences are written and the model is built once more,
 * so annotations added during the final harvest are included.
 *
 * <p>If the sentence provider or the known entity provider yields an empty collection, a message
 * is printed and the method returns immediately, without the trailing write/build. Progress is
 * reported on {@code System.out}.
 *
 * @param sentenceProvider    source of the sentences to annotate and to run NER over
 * @param knownEntityProvider source of seed entities and of the entity type; receives every newly
 *                            accepted entity
 * @param validator           decides which sentences are processed and which found names are kept
 * @param modelable           collects annotated sentences, writes them, builds and exposes the model
 * @param iterations          number of annotate / train / harvest rounds; when zero or negative
 *                            only the trailing write/build is performed
 */
public void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider,
    ModelGenerationValidator validator, Modelable modelable, int iterations) {
  for (int iteration = 0; iteration < iterations; iteration++) {
    System.out.println("ITERATION: " + iteration);
    System.out.println("\tPerforming Known Entity Annotation");
    System.out.println("\t\tknown size: " + knownEntityProvider.getKnownEntities().size());
    System.out.println("\t\treading data....: ");

    // Bail out before the sentence x entity scan: with either input empty the scan cannot
    // produce an annotation, and it would query the known entity provider once per sentence.
    if (sentenceProvider.getSentences().isEmpty()) {
      System.out.println("No sentences in file");
      return;
    }
    if (knownEntityProvider.getKnownEntities().isEmpty()) {
      System.out.println("No known entities in file");
      return;
    }

    annotateWithKnownEntities(sentenceProvider, knownEntityProvider, modelable);

    System.out.println("\t\twriting annotated sentences....: ");
    modelable.writeAnnotatedSentences();
    System.out.println("\t\tbuilding model.... ");
    modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
    System.out.println("\t\tmodel building complete.... ");
    NameFinderME nf = new NameFinderME(modelable.getModel());
    System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());
    System.out.println("\tPerforming NER with new model");
    System.out.println("\t\tPrinting NER Results. Add undesired results to the blacklist file and start over");

    harvestNamedEntities(nf, sentenceProvider, knownEntityProvider, validator, modelable);

    System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size());
    System.out.println("\t\tknown size: " + knownEntityProvider.getKnownEntities().size());
  }
  // Final pass so that annotations added by the last harvest end up in the written data and model.
  modelable.writeAnnotatedSentences();
  modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
}

/** Adds one annotated sentence per (sentence, known entity) pair where the sentence contains the entity. */
private static void annotateWithKnownEntities(SentenceProvider sentenceProvider,
    KnownEntityProvider knownEntityProvider, Modelable modelable) {
  for (String sentence : sentenceProvider.getSentences()) {
    for (String knownEntity : knownEntityProvider.getKnownEntities()) {
      if (sentence.contains(knownEntity)) {
        // Open question: if the same sentence has multiple hits, should they be annotated separately?
        modelable.addAnnotatedSentence(
            modelable.annotate(sentence, knownEntity, knownEntityProvider.getKnownEntitiesType()));
      }
    }
  }
}

/** Runs the name finder over each valid sentence; accepted names become known entities and new annotations. */
private static void harvestNamedEntities(NameFinderME nf, SentenceProvider sentenceProvider,
    KnownEntityProvider knownEntityProvider, ModelGenerationValidator validator, Modelable modelable) {
  for (String sentence : sentenceProvider.getSentences()) {
    if (!validator.validSentence(sentence)) {
      continue;
    }
    String[] tokens = modelable.tokenizeSentenceToWords(sentence);
    Span[] find = nf.find(tokens);
    // Adaptive data is cleared after every sentence, so no state carries over to the next one.
    nf.clearAdaptiveData();
    String[] namedEntities = Span.spansToStrings(find, tokens);
    for (String namedEntity : namedEntities) {
      System.out.println("\t\t" + namedEntity);
      if (validator.validNamedEntity(namedEntity)) {
        knownEntityProvider.addKnownEntity(namedEntity);
        modelable.addAnnotatedSentence(
            modelable.annotate(sentence, namedEntity, knownEntityProvider.getKnownEntitiesType()));
      } else {
        System.out.println("\t\t" + namedEntity + "...already blacklisted");
      }
    }
  }
}