in opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java [52:148]
public void run(String[] args) {
  // Command-line entry point. Expects args[0] to be the path of an entity linker
  // properties file. Reads name-annotated sentences from stdin (one per line),
  // groups them into documents separated by blank lines, runs the entity linker
  // on every document and logs each linked span. With no arguments, only the
  // help text is logged.
  //
  // Throws TerminateToolException if the properties file cannot be loaded or the
  // entity linker cannot be instantiated.
  if (0 == args.length) {
    logger.info(getHelp());
    return;
  }

  // TODO: Ask Mark if we can remove the type, the user knows upfront if s/he tries
  // to link place names or company mentions ...
  String entityType = "location";

  // Load the properties, they should contain everything that is necessary to instantiate
  // the component
  // TODO: Entity Linker Properties constructor should not duplicate code
  EntityLinkerProperties properties;
  try {
    properties = new EntityLinkerProperties(new File(args[0]));
  }
  catch (IOException e) {
    // Keep the original exception as cause so the underlying I/O problem stays diagnosable.
    throw new TerminateToolException(-1, "Failed to load the properties file!", e);
  }

  // TODO: It should not just throw Exception.
  EntityLinker<? extends Span> entityLinker;
  try {
    entityLinker = EntityLinkerFactory.getLinker(entityType, properties);
  }
  catch (Exception e) {
    // Keep the original exception as cause; the message alone loses the stack trace.
    throw new TerminateToolException(-1,
        "Failed to instantiate the Entity Linker: " + e.getMessage(), e);
  }

  PerformanceMonitor perfMon = new PerformanceMonitor("sent");
  perfMon.start();

  try (ObjectStream<String> untokenizedLineStream = new PlainTextByLineStream(
      new SystemInputStreamFactory(), SystemInputStreamFactory.encoding())) {

    List<NameSample> document = new ArrayList<>();

    String line;
    while ((line = untokenizedLineStream.read()) != null) {
      if (line.trim().isEmpty()) {
        // A blank line marks the end of the current document.
        linkAndLogDocument(entityLinker, document, perfMon);
      }
      else {
        document.add(NameSample.parse(line, false));
      }
    }

    // Input that does not end with a blank line still carries a pending document;
    // without this flush its sentences would be silently dropped.
    linkAndLogDocument(entityLinker, document, perfMon);
  }
  catch (IOException e) {
    CmdLineUtil.handleStdinIoError(e);
  }

  perfMon.stopAndPrintFinalResult();
}

/**
 * Runs the entity linker on the sentences collected for one document, logs every
 * linked span, counts the processed sentences and clears the document buffer.
 * <p>
 * The document text is rebuilt by appending each token followed by a single space
 * and terminating each sentence with a newline; sentence and token spans are
 * character offsets into that rebuilt text.
 * <p>
 * An empty document (for example produced by consecutive blank lines) is skipped
 * without calling the linker.
 *
 * @param entityLinker The linker used to resolve the names of the document.
 * @param document The sentences of the current document; cleared before returning.
 * @param perfMon The monitor whose counter is incremented by the number of sentences.
 */
private void linkAndLogDocument(EntityLinker<? extends Span> entityLinker,
    List<NameSample> document, PerformanceMonitor perfMon) {

  if (document.isEmpty()) {
    return;
  }

  int sentenceCount = document.size();

  StringBuilder text = new StringBuilder();
  Span[] sentences = new Span[sentenceCount];
  Span[][] tokensBySentence = new Span[sentenceCount][];
  Span[][] namesBySentence = new Span[sentenceCount][];

  for (int i = 0; i < sentenceCount; i++) {
    NameSample sample = document.get(i);
    namesBySentence[i] = sample.getNames();

    // Read the token array once per sample instead of once per loop iteration.
    String[] sentenceTokens = sample.getSentence();
    Span[] tokens = new Span[sentenceTokens.length];

    int sentenceBegin = text.length();
    for (int ti = 0; ti < sentenceTokens.length; ti++) {
      int tokenBegin = text.length();
      text.append(sentenceTokens[ti]);
      text.append(" ");
      // NOTE(review): the span end is taken after the separator was appended, so each
      // token span (and the sentence span below) covers the trailing space. Kept as-is
      // because linker implementations may rely on it - confirm whether this is intended.
      tokens[ti] = new Span(tokenBegin, text.length());
    }

    tokensBySentence[i] = tokens;
    sentences[i] = new Span(sentenceBegin, text.length());
    text.append("\n");
  }

  List<? extends Span> linkedSpans =
      entityLinker.find(text.toString(), sentences, tokensBySentence, namesBySentence);

  for (Span linkedSpan : linkedSpans) {
    logger.info(linkedSpan.toString());
  }

  perfMon.incrementCounter(sentenceCount);
  document.clear();
}