in geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ModelBasedScorer.java [88:119]
/**
 * Builds, for each linked span, the chunk of document text that will be
 * categorized against the model.
 * <p>
 * Spans whose linked-entry list is empty are skipped, so the returned map has
 * no entry for their index. For every remaining span, the start offset of the
 * sentence it was found in is used as the centroid handed to
 * {@code getTextChunk}; the span's own offsets are not consulted.
 *
 * @param linkedSpans   the spans to generate features for; the position of a
 *                      span in this list is the key used in the returned map
 * @param sentenceSpans the sentence spans, indexed by the sentence id reported
 *                      by each linked span
 * @param docText       the document text, passed through to {@code getTextChunk}
 * @param radius        passed through to {@code getTextChunk}; presumably the
 *                      size of the window taken around the centroid (confirm
 *                      against that method)
 * @return a map from the index of a span in {@code linkedSpans} to the text
 *         chunk produced for it; never {@code null}, possibly empty
 * @throws ArrayIndexOutOfBoundsException if a span reports a sentence id that
 *         is not a valid index into {@code sentenceSpans}
 */
public Map<Integer, String> generateProximalFeatures(List<LinkedSpan<BaseLink>> linkedSpans,
    Span[] sentenceSpans, String docText, int radius) {
  Map<Integer, String> featureBags = new HashMap<>();
  for (int i = 0; i < linkedSpans.size(); i++) {
    LinkedSpan<?> span = linkedSpans.get(i);
    if (span.getLinkedEntries().isEmpty()) {
      // Spans that did not get linked to anything give us nothing to work with.
      continue;
    }
    /*
     * The beginning of the sentence the name span was found in suffices as a
     * centroid for feature generation around the named entity.
     */
    int sentenceStart = sentenceSpans[span.getSentenceid()].getStart();
    // Associate the span's index with the text used for categorization.
    featureBags.put(i, getTextChunk(sentenceStart, docText, radius));
  }
  return featureBags;
}