in geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java [90:177]
/**
 * Searches the gazetteer index for place names matching {@code searchString},
 * restricted by {@code whereClause}.
 * <p>
 * The input is first passed through {@code cleanInput}; if nothing remains, an
 * empty list is returned without querying the index. When the cleaned input
 * contains a space and {@code useHierarchyField} is set, the query additionally
 * requires a match on the {@code hierarchy} field. Results are looked up in, and
 * stored to, {@code GazetteerSearchCache}; a cached list is returned as-is (the
 * same list instance the cache holds).
 * <p>
 * {@link IOException} and {@code ParseException} are logged and not rethrown;
 * whatever entries were collected before the failure are returned but not cached.
 *
 * @param searchString the place name to look for
 * @param rowsReturned the maximum number of index hits to retrieve
 * @param whereClause  an additional query clause that is AND-ed onto the place
 *                     name query; it is concatenated verbatim into the query
 * @return the matching entries, never {@code null}; empty when nothing matched
 *         or the search failed
 */
public List<GazetteerEntry> find(String searchString, int rowsReturned, String whereClause) {
  ArrayList<GazetteerEntry> linkedData = new ArrayList<>();
  searchString = cleanInput(searchString);
  if (searchString.isEmpty()) {
    return linkedData;
  }
  try {
    // Lower-case with a fixed locale so the query text does not depend on the
    // JVM default locale (e.g. "I" becomes a dotless i under a Turkish locale).
    String loweredSearch = searchString.toLowerCase(java.util.Locale.ROOT);

    /*
     * Build the query. Multi-word names are additionally matched against the
     * hierarchy field when that feature is enabled.
     */
    String placeNameQueryString;
    if (searchString.trim().contains(" ") && useHierarchyField) {
      placeNameQueryString = "(placename:(" + loweredSearch + ") AND hierarchy:("
          + formatForHierarchy(searchString) + "))" + " AND " + whereClause;
    } else {
      placeNameQueryString = "placename:(" + loweredSearch + ") " + "AND " + whereClause;
    }

    /*
     * Check the cache and go no further if the records already exist. The row
     * limit is part of the key so that a result fetched with a smaller limit is
     * not served to a caller asking for more rows.
     */
    String cacheKey = placeNameQueryString + "|rows=" + rowsReturned;
    ArrayList<GazetteerEntry> cached = GazetteerSearchCache.get(cacheKey);
    if (cached != null) {
      return cached;
    }

    /*
     * Search the index.
     * NOTE(review): the first QueryParser argument is the default field name,
     * yet the whole query string is passed here. It only matters for terms
     * without an explicit field prefix -- confirm whether "placename" was
     * intended before changing it.
     */
    QueryParser parser = new QueryParser(placeNameQueryString, opennlpAnalyzer);
    Query q = parser.parse(placeNameQueryString);
    TopDocs bestDocs = opennlpSearcher.search(q, rowsReturned);

    for (int i = 0; i < bestDocs.scoreDocs.length; ++i) {
      int docId = bestDocs.scoreDocs[i].doc;
      double sc = bestDocs.scoreDocs[i].score;

      Document d = opennlpSearcher.doc(docId);
      List<IndexableField> fields = d.getFields();
      String lat = d.get("latitude");
      String lon = d.get("longitude");
      String placename = d.get("placename");
      // A document without a stored country code is treated as having an empty
      // one instead of failing with a NullPointerException.
      String rawCountryCode = d.get("countrycode");
      String parentid = rawCountryCode == null ? "" : rawCountryCode.toLowerCase(java.util.Locale.ROOT);
      String provid = d.get("admincode");
      String itemtype = d.get("loctype");
      String source = d.get("gazsource");
      String hier = d.get("hierarchy");

      GazetteerEntry ge = new GazetteerEntry(parentid, String.valueOf(docId), placename, itemtype);
      ge.getScoreMap().put("lucene", sc);
      ge.setIndexID(String.valueOf(docId));
      ge.setSource(source);
      ge.setLatitude(Double.valueOf(lat));
      ge.setLongitude(Double.valueOf(lon));
      ge.setProvinceCode(provid);
      ge.setCountryCode(parentid);
      ge.setHierarchy(hier);
      // Copy every stored field of the document into the entry's index data.
      for (IndexableField field : fields) {
        ge.getIndexData().put(field.name(), d.get(field.name()));
      }

      // Keep the hit when its parent id agrees with the country code (or no
      // country code is known), and make sure we don't produce a duplicate.
      if (ge.getItemParentID().equalsIgnoreCase(parentid) || parentid.isEmpty()) {
        if (!linkedData.contains(ge)) {
          linkedData.add(ge);
        }
      }
    }

    /*
     * Add the records to the cache for this query. This happens once, after all
     * hits were processed, so a failure part-way through never leaves a partial
     * result in the cache. Empty results are not cached.
     */
    if (!linkedData.isEmpty()) {
      GazetteerSearchCache.put(cacheKey, linkedData);
    }
  } catch (IOException | ParseException ex) {
    LOG.error(ex.getLocalizedMessage(), ex);
  }
  return linkedData;
}