public List find()

in geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java [90:177]


  /**
   * Searches the gazetteer index for entries whose place name matches the
   * given search string, restricted by an additional query clause.
   * <p>
   * The search string is first passed through {@code cleanInput}; if the
   * cleaned value is empty an empty list is returned without querying. The
   * query text is built as {@code placename:(<search>) AND <whereClause>}; when
   * the search string contains a space and {@code useHierarchyField} is set, a
   * {@code hierarchy:(...)} clause is added as well. The complete query text is
   * also the key used for {@code GazetteerSearchCache}: a cached result for the
   * same key is returned as-is, and a non-empty result is stored once the
   * search has completed.
   * <p>
   * Hits without a {@code countrycode} field are treated as having an empty
   * country code. Hits whose {@code latitude}/{@code longitude} fields are
   * missing or not parseable as numbers are logged and skipped.
   *
   * @param searchString the place name text to look for
   * @param rowsReturned the number of top hits requested from the searcher
   * @param whereClause  query text that is ANDed onto the place-name query
   * @return the matching entries (possibly the cached list instance); an empty
   *         list when nothing matched or when an {@code IOException} or
   *         {@code ParseException} occurred (the exception is logged)
   */
  public List<GazetteerEntry> find(String searchString, int rowsReturned, String whereClause) {
    // Kept as ArrayList because this same instance is handed to GazetteerSearchCache.put.
    ArrayList<GazetteerEntry> linkedData = new ArrayList<>();
    searchString = cleanInput(searchString);
    if (searchString.isEmpty()) {
      return linkedData;
    }
    try {
      /*
       * Build the query text. Lower-casing uses Locale.ROOT so the result does
       * not depend on the JVM default locale (e.g. Turkish dotless i).
       */
      String loweredSearch = searchString.toLowerCase(java.util.Locale.ROOT);
      String placeNameQueryString;
      if (searchString.trim().contains(" ") && useHierarchyField) {
        placeNameQueryString = "(placename:(" + loweredSearch + ") AND hierarchy:("
            + formatForHierarchy(searchString) + "))" + " AND " + whereClause;
      } else {
        placeNameQueryString = "placename:(" + loweredSearch + ") " + "AND " + whereClause;
      }

      /*
       * Check the cache and go no further if the records already exist.
       */
      ArrayList<GazetteerEntry> cached = GazetteerSearchCache.get(placeNameQueryString);
      if (cached != null) {
        return cached;
      }

      /*
       * Search the placename.
       * NOTE(review): the first QueryParser argument is the default field name;
       * passing the whole query text looks unintended ("placename" seems more
       * likely) - left unchanged here, confirm before altering.
       */
      QueryParser parser = new QueryParser(placeNameQueryString, opennlpAnalyzer);
      Query q = parser.parse(placeNameQueryString);

      TopDocs bestDocs = opennlpSearcher.search(q, rowsReturned);
      for (int i = 0; i < bestDocs.scoreDocs.length; ++i) {
        int docId = bestDocs.scoreDocs[i].doc;
        double sc = bestDocs.scoreDocs[i].score;
        Document d = opennlpSearcher.doc(docId);

        String lat = d.get("latitude");
        String lon = d.get("longitude");
        if (lat == null || lon == null) {
          LOG.error("Skipping gazetteer document " + docId + ": missing latitude/longitude");
          continue;
        }
        Double latitude;
        Double longitude;
        try {
          latitude = Double.valueOf(lat);
          longitude = Double.valueOf(lon);
        } catch (NumberFormatException ex) {
          // One malformed record must not abort the whole search.
          LOG.error("Skipping gazetteer document " + docId + ": unparsable latitude/longitude", ex);
          continue;
        }

        // A missing country code is handled like the empty country code.
        String countryCode = d.get("countrycode");
        String parentid = countryCode == null ? "" : countryCode.toLowerCase(java.util.Locale.ROOT);
        String placename = d.get("placename");
        String provid = d.get("admincode");
        String itemtype = d.get("loctype");
        String source = d.get("gazsource");
        String hier = d.get("hierarchy");
        String indexId = String.valueOf(docId);

        GazetteerEntry ge = new GazetteerEntry(parentid, indexId, placename, itemtype);
        ge.getScoreMap().put("lucene", sc);
        ge.setIndexID(indexId);
        ge.setSource(source);
        ge.setLatitude(latitude);
        ge.setLongitude(longitude);
        ge.setProvinceCode(provid);
        ge.setCountryCode(parentid);
        ge.setHierarchy(hier);
        // Copy every stored field of the document into the entry's index data.
        for (IndexableField field : d.getFields()) {
          ge.getIndexData().put(field.name(), d.get(field.name()));
        }

        if (ge.getItemParentID().equalsIgnoreCase(parentid) || parentid.equalsIgnoreCase("")) {
          // make sure we don't produce a duplicate
          if (!linkedData.contains(ge)) {
            linkedData.add(ge);
          }
        }
      }

      /*
       * Cache the result once, after all hits were processed, so a failure in
       * the middle of the loop never leaves a partial list in the cache.
       */
      if (!linkedData.isEmpty()) {
        GazetteerSearchCache.put(placeNameQueryString, linkedData);
      }

    } catch (IOException | ParseException ex) {
      LOG.error(ex.getLocalizedMessage(), ex);
    }

    return linkedData;
  }