private static Map getProvData()

in geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesProcessor.java [65:115]


  /**
   * Reads the geonames province (admin1) lookup file and builds one
   * {@link AdminBoundary} per line.
   *
   * <p>Each line must contain exactly four {@code TAB}-separated fields. The first
   * field is lower-cased and, when it has the form {@code a.b}, split into a country
   * code and a province code; the third field is used as the province name. If the
   * country part matches {@code REGEX_NUMBERS}, the two parts are swapped. Country
   * names are resolved through {@code ccodes}; codes without an entry produce a
   * {@code null} country name and are counted in the summary message.
   *
   * <p>Reading is best-effort: on an I/O error or a malformed line the failure is
   * reported and the entries parsed so far are returned.
   *
   * @param adm1CodesLookupFile the province lookup file to read
   * @param ccodes              lookup from (lower-cased) country code to country name
   * @return a map keyed by {@code countryCode + "." + provinceCode}; never {@code null},
   *         possibly empty or partial if reading failed
   */
  private static Map<String, AdminBoundary> getProvData(File adm1CodesLookupFile, Map<String, String> ccodes) {
    System.out.println("Attempting to read geonames province data from: " + adm1CodesLookupFile.getPath());

    Map<String, AdminBoundary> outmap = new HashMap<>();
    Set<String> nullcodes = new HashSet<>();
    // Compiled once; String.matches() would recompile the expression for every line.
    java.util.regex.Pattern numericCode = java.util.regex.Pattern.compile(REGEX_NUMBERS);

    // Decode explicitly as UTF-8 (the encoding GeoNames publishes its dumps in) instead of
    // relying on the platform default charset, which would garble non-ASCII province names.
    try (BufferedReader reader = new BufferedReader(new java.io.InputStreamReader(
        new java.io.FileInputStream(adm1CodesLookupFile), java.nio.charset.StandardCharsets.UTF_8))) {
      String line;
      int lineNumber = 0;
      while ((line = reader.readLine()) != null) {
        lineNumber++;
        String[] values = line.split(TAB);
        if (values.length != 4) {
          throw new IOException("improperly formatted province lookup file (line " + lineNumber
              + ": expected 4 tab-separated fields, found " + values.length + ")");
        }

        // Locale.ROOT keeps the casing stable regardless of the host locale
        // (e.g. Turkish would otherwise map 'I' to a dotless 'ı').
        String ccode = values[0].toLowerCase(java.util.Locale.ROOT);
        String pcode = "";
        String[] split = ccode.split("\\.");
        if (split.length == 2) {
          ccode = split[0];
          pcode = split[1];
        }

        String pname = values[2];

        // Some reference rows carry the numeric part first; swap so ccode holds the non-numeric part.
        if (numericCode.matcher(ccode).matches()) {
          String code = ccode;
          ccode = pcode;
          pcode = code;
        }

        String cname = ccodes.get(ccode);
        if (cname == null) {
          nullcodes.add(ccode);
        }

        AdminBoundary data = new AdminBoundary(ccode, cname, pcode, pname, "NO_DATA_FOUND", "NO_DATA_FOUND", cname, pname, "NO_DATA_FOUND");
        outmap.put(ccode + "." + pcode, data);
      }
      System.out.println("INFO: there were " + nullcodes.size() + " null prov codes. This is due to inconsistencies in reference data.");
      // Only claim success when the whole file was actually consumed.
      System.out.println("Successfully read geonames province data from: " + adm1CodesLookupFile.getPath());
    } catch (IOException ex) {
      // Best-effort: report the failure and fall through to return what was parsed so far.
      System.err.println("Failed to read geonames province data from: " + adm1CodesLookupFile.getPath()
          + " (" + outmap.size() + " entries read before the failure)");
      ex.printStackTrace();
    }

    return outmap;

  }