in geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesProcessor.java [65:115]
/**
 * Reads the geonames province (ADM1) lookup file and builds a map of
 * {@link AdminBoundary} entries keyed by {@code ccode + "." + pcode}.
 * <p>
 * Every line must split on {@code TAB} into exactly four columns. The first
 * column is lower-cased and, when it has the form {@code "a.b"}, split into a
 * country part and a province part; the third column is used as the province
 * name. The country name is looked up in {@code ccodes} and may be
 * {@code null} when the code is unknown.
 * <p>
 * Reading is best-effort: on an I/O error or a malformed line the problem is
 * reported and the entries parsed so far are returned. No exception is
 * propagated to the caller.
 *
 * @param adm1CodesLookupFile the tab-separated province lookup file
 * @param ccodes              country code to country name lookup
 * @return the parsed entries; possibly partial or empty if reading failed
 */
private static Map<String, AdminBoundary> getProvData(File adm1CodesLookupFile, Map<String, String> ccodes) {
  final String path = adm1CodesLookupFile.getPath();
  System.out.println("Attempting to read geonames province data from: " + path);
  Map<String, AdminBoundary> outmap = new HashMap<>();
  Set<String> nullcodes = new HashSet<>();
  // Compile once instead of on every line (String.matches recompiles the regex per call).
  final java.util.regex.Pattern numberPattern = java.util.regex.Pattern.compile(REGEX_NUMBERS);
  // Decode explicitly as UTF-8 rather than the platform default charset used by FileReader;
  // GeoNames documents its dump files as UTF-8 encoded.
  try (BufferedReader reader = new BufferedReader(new java.io.InputStreamReader(
      new java.io.FileInputStream(adm1CodesLookupFile), java.nio.charset.StandardCharsets.UTF_8))) {
    String line;
    while ((line = reader.readLine()) != null) {
      String[] values = line.split(TAB);
      if (values.length != 4) {
        throw new IOException("improperly formatted province lookup file");
      }
      String ccode = values[0].toLowerCase();
      String[] split = ccode.split("\\.");
      String pcode = "";
      if (split.length == 2) {
        ccode = split[0];
        pcode = split[1];
      }
      String pname = values[2];
      // NOTE(review): presumably REGEX_NUMBERS matches purely numeric codes, i.e. this
      // handles rows whose two code parts are in reversed order - confirm against the data.
      if (numberPattern.matcher(ccode).matches()) {
        String code = ccode;
        ccode = pcode;
        pcode = code;
      }
      String cname = ccodes.get(ccode);
      if (cname == null) {
        // Remember unknown country codes so they can be counted in the summary below.
        nullcodes.add(ccode);
      }
      AdminBoundary data = new AdminBoundary(ccode, cname, pcode, pname, "NO_DATA_FOUND", "NO_DATA_FOUND", cname, pname, "NO_DATA_FOUND");
      outmap.put(ccode + "." + pcode, data);
    }
    System.out.println("INFO: there were " + nullcodes.size() + " null prov codes. This is due to inconsistencies in reference data.");
    // Only report success when the whole file was actually consumed without error.
    System.out.println("Successfully read geonames province data from: " + path);
  } catch (IOException ex) {
    System.err.println("ERROR: failed to read geonames province data from: " + path
        + " (" + outmap.size() + " entries parsed before the failure)");
    ex.printStackTrace();
  }
  return outmap;
}