in geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/RegionProcessor.java [58:118]
/**
 * Reads a tab-separated regions gazetteer file, builds one index document per data row and
 * appends one matching entry per row to the country context file.
 * <p>
 * The first line of the input is treated as a header and skipped. For every other line, column 0
 * is used as the place name, column 1 as the longitude and column 2 as the latitude. Each row is
 * given the id {@code "rg" + lineIndex}, where {@code lineIndex} is the zero-based line number in
 * the input file; the index advances for every line read, so ids of valid rows do not shift when
 * another row is skipped. Rows with fewer than three columns (for example blank lines) are
 * skipped instead of aborting the whole run.
 *
 * @param gazetteerInput the tab-separated regions file to read
 * @param outputCountryContextfile the country context file; entries are appended to it
 * @param w the index writer receiving the documents; may be {@code null}, in which case nothing
 *          is indexed or committed but the country context file is still written
 * @throws IOException if reading the input, writing to the index, or writing the country context
 *                     file fails
 */
public static void readFile(File gazetteerInput, File outputCountryContextfile, IndexWriter w)
    throws IOException {
  List<String> ccfileentries = new ArrayList<>();
  int counter = 0;
  int skipped = 0;
  System.out.println("reading gazetteer data from Regions file...........");
  try (BufferedReader reader = new BufferedReader(new FileReader(gazetteerInput))) {
    String line;
    while ((line = reader.readLine()) != null) {
      // Line 0 is the column header and carries no region data.
      if (counter > 0) {
        String[] values = line.split("\t");
        if (values.length < 3) {
          // Name, longitude and latitude are all required; a short row cannot be indexed.
          skipped++;
        } else {
          String placeName = values[0];
          String lat = values[2];
          String lon = values[1];
          String dsg = "region";
          String id = "rg" + counter;

          Document doc = new Document();
          doc.add(new TextField("hierarchy", placeName, Field.Store.YES));
          doc.add(new TextField("placename", placeName, Field.Store.YES));
          doc.add(new StringField("latitude", lat, Field.Store.YES));
          doc.add(new StringField("longitude", lon, Field.Store.YES));
          doc.add(new StringField("loctype", dsg, Field.Store.YES));
          doc.add(new StringField("admincode", "", Field.Store.YES));
          // Regions have no real country code, so the generated region id is stored instead.
          doc.add(new StringField("countrycode", id, Field.Store.YES));
          doc.add(new StringField("countycode", "", Field.Store.YES));
          doc.add(new StringField("locid", id, Field.Store.YES));
          doc.add(new StringField("gazsource", "region", Field.Store.YES));

          // countrycontext file format
          // US KY 131 United States Kentucky Leslie
          ccfileentries.add(id + "\t" + id + "\t" + id + "\t" + placeName + "\t"
              + "NO_DATA_FOUND" + "\t" + "NO_DATA_FOUND" + "\t" + "(" + placeName + ")" + "\t"
              + "NO_DATA_FOUND" + "\t" + "NO_DATA_FOUND" + "\n");

          if (w != null) {
            w.addDocument(doc);
          }
        }
      }
      counter++;
    }
    if (w != null) {
      w.commit();
    }
  }
  if (skipped > 0) {
    System.out.println("skipped " + skipped + " malformed line(s) in Regions file");
  }
  // Opened in append mode: existing content of the country context file is kept.
  try (BufferedWriter bw = new BufferedWriter(new FileWriter(outputCountryContextfile, true))) {
    for (String entry : ccfileentries) {
      bw.write(entry);
    }
  }
  // Reported only after the writer has been closed, i.e. after the entries were flushed.
  System.out.println("successfully wrote Region entries to country context file");
  System.out.println("Completed indexing regions!");
}