public static void readFile()

in geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/RegionProcessor.java [54:113]


  /**
   * Reads a tab-separated regions file, builds one index document per data row and appends one
   * entry per data row to the country context file.
   *
   * <p>The first line of the input is skipped as a header. Every following line is split on tabs;
   * column 0 is used as the place name, column 1 as the longitude and column 2 as the latitude.
   * Each row gets the id {@code "rg" + lineIndex}, where {@code lineIndex} is the zero-based line
   * number in the input file (the header is line 0). Rows with fewer than three columns (for
   * example a trailing blank line) are reported on {@code System.err} and skipped instead of
   * aborting the whole run; they still consume a line index, so ids of the remaining rows do not
   * shift.
   *
   * @param gazateerInputData        the tab-separated regions file to read
   * @param outputCountryContextfile the country context file; it is opened in append mode, so
   *                                 existing content is kept
   * @param w                        the index writer that receives the documents and is committed
   *                                 once all lines are read; may be {@code null}, in which case
   *                                 nothing is indexed and only the country context file is written
   * @throws Exception if reading the input, writing the output or updating the index fails
   */
  public static void readFile(File gazateerInputData, File outputCountryContextfile, IndexWriter w) throws Exception {
    List<String> ccfileentries = new ArrayList<>();
    int counter = 0;
    System.out.println("reading gazetteer data from Regions file...........");

    // try-with-resources guarantees the input file is closed even when indexing throws.
    // NOTE(review): FileReader decodes with the platform default charset - confirm that this
    // matches the encoding of the regions file.
    try (BufferedReader reader = new BufferedReader(new FileReader(gazateerInputData))) {
      String line;
      while ((line = reader.readLine()) != null) {
        // Line 0 is the header row and carries no region data.
        if (counter > 0) {
          String[] values = line.split("\t");
          if (values.length < 3) {
            // Name, longitude and latitude are all required; skip the row rather than fail
            // with an ArrayIndexOutOfBoundsException and lose everything read so far.
            System.err.println("skipping malformed Regions line " + counter
                + ": expected at least 3 tab separated columns but found " + values.length);
          } else {
            String placeName = values[0];
            // Latitude is read from column 2 and longitude from column 1.
            String lat = values[2];
            String lon = values[1];
            String dsg = "region";
            String id = "rg" + counter;

            // A region has no parent entries here, so its hierarchy is just its own name.
            String hierarchy = placeName;

            Document doc = new Document();
            doc.add(new TextField("hierarchy", hierarchy, Field.Store.YES));
            doc.add(new TextField("placename", placeName, Field.Store.YES));
            doc.add(new StringField("latitude", lat, Field.Store.YES));
            doc.add(new StringField("longitude", lon, Field.Store.YES));
            doc.add(new StringField("loctype", dsg, Field.Store.YES));
            doc.add(new StringField("admincode", "", Field.Store.YES));
            // The generated region id doubles as the country code.
            doc.add(new StringField("countrycode", id, Field.Store.YES));
            doc.add(new StringField("countycode", "", Field.Store.YES));

            doc.add(new StringField("locid", id, Field.Store.YES));
            doc.add(new StringField("gazsource", "region", Field.Store.YES));
            //countrycontext file format
            // US	KY	131	United States	Kentucky	Leslie

            ccfileentries.add(id + "\t" + id + "\t" + id + "\t" + placeName + "\t" + "NO_DATA_FOUND" + "\t" + "NO_DATA_FOUND" + "\t" + "("
                + placeName + ")" + "\t" + "NO_DATA_FOUND" + "\t" + "NO_DATA_FOUND" + "\n");
            if (w != null) {
              w.addDocument(doc);
            }
          }
        }
        counter++;
      }
    }

    if (w != null) {
      w.commit();
    }

    // Append mode (second argument true): entries are added after whatever the file already holds.
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(outputCountryContextfile, true))) {
      for (String entry : ccfileentries) {
        bw.write(entry);
      }
    }
    // Reported only after the writer has been flushed and closed successfully.
    System.out.println("successfully wrote Region entries to country oontext file");
    System.out.println("Completed indexing regions!");
  }