public static void readFile()

in geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/RegionProcessor.java [58:118]


  /**
   * Reads a tab-separated regions gazetteer, indexes one document per region and appends a
   * matching entry per region to the country context file.
   *
   * <p>The first line of the input is skipped (presumably a header row). Every following line
   * must provide at least three tab-separated columns, which are read as: place name (column 0),
   * longitude (column 1) and latitude (column 2). Lines with fewer columns are reported on
   * {@code System.err} and skipped instead of aborting the whole run.
   *
   * <p>Each region gets the id {@code "rg" + <zero-based line number>}; that id is stored in
   * both the {@code locid} and the {@code countrycode} field. After all lines have been read the
   * writer is committed, and the collected context entries are appended (not overwritten) to
   * {@code outputCountryContextfile}.
   *
   * @param gazetteerInput the tab-separated regions file to read
   * @param outputCountryContextfile the country context file the region entries are appended to
   * @param w the index writer that receives the region documents; when {@code null} nothing is
   *          indexed, but the country context file is still written
   * @throws IOException if reading the gazetteer, writing the index or writing the country
   *                     context file fails
   */
  public static void readFile(File gazetteerInput, File outputCountryContextfile, IndexWriter w)
          throws IOException {
    List<String> ccfileentries = new ArrayList<>();
    // Counts every line read (including skipped ones) because the region id is derived from it.
    int counter = 0;
    System.out.println("reading gazetteer data from Regions file...........");
    String line;
    try (BufferedReader reader = new BufferedReader(new FileReader(gazetteerInput))) {
      while ((line = reader.readLine()) != null) {
        int lineIndex = counter++;
        if (lineIndex == 0) {
          // The first line is never indexed.
          continue;
        }

        String[] values = line.split("\t");
        if (values.length < 3) {
          // split() drops trailing empty columns, so blank or truncated rows end up here;
          // indexing into them would throw ArrayIndexOutOfBoundsException.
          System.err.println("skipping malformed region line " + lineIndex
                  + ": expected at least 3 tab-separated columns but found " + values.length);
          continue;
        }

        String placeName = values[0];
        // NOTE(review): longitude is read before latitude; confirm against the regions file layout.
        String lon = values[1];
        String lat = values[2];
        String dsg = "region";
        String id = "rg" + lineIndex;

        // Regions have no parent levels, so the hierarchy is just the place name itself.
        String hierarchy = placeName;

        //countrycontext file format
        // US	KY	131	United States	Kentucky	Leslie
        ccfileentries.add(id + "\t" + id + "\t" + id + "\t" + placeName + "\t" + "NO_DATA_FOUND" + "\t" + "NO_DATA_FOUND" + "\t" + "("
                + placeName + ")" + "\t" + "NO_DATA_FOUND" + "\t" + "NO_DATA_FOUND" + "\n");

        if (w != null) {
          Document doc = new Document();
          doc.add(new TextField("hierarchy", hierarchy, Field.Store.YES));
          doc.add(new TextField("placename", placeName, Field.Store.YES));
          doc.add(new StringField("latitude", lat, Field.Store.YES));
          doc.add(new StringField("longitude", lon, Field.Store.YES));
          doc.add(new StringField("loctype", dsg, Field.Store.YES));
          doc.add(new StringField("admincode", "", Field.Store.YES));
          // The generated region id doubles as the country code for region entries.
          doc.add(new StringField("countrycode", id, Field.Store.YES));
          doc.add(new StringField("countycode", "", Field.Store.YES));

          doc.add(new StringField("locid", id, Field.Store.YES));
          doc.add(new StringField("gazsource", "region", Field.Store.YES));
          w.addDocument(doc);
        }
      }
      if (w != null) {
        w.commit();
      }
    }

    // Second constructor argument 'true' opens the file in append mode, keeping existing entries.
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(outputCountryContextfile, true))) {
      for (String entry : ccfileentries) {
        bw.write(entry);
      }
      System.out.println("successfully wrote Region entries to country oontext file");
    }
    System.out.println("Completed indexing regions!");
  }