public void index()

in geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GazetteerIndexer.java [115:171]


  /**
   * Builds the gazetteer Lucene index and the country context file from the supplied data files.
   * <p>
   * The index is written to the {@code opennlp_geoentitylinker_gazetteer} sub-path of
   * {@code outputIndexDir}. The country context file is truncated and re-created with a
   * tab-separated header row before the processors run; each processor receives that file
   * together with the shared {@link IndexWriter} (presumably appending its own rows to the
   * file -- confirm against the processor implementations).
   * <p>
   * Processing order is USGS, then Geonames, then regions, followed by a single commit.
   *
   * @param geonamesData             Geonames data file; must exist
   * @param geoNamesCountryInfo      Geonames country info file; must exist
   * @param geonamesAdmin1CodesASCII Geonames admin1 codes file; must exist
   * @param usgsDataFile             USGS data file; must exist
   * @param usgsGovUnitsFile         USGS government units file; must exist
   * @param outputIndexDir           existing directory under which the index is created
   * @param outputCountryContextFile file that receives the country context table; overwritten
   * @param regionsFile              regions file; must exist
   * @throws IllegalArgumentException if {@code outputIndexDir} is not an existing directory
   * @throws FileNotFoundException    if any of the input data files does not exist
   * @throws IOException              if reading the inputs or writing the index/context file fails
   */
  public void index(File geonamesData, File geoNamesCountryInfo, File geonamesAdmin1CodesASCII, File usgsDataFile,
                    File usgsGovUnitsFile, File outputIndexDir, File outputCountryContextFile, File regionsFile)
          throws IOException {
    // File.isDirectory() is false for a missing path as well, so this single check also covers
    // "outputIndexDir does not exist"; a separate exists() check would be unreachable.
    if (!outputIndexDir.isDirectory()) {
      throw new IllegalArgumentException("outputIndexDir must be a directory.");
    }
    if (!geonamesData.exists()) {
      throw new FileNotFoundException("geonames data file does not exist");
    }
    if (!geoNamesCountryInfo.exists()) {
      throw new FileNotFoundException("geoNamesCountryCodes data file does not exist");
    }
    if (!geonamesAdmin1CodesASCII.exists()) {
      throw new FileNotFoundException("geonamesAdmin1CodesASCII data file does not exist");
    }
    if (!usgsDataFile.exists()) {
      throw new FileNotFoundException("usgsDataFile data file does not exist");
    }
    if (!usgsGovUnitsFile.exists()) {
      throw new FileNotFoundException("usgsGovUnitsFile data file does not exist");
    }
    if (!regionsFile.exists()) {
      throw new FileNotFoundException("regionsFile data file does not exist");
    }

    String indexloc = outputIndexDir.getPath() + "/opennlp_geoentitylinker_gazetteer";

    // Default analyzer: StandardAnalyzer configured with an empty stop-word set.
    Analyzer a = new StandardAnalyzer(new CharArraySet(new ArrayList<>(), true));

    // These fields are analyzed with KeywordAnalyzer instead of the default analyzer.
    Map<String, Analyzer> analyMap = new HashMap<>();
    analyMap.put("countrycode", new KeywordAnalyzer());
    analyMap.put("admincode", new KeywordAnalyzer());
    analyMap.put("loctype", new KeywordAnalyzer());
    analyMap.put("countycode", new KeywordAnalyzer());
    analyMap.put("gazsource", new KeywordAnalyzer());

    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(a, analyMap);
    IndexWriterConfig config = new IndexWriterConfig(aWrapper);

    // Both the Directory and the IndexWriter are managed here so the memory-mapped directory is
    // released even when indexing fails; resources close in reverse order (writer, then directory).
    try (Directory index = new MMapDirectory(Paths.get(indexloc));
         IndexWriter w = new IndexWriter(index, config)) {
      // Write the column headers for the countryContextFile (append=false truncates any old file).
      try (FileWriter writer = new FileWriter(outputCountryContextFile, false)) {
        String colNamesForCountryContextFile = "countrycode\tprovcode\tcountycode\tcountryname\tprovincename\tcountyname\tcountryregex\tprovregex\tcountyregex\n";
        writer.write(colNamesForCountryContextFile);
      }
      USGSProcessor.process(usgsGovUnitsFile, usgsDataFile, outputCountryContextFile, w);
      GeonamesProcessor.process(geoNamesCountryInfo, geonamesAdmin1CodesASCII, geonamesData, outputCountryContextFile, w);
      RegionProcessor.process(regionsFile, outputCountryContextFile, w);
      w.commit();
    }

    System.out.println("\nIndexing complete. Be sure to add '" + indexloc + "' and context file '" +
            outputCountryContextFile.getPath() + "' to entitylinker.properties file");
  }