public static void main()

in webindex/modules/data/src/main/java/webindex/data/Init.java [40:72]


  /**
   * Command-line entry point. Sets the Fluo table splits and, when a data directory is supplied,
   * reads the WARC archives under it through Spark, turns them into pages, initializes the
   * indexes from those pages and prints the collected statistics.
   *
   * @param args either empty or a single element holding the data directory; more than one
   *        argument logs the usage message and exits with status 1
   * @throws Exception propagated unchanged from any of the calls made here
   */
  public static void main(String[] args) throws Exception {
    // At most one argument (the optional data directory) is accepted.
    if (args.length > 1) {
      log.error("Usage: Init [<dataDir>]");
      System.exit(1);
    }

    // Table splits are set regardless of whether a data directory was given.
    IndexEnv indexEnv = new IndexEnv(WebIndexConfig.load());
    indexEnv.setFluoTableSplits();
    log.info("Initialized Fluo table splits");

    // Without a data directory there is nothing to load.
    if (args.length != 1) {
      log.info("An init data dir was not specified");
      return;
    }

    final String inputDir = args[0];
    IndexEnv.validateDataDir(inputDir);

    // The Spark context is closed automatically once the indexes have been initialized.
    try (JavaSparkContext sparkContext =
        new JavaSparkContext(new SparkConf().setAppName("webindex-init"))) {
      IndexStats indexStats = new IndexStats(sparkContext);

      final JavaPairRDD<Text, ArchiveReader> warcArchives = sparkContext.newAPIHadoopFile(inputDir,
          WARCFileInputFormat.class, Text.class, ArchiveReader.class, new Configuration());
      JavaRDD<Page> pageRdd = IndexUtil.createPages(warcArchives);

      indexEnv.initializeIndexes(sparkContext, pageRdd, indexStats);
      indexStats.print();
    }
  }