public int run()

in src/java/org/apache/nutch/indexer/IndexingJob.java [216:298]


  /**
   * Parses the command-line arguments and launches the indexing job.
   *
   * <p>Recognized options:
   * <ul>
   * <li>{@code -nocrawldb}: do not expect a CrawlDb argument</li>
   * <li>{@code -linkdb <path>}: path of the LinkDb</li>
   * <li>{@code -dir <path>}: add every directory below {@code <path>} (as
   * selected by {@code HadoopFSUtil.getPassDirectoriesFilter}) that
   * {@code SegmentChecker.isIndexable} accepts</li>
   * <li>{@code -params <string>}: parameter string handed through to
   * {@code index(...)}</li>
   * <li>{@code -noCommit}, {@code -deleteGone}, {@code -filter},
   * {@code -normalize}, {@code -addBinaryContent}, {@code -base64}: boolean
   * switches handed through to {@code index(...)}</li>
   * </ul>
   * The first argument that is not one of these options is taken as the
   * CrawlDb path (unless {@code -nocrawldb} was seen before it); every other
   * such argument is treated as a segment path and kept only if
   * {@code SegmentChecker.isIndexable} accepts it.
   *
   * @param args
   *          command-line arguments
   * @return 0 if indexing succeeded; -1 if no arguments were given, an option
   *         is missing its value, no indexable segment was found, or
   *         {@code index(...)} threw an exception
   * @throws Exception
   *           if accessing the file system fails while the arguments are
   *           evaluated (failures of the indexing itself are logged and
   *           reported via the return value instead)
   */
  public int run(String[] args) throws Exception {
    if (args.length == 0) {
      usage();
      return -1;
    }

    Path crawlDb = null;
    boolean noCrawlDb = false;

    Path linkDb = null;

    final List<Path> segments = new ArrayList<>();
    String params = null;

    boolean noCommit = false;
    boolean deleteGone = false;
    boolean filter = false;
    boolean normalize = false;
    boolean addBinaryContent = false;
    boolean base64 = false;

    for (int i = 0; i < args.length; i++) {
      final String arg = args[i];

      // Options consuming the following argument must not be the last one,
      // otherwise args[++i] below would run past the end of the array.
      final boolean takesValue = arg.equals("-linkdb") || arg.equals("-dir")
          || arg.equals("-params");
      if (takesValue && i + 1 >= args.length) {
        usage();
        System.err.println("Missing value for option " + arg);
        return -1;
      }

      switch (arg) {
      case "-nocrawldb":
        noCrawlDb = true;
        break;
      case "-linkdb":
        linkDb = new Path(args[++i]);
        break;
      case "-dir": {
        // collect all indexable segments found below the given directory
        final Path dir = new Path(args[++i]);
        final FileSystem fs = dir.getFileSystem(getConf());
        final FileStatus[] fstats = fs.listStatus(dir,
            HadoopFSUtil.getPassDirectoriesFilter(fs));
        for (Path p : HadoopFSUtil.getPaths(fstats)) {
          if (SegmentChecker.isIndexable(p, fs)) {
            segments.add(p);
          }
        }
        break;
      }
      case "-noCommit":
        noCommit = true;
        break;
      case "-deleteGone":
        deleteGone = true;
        break;
      case "-filter":
        filter = true;
        break;
      case "-normalize":
        normalize = true;
        break;
      case "-addBinaryContent":
        addBinaryContent = true;
        break;
      case "-base64":
        base64 = true;
        break;
      case "-params":
        params = args[++i];
        break;
      default:
        if (crawlDb == null && !noCrawlDb) {
          /*
           * expect CrawlDb as first non-option argument unless -nocrawldb is
           * given
           */
          crawlDb = new Path(arg);
        } else {
          // remaining arguments are segments
          final Path segment = new Path(arg);
          final FileSystem fs = segment.getFileSystem(getConf());
          if (SegmentChecker.isIndexable(segment, fs)) {
            segments.add(segment);
          }
        }
        break;
      }
    }

    if (segments.isEmpty()) {
      usage();
      System.err.println("No indexable segments passed as arguments. At least one segment is required!");
      return -1;
    }

    try {
      index(crawlDb, linkDb, segments, noCommit, deleteGone, params, filter, normalize, addBinaryContent, base64);
      return 0;
    } catch (final Exception e) {
      // report the failure through the exit code instead of propagating it
      LOG.error("Indexer: {}", StringUtils.stringifyException(e));
      return -1;
    }
  }