public Map run()

in src/java/org/apache/nutch/crawl/CrawlDb.java [288:377]


  public Map<String, Object> run(Map<String, Object> args, String crawlId) throws Exception {

    Map<String, Object> results = new HashMap<>();

    boolean normalize = getConf().getBoolean(CrawlDbFilter.URL_NORMALIZING,
        false);
    boolean filter = getConf().getBoolean(CrawlDbFilter.URL_FILTERING, false);
    boolean additionsAllowed = getConf().getBoolean(CRAWLDB_ADDITIONS_ALLOWED,
        true);
    boolean force = false;
    HashSet<Path> dirs = new HashSet<>();

    if (args.containsKey("normalize")) {
      normalize = true;
    } 
    if (args.containsKey("filter")) {
      filter = true;
    } 
    if (args.containsKey("force")) {
      force = true;
    } 
    if (args.containsKey("noAdditions")) {
      additionsAllowed = false;
    }

    Path crawlDb;
    if(args.containsKey(Nutch.ARG_CRAWLDB)) {
      Object crawldbPath = args.get(Nutch.ARG_CRAWLDB);
      if(crawldbPath instanceof Path) {
        crawlDb = (Path) crawldbPath;
      }
      else {
        crawlDb = new Path(crawldbPath.toString());
      }
    }
    else {
      crawlDb = new Path(crawlId+"/crawldb");
    }

    Path segmentsDir;
    if(args.containsKey(Nutch.ARG_SEGMENTDIR)) {
      Object segDir = args.get(Nutch.ARG_SEGMENTDIR);
      if(segDir instanceof Path) {
        segmentsDir = (Path) segDir;
      }
      else {
        segmentsDir = new Path(segDir.toString());
      }
      FileSystem fs = segmentsDir.getFileSystem(getConf());
      FileStatus[] paths = fs.listStatus(segmentsDir,
          HadoopFSUtil.getPassDirectoriesFilter(fs));
      dirs.addAll(Arrays.asList(HadoopFSUtil.getPaths(paths)));
    }
    else if(args.containsKey(Nutch.ARG_SEGMENTS)) {
      Object segments = args.get(Nutch.ARG_SEGMENTS);
      ArrayList<String> segmentList = new ArrayList<>();
      if(segments instanceof ArrayList) {
    	segmentList = (ArrayList<String>)segments;
      }
      else if(segments instanceof Path){
    	segmentList.add(segments.toString());
      }
    	      
      for(String segment: segmentList) {
        dirs.add(new Path(segment));
      }
    }
    else {
      String segmentDir = crawlId+"/segments";
      File dir = new File(segmentDir);
      File[] segmentsList = dir.listFiles();  
      Arrays.sort(segmentsList, (f1, f2) -> {
        if(f1.lastModified()>f2.lastModified())
          return -1;
        else
          return 0;
      });
      dirs.add(new Path(segmentsList[0].getPath()));
    }
    try {
      update(crawlDb, dirs.toArray(new Path[dirs.size()]), normalize,
          filter, additionsAllowed, force);
      results.put(Nutch.VAL_RESULT, Integer.toString(0));
      return results;
    } catch (Exception e) {
      LOG.error("CrawlDb update: " + StringUtils.stringifyException(e));
      results.put(Nutch.VAL_RESULT, Integer.toString(-1));
      return results;
    }
  }