in src/java/org/apache/nutch/hostdb/UpdateHostDb.java [164:252]
/**
 * Parses the command line arguments and launches the host database update.
 *
 * <p>Recognized options:
 * <ul>
 *   <li>{@code -hostdb <hostdb>} (mandatory), {@code -crawldb <crawldb>},
 *       {@code -tophosts <tophosts>}: paths handed to {@code updateHostDb}</li>
 *   <li>{@code -checkFailed}, {@code -checkNew}, {@code -checkKnown}:
 *       individual check flags; {@code -checkAll} enables all three</li>
 *   <li>{@code -force}, {@code -filter}, {@code -normalize}: boolean
 *       switches</li>
 *   <li>{@code -urlLimit <N>}: numeric limit, defaults to {@code -1}</li>
 * </ul>
 *
 * @param args the raw command line arguments
 * @return {@code 0} when {@code updateHostDb} completes without throwing,
 *         {@code -1} when the arguments are invalid (too few arguments,
 *         option without its value, non-numeric URL limit, missing
 *         {@code -hostdb}) or when {@code updateHostDb} throws
 * @throws Exception declared for interface compatibility; failures of
 *         {@code updateHostDb} are caught, logged and reported through the
 *         return value
 */
public int run(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println("Usage: UpdateHostDb -hostdb <hostdb> " +
      "[-tophosts <tophosts>] [-crawldb <crawldb>] [-checkAll] [-checkFailed]" +
      " [-checkNew] [-checkKnown] [-force] [-filter] [-normalize] [-urlLimit <N>]");
    return -1;
  }

  Path hostDb = null;
  Path crawlDb = null;
  Path topHosts = null;

  boolean checkFailed = false;
  boolean checkNew = false;
  boolean checkKnown = false;
  boolean force = false;
  boolean filter = false;
  boolean normalize = false;
  long urlLimit = -1L;

  for (int i = 0; i < args.length; i++) {
    final String option = args[i];

    final boolean takesValue = "-hostdb".equals(option)
      || "-crawldb".equals(option)
      || "-tophosts".equals(option)
      || "-urlLimit".equals(option);

    if (takesValue) {
      // Fail with a readable message instead of running off the end of the
      // array when the option is the last argument.
      if (i + 1 >= args.length) {
        System.err.println("Missing value for option " + option);
        return -1;
      }

      // Consume the value here so it is never re-interpreted as an option.
      final String value = args[++i];

      if ("-hostdb".equals(option)) {
        hostDb = new Path(value);
        LOG.info("UpdateHostDb: hostdb: " + hostDb);
      } else if ("-crawldb".equals(option)) {
        crawlDb = new Path(value);
        LOG.info("UpdateHostDb: crawldb: " + crawlDb);
      } else if ("-tophosts".equals(option)) {
        topHosts = new Path(value);
        LOG.info("UpdateHostDb: tophosts: " + topHosts);
      } else {
        // Only -urlLimit remains among the options that take a value.
        try {
          urlLimit = Long.parseLong(value);
        } catch (NumberFormatException e) {
          System.err.println("Invalid number for -urlLimit: " + value);
          return -1;
        }
        LOG.info("UpdateHostDb: URL limit set to " + urlLimit);
      }
    } else if ("-checkFailed".equals(option)) {
      LOG.info("UpdateHostDb: checking failed hosts");
      checkFailed = true;
    } else if ("-checkNew".equals(option)) {
      LOG.info("UpdateHostDb: checking new hosts");
      checkNew = true;
    } else if ("-checkKnown".equals(option)) {
      LOG.info("UpdateHostDb: checking known hosts");
      checkKnown = true;
    } else if ("-checkAll".equals(option)) {
      LOG.info("UpdateHostDb: checking all hosts");
      checkFailed = true;
      checkNew = true;
      checkKnown = true;
    } else if ("-force".equals(option)) {
      LOG.info("UpdateHostDb: forced check");
      force = true;
    } else if ("-filter".equals(option)) {
      LOG.info("UpdateHostDb: filtering enabled");
      filter = true;
    } else if ("-normalize".equals(option)) {
      LOG.info("UpdateHostDb: normalizing enabled");
      normalize = true;
    } else {
      // Unknown arguments are still tolerated, but no longer go unnoticed.
      LOG.warn("UpdateHostDb: ignoring unrecognized argument: " + option);
    }
  }

  if (hostDb == null) {
    System.err.println("hostDb is mandatory");
    return -1;
  }

  try {
    updateHostDb(hostDb, crawlDb, topHosts, checkFailed, checkNew,
      checkKnown, force, filter, normalize, urlLimit);
    return 0;
  } catch (Exception e) {
    // Report the failure through the exit code rather than propagating it.
    LOG.error("UpdateHostDb: " + StringUtils.stringifyException(e));
    return -1;
  }
}