in src/java/org/apache/nutch/indexer/IndexingJob.java [216:298]
/**
 * Parses the command-line arguments and runs the indexing job.
 *
 * <p>
 * Recognized options:
 * <ul>
 * <li>{@code -nocrawldb} &ndash; do not expect a CrawlDb argument</li>
 * <li>{@code -linkdb <path>} &ndash; path of the LinkDb</li>
 * <li>{@code -dir <path>} &ndash; directory whose indexable sub-directories
 * are added as segments</li>
 * <li>{@code -noCommit}, {@code -deleteGone}, {@code -filter},
 * {@code -normalize}, {@code -addBinaryContent}, {@code -base64} &ndash;
 * boolean flags passed through to {@code index(...)}</li>
 * <li>{@code -params <string>} &ndash; parameter string passed through to
 * {@code index(...)}</li>
 * </ul>
 * Unless {@code -nocrawldb} has already been seen, the first non-option
 * argument is taken as the CrawlDb path; every other non-option argument is
 * treated as a segment path and kept only if
 * {@code SegmentChecker.isIndexable} accepts it.
 *
 * @param args
 *          command-line arguments
 * @return {@code 0} if indexing completed, {@code -1} if the arguments are
 *         unusable (none given, an option is missing its value, no indexable
 *         segment found) or if {@code index(...)} threw an exception
 * @throws Exception
 *           if resolving a file system, listing a {@code -dir} directory or
 *           checking a segment fails; these calls happen before the guarded
 *           {@code index(...)} invocation and are not caught here
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    usage();
    return -1;
  }
  Path crawlDb = null;
  boolean noCrawlDb = false;
  Path linkDb = null;
  final List<Path> segments = new ArrayList<>();
  String params = null;
  boolean noCommit = false;
  boolean deleteGone = false;
  boolean filter = false;
  boolean normalize = false;
  boolean addBinaryContent = false;
  boolean base64 = false;
  for (int i = 0; i < args.length; i++) {
    final String arg = args[i];
    // Options that consume the following argument must not be the last one;
    // otherwise args[++i] below would throw ArrayIndexOutOfBoundsException
    // instead of reporting a usage error.
    final boolean takesValue = arg.equals("-linkdb") || arg.equals("-dir")
        || arg.equals("-params");
    if (takesValue && i + 1 >= args.length) {
      usage();
      System.err.println("Missing value for option " + arg);
      return -1;
    }
    if (arg.equals("-nocrawldb")) {
      noCrawlDb = true;
    } else if (arg.equals("-linkdb")) {
      linkDb = new Path(args[++i]);
    } else if (arg.equals("-dir")) {
      // add every indexable sub-directory of the given directory
      final Path dir = new Path(args[++i]);
      final FileSystem fs = dir.getFileSystem(getConf());
      final FileStatus[] fstats = fs.listStatus(dir,
          HadoopFSUtil.getPassDirectoriesFilter(fs));
      final Path[] files = HadoopFSUtil.getPaths(fstats);
      for (Path p : files) {
        if (SegmentChecker.isIndexable(p, fs)) {
          segments.add(p);
        }
      }
    } else if (arg.equals("-noCommit")) {
      noCommit = true;
    } else if (arg.equals("-deleteGone")) {
      deleteGone = true;
    } else if (arg.equals("-filter")) {
      filter = true;
    } else if (arg.equals("-normalize")) {
      normalize = true;
    } else if (arg.equals("-addBinaryContent")) {
      addBinaryContent = true;
    } else if (arg.equals("-base64")) {
      base64 = true;
    } else if (arg.equals("-params")) {
      params = args[++i];
    } else if (crawlDb == null && !noCrawlDb) {
      /*
       * expect CrawlDb as first non-option argument unless -nocrawldb is
       * given
       */
      crawlDb = new Path(arg);
    } else {
      // remaining arguments are segments
      final Path segment = new Path(arg);
      final FileSystem fs = segment.getFileSystem(getConf());
      if (SegmentChecker.isIndexable(segment, fs)) {
        segments.add(segment);
      }
    }
  }
  if (segments.isEmpty()) {
    usage();
    System.err.println("No indexable segments passed as arguments. At least one segment is required!");
    return -1;
  }
  try {
    index(crawlDb, linkDb, segments, noCommit, deleteGone, params, filter, normalize, addBinaryContent, base64);
    return 0;
  } catch (final Exception e) {
    // failures of the indexing job itself are logged and reported via the
    // return code rather than propagated
    LOG.error("Indexer: {}", StringUtils.stringifyException(e));
    return -1;
  }
}