in src/java/org/apache/nutch/segment/SegmentReader.java [659:762]
/**
 * Parses the command line and runs the requested segment reader operation.
 *
 * <p>
 * {@code args[0]} selects the operation ({@code -dump}, {@code -list} or
 * {@code -get}). General options ({@code -nocontent}, {@code -nofetch},
 * {@code -nogenerate}, {@code -noparse}, {@code -noparsedata},
 * {@code -noparsetext}, {@code -recode}) are recognized anywhere after the
 * operation; each recognized option is replaced by {@code null} in
 * {@code args} so that the positional arguments can be told apart from
 * options afterwards. As a consequence the positional arguments of
 * {@code -dump} and {@code -get} must directly follow the operation:
 * an option in their place is reported as a missing argument.
 * </p>
 *
 * @param args
 *          command-line arguments; recognized general options are
 *          overwritten with {@code null} in place
 * @return 0 if the operation was run, -1 if the arguments are incomplete or
 *         the operation is unknown
 * @throws Exception
 *           if the invoked operation or the file system access fails
 */
public int run(String[] args) throws Exception {
  if (args.length < 2) {
    usage();
    return -1;
  }
  int mode = -1;
  if (args[0].equals("-dump")) {
    mode = MODE_DUMP;
  } else if (args[0].equals("-list")) {
    mode = MODE_LIST;
  } else if (args[0].equals("-get")) {
    mode = MODE_GET;
  }
  // collect general options; consumed options are nulled out so the
  // per-mode code below only sees positional arguments
  for (int i = 1; i < args.length; i++) {
    if (args[i].equals("-nocontent")) {
      co = false;
      args[i] = null;
    } else if (args[i].equals("-nofetch")) {
      fe = false;
      args[i] = null;
    } else if (args[i].equals("-nogenerate")) {
      ge = false;
      args[i] = null;
    } else if (args[i].equals("-noparse")) {
      pa = false;
      args[i] = null;
    } else if (args[i].equals("-noparsedata")) {
      pd = false;
      args[i] = null;
    } else if (args[i].equals("-noparsetext")) {
      pt = false;
      args[i] = null;
    } else if (args[i].equals("-recode")) {
      recodeContent = true;
      args[i] = null;
    }
  }
  if (recodeContent) {
    LOG.info("Recoding charset of HTML content");
    getConf().setBoolean("segment.reader.content.recode", true);
  }
  // collect required args
  switch (mode) {
  case MODE_DUMP: {
    String input = args[1];
    if (input == null) {
      System.err.println("Missing required argument: <segment_dir>");
      usage();
      return -1;
    }
    String output = args.length > 2 ? args[2] : null;
    if (output == null) {
      System.err.println("Missing required argument: <output>");
      usage();
      return -1;
    }
    dump(new Path(input), new Path(output));
    return 0;
  }
  case MODE_LIST: {
    ArrayList<Path> dirs = new ArrayList<>();
    for (int i = 1; i < args.length; i++) {
      if (args[i] == null) {
        // general option consumed above
        continue;
      }
      if (args[i].equals("-dir")) {
        // -dir needs a value: it may be absent (-dir is the last argument)
        // or may have been a general option that was nulled out above
        String parent = i + 1 < args.length ? args[i + 1] : null;
        if (parent == null) {
          System.err.println("Missing required argument: <segments> after -dir");
          usage();
          return -1;
        }
        i++;
        // add every sub-directory of the given directory
        Path dir = new Path(parent);
        FileSystem fs = dir.getFileSystem(getConf());
        FileStatus[] fstats = fs.listStatus(dir,
            HadoopFSUtil.getPassDirectoriesFilter(fs));
        Path[] files = HadoopFSUtil.getPaths(fstats);
        if (files != null && files.length > 0) {
          dirs.addAll(Arrays.asList(files));
        }
      } else {
        dirs.add(new Path(args[i]));
      }
    }
    list(dirs, new OutputStreamWriter(System.out, StandardCharsets.UTF_8));
    return 0;
  }
  case MODE_GET: {
    String input = args[1];
    if (input == null) {
      System.err.println("Missing required argument: <segment_dir>");
      usage();
      return -1;
    }
    String key = args.length > 2 ? args[2] : null;
    if (key == null) {
      System.err.println("Missing required argument: <keyValue>");
      usage();
      return -1;
    }
    get(new Path(input), new Text(key),
        new OutputStreamWriter(System.out, StandardCharsets.UTF_8),
        new HashMap<>());
    return 0;
  }
  default:
    System.err.println("Invalid operation: " + args[0]);
    usage();
    return -1;
  }
}