in tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/TikaEvalCLI.java [262:362]
/**
 * Prepares the command line for an extract comparison run and hands it to
 * {@link FSBatchProcessCLI#main(String[])}.
 * <p>
 * The incoming arguments are scanned for {@code -bc}, {@code -inputDir},
 * {@code -extractsA} and {@code -alterExtract}. Before delegating, this method:
 * <ul>
 *   <li>rejects an {@code -alterExtract} value other than {@code as_is},
 *       {@code first_only} or {@code concatenate_content};</li>
 *   <li>appends {@code -inputDir <extractsA>} when no {@code -inputDir} was given
 *       but {@code -extractsA} was;</li>
 *   <li>when no {@code -bc} was given, copies the bundled
 *       {@code /tika-eval-comparison-config.xml} resource to a temp file and
 *       appends {@code -bc <tempFile>};</li>
 *   <li>parses the resulting arguments against {@code ExtractComparer.OPTIONS} and
 *       rejects the combination of {@code -db} and {@code -jdbc}.</li>
 * </ul>
 * On any validation failure a message and the usage text are printed and the
 * method returns without running the batch process.
 *
 * @param subsetArgs the arguments for the compare command
 * @throws IllegalStateException if the bundled batch config resource cannot be found
 * @throws Exception if the temp config cannot be written or removed, or if the
 *                   batch process fails
 */
private void handleCompare(String[] subsetArgs) throws Exception {
    List<String> argList = new ArrayList<>(Arrays.asList(subsetArgs));
    boolean containsBC = false;
    String inputDir = null;
    String extractsA = null;
    String alterExtract = null;
    //scan for the options we need to inspect; options that take a value
    //consume the following argument
    for (int i = 0; i < argList.size(); i++) {
        String arg = argList.get(i);
        switch (arg) {
            case "-bc":
                containsBC = true;
                break;
            case "-inputDir":
                if (i + 1 >= argList.size()) {
                    System.err.println("Must specify directory after -inputDir");
                    ExtractComparer.USAGE();
                    return;
                }
                inputDir = argList.get(i + 1);
                i++;
                break;
            case "-extractsA":
                if (i + 1 >= argList.size()) {
                    System.err.println("Must specify directory after -extractsA");
                    ExtractComparer.USAGE();
                    return;
                }
                extractsA = argList.get(i + 1);
                i++;
                break;
            case "-alterExtract":
                if (i + 1 >= argList.size()) {
                    System.err.println("Must specify type 'as_is', 'first_only' or " + "'concatenate_content' after -alterExtract");
                    ExtractComparer.USAGE();
                    return;
                }
                alterExtract = argList.get(i + 1);
                i++;
                break;
            default:
                break;
        }
    }
    if (alterExtract != null && !alterExtract.equals("as_is") && !alterExtract.equals("concatenate_content") && !alterExtract.equals("first_only")) {
        System.out.println("Sorry, I don't understand:" + alterExtract + ". The values must be one of: as_is, first_only, concatenate_content");
        ExtractComparer.USAGE();
        return;
    }
    //need to specify each in the commandline that goes into tika-batch
    //if only extracts is passed to tika-batch,
    //the crawler will see no inputDir and start crawling "input".
    //if the user doesn't specify inputDir, crawl extractsA
    if (inputDir == null && extractsA != null) {
        argList.add("-inputDir");
        argList.add(extractsA);
    }
    Path tmpBCConfig = null;
    try {
        //only materialize the bundled batch config when the user did not supply one
        if (!containsBC) {
            tmpBCConfig = Files.createTempFile("tika-eval", ".xml");
            try (InputStream is = this
                    .getClass()
                    .getResourceAsStream("/tika-eval-comparison-config.xml")) {
                if (is == null) {
                    //getResourceAsStream returns null when the resource is missing;
                    //fail with a clear message instead of an NPE inside Files.copy
                    throw new IllegalStateException("Couldn't find /tika-eval-comparison-config.xml on the classpath");
                }
                Files.copy(is, tmpBCConfig, StandardCopyOption.REPLACE_EXISTING);
            }
            argList.add("-bc");
            argList.add(tmpBCConfig
                    .toAbsolutePath()
                    .toString());
        }
        String[] updatedArgs = argList.toArray(new String[0]);
        DefaultParser defaultCLIParser = new DefaultParser();
        try {
            CommandLine commandLine = defaultCLIParser.parse(ExtractComparer.OPTIONS, updatedArgs);
            if (commandLine.hasOption("db") && commandLine.hasOption("jdbc")) {
                System.out.println("Please specify either the default -db or the full -jdbc, not both");
                ExtractComparer.USAGE();
                return;
            }
        } catch (ParseException e) {
            System.out.println(e.getMessage() + "\n");
            ExtractComparer.USAGE();
            return;
        }
        // lazy delete because main() calls System.exit()
        if (tmpBCConfig != null) {
            tmpBCConfig
                    .toFile()
                    .deleteOnExit();
        }
        FSBatchProcessCLI.main(updatedArgs);
    } finally {
        //eager cleanup for the paths that return normally (e.g. validation failures)
        if (tmpBCConfig != null) {
            Files.deleteIfExists(tmpBCConfig);
        }
    }
}