private void handleCompare()

in tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/TikaEvalCLI.java [262:362]


    /**
     * Validates the command-line arguments for a comparison run, fills in
     * defaults, and hands the resulting argument list to
     * {@code FSBatchProcessCLI.main}.
     * <p>
     * Defaults applied:
     * <ul>
     *     <li>if {@code -inputDir} is absent but {@code -extractsA} is given,
     *     {@code -inputDir} is set to the {@code -extractsA} value;</li>
     *     <li>if {@code -bc} is absent, the bundled
     *     {@code /tika-eval-comparison-config.xml} resource is copied to a
     *     temporary file and passed as {@code -bc}.</li>
     * </ul>
     * On any argument error a message and the {@code ExtractComparer} usage are
     * printed and the method returns without starting the batch process.
     *
     * @param subsetArgs the arguments for the comparison; not modified
     * @throws IllegalStateException if the bundled batch-config resource cannot
     *                               be found on the classpath
     * @throws Exception             if the temporary config cannot be written or
     *                               the batch process fails
     */
    private void handleCompare(String[] subsetArgs) throws Exception {
        // Arrays.asList is a fixed-size view; copy it so arguments can be appended.
        List<String> argList = new ArrayList<>(Arrays.asList(subsetArgs));

        boolean containsBC = false;
        String inputDir = null;
        String extractsA = null;
        String alterExtract = null;
        //scan for the options that need validation or defaulting
        for (int i = 0; i < argList.size(); i++) {
            String arg = argList.get(i);
            switch (arg) {
                case "-bc":
                    containsBC = true;
                    break;
                case "-inputDir":
                    if (i + 1 >= argList.size()) {
                        System.err.println("Must specify directory after -inputDir");
                        ExtractComparer.USAGE();
                        return;
                    }
                    inputDir = argList.get(i + 1);
                    //skip the value that was just consumed
                    i++;
                    break;
                case "-extractsA":
                    if (i + 1 >= argList.size()) {
                        System.err.println("Must specify directory after -extractsA");
                        ExtractComparer.USAGE();
                        return;
                    }
                    extractsA = argList.get(i + 1);
                    i++;
                    break;
                case "-alterExtract":
                    if (i + 1 >= argList.size()) {
                        System.err.println("Must specify type 'as_is', 'first_only' or " + "'concatenate_content' after -alterExtract");
                        ExtractComparer.USAGE();
                        return;
                    }
                    alterExtract = argList.get(i + 1);
                    i++;
                    break;
                default:
                    //every other argument is passed through untouched
                    break;
            }
        }
        if (alterExtract != null && !alterExtract.equals("as_is") && !alterExtract.equals("concatenate_content") && !alterExtract.equals("first_only")) {
            System.out.println("Sorry, I don't understand:" + alterExtract + ". The values must be one of: as_is, first_only, concatenate_content");
            ExtractComparer.USAGE();
            return;
        }

        //need to specify each in the commandline that goes into tika-batch
        //if only extracts is passed to tika-batch,
        //the crawler will see no inputDir and start crawling "input".
        //if the user doesn't specify inputDir, crawl extractsA
        if (inputDir == null && extractsA != null) {
            argList.add("-inputDir");
            argList.add(extractsA);
        }

        //stays null when the caller supplied their own -bc; no temp file is needed then
        Path tmpBCConfig = null;
        try {
            if (!containsBC) {
                tmpBCConfig = Files.createTempFile("tika-eval", ".xml");
                try (InputStream is = this
                        .getClass()
                        .getResourceAsStream("/tika-eval-comparison-config.xml")) {
                    //getResourceAsStream returns null for a missing resource;
                    //fail with a clear message instead of an NPE inside Files.copy
                    if (is == null) {
                        throw new IllegalStateException("Could not find /tika-eval-comparison-config.xml on the classpath");
                    }
                    Files.copy(is, tmpBCConfig, StandardCopyOption.REPLACE_EXISTING);
                }
                argList.add("-bc");
                argList.add(tmpBCConfig
                        .toAbsolutePath()
                        .toString());
            }
            String[] updatedArgs = argList.toArray(new String[0]);
            DefaultParser defaultCLIParser = new DefaultParser();
            try {
                CommandLine commandLine = defaultCLIParser.parse(ExtractComparer.OPTIONS, updatedArgs);
                if (commandLine.hasOption("db") && commandLine.hasOption("jdbc")) {
                    System.out.println("Please specify either the default -db or the full -jdbc, not both");
                    ExtractComparer.USAGE();
                    return;
                }
            } catch (ParseException e) {
                System.out.println(e.getMessage() + "\n");
                ExtractComparer.USAGE();
                return;
            }

            // lazy delete because main() calls System.exit(),
            // in which case the finally block below never runs
            if (tmpBCConfig != null) {
                tmpBCConfig
                        .toFile()
                        .deleteOnExit();
            }
            FSBatchProcessCLI.main(updatedArgs);
        } finally {
            //deleteIfExists avoids the check-then-delete race of isRegularFile + delete
            if (tmpBCConfig != null) {
                Files.deleteIfExists(tmpBCConfig);
            }
        }
    }