public static void main()

in core/src/main/java/org/apache/stormcrawler/filtering/URLFilters.java [142:204]


    /**
     * Command-line entry point: runs a URL through every configured filter in order and prints,
     * for each filter, its class name, the elapsed time and the resulting URL.
     *
     * <p>Usage: {@code [-f <filters config file>] <input URL> [<source URL>]}. When no source URL
     * is given, the input URL is used as its own source. The filters configuration file defaults
     * to {@code urlfilters.json}.
     *
     * <p>The process exits with status {@code 0} when the filter chain ran to completion (or was
     * stopped by a filter returning {@code null}), and with {@code -1} when the input URL is
     * missing, the filters or source URL cannot be set up, or a filter throws.
     *
     * @param args command-line arguments as described above
     * @throws ParseException if the command line cannot be parsed
     */
    public static void main(String[] args) throws ParseException {

        Config conf = new Config();

        // loads the default configuration file
        Map<String, Object> defaultSCConfig =
                Utils.findAndReadConfigFile("crawler-default.yaml", false);
        conf.putAll(ConfUtils.extractConfigElement(defaultSCConfig));

        String configFile = "urlfilters.json";

        Options options =
                new Options()
                        .addOption("f", true, "Filters configuration file. Default " + configFile);

        CommandLineParser parser = new DefaultParser();
        CommandLine cmd = parser.parse(options, args);

        if (cmd.hasOption("f")) {
            configFile = cmd.getOptionValue("f");
        }

        if (cmd.getArgList().isEmpty()) {
            System.err.println("Missing argument for input URL");
            System.exit(-1);
        }

        // read URL to check
        String inputURL = cmd.getArgList().get(0);

        // if a URL has been specified in 2nd position, it is used as the source URL;
        // otherwise the input URL acts as its own source
        String sourceURL = inputURL;
        if (cmd.getArgList().size() > 1) {
            sourceURL = cmd.getArgList().get(1);
        }

        // Set up everything that does not depend on the individual filter once, up front, so
        // that a bad configuration file or a malformed source URL is reported as a setup
        // problem rather than as a filtering failure.
        final URLFilters filters;
        final URL source;
        try {
            filters = new URLFilters(conf, configFile);
            source = new URL(sourceURL);
        } catch (IOException e) {
            LOG.error(
                    "Unable to set up URL filtering (config file: "
                            + configFile
                            + ", source URL: "
                            + sourceURL
                            + ")",
                    e);
            System.exit(-1);
            return; // not reached; lets the compiler see the finals as definitely assigned
        }

        int exitCode = 0;
        try {
            String normalizedURL = inputURL;
            for (URLFilter filter : filters.filters) {
                long start = System.currentTimeMillis();
                normalizedURL = filter.filter(source, new Metadata(), normalizedURL);
                long end = System.currentTimeMillis();
                System.out.println(
                        "\t["
                                + filter.getClass().getName()
                                + "] "
                                + (end - start)
                                + "msec => "
                                + normalizedURL);
                // a null result ends the chain: there is nothing left to hand to the next filter
                if (normalizedURL == null) {
                    break;
                }
            }
        } catch (Exception e) {
            LOG.error("URL filtering threw exception", e);
            // make the failure visible to the calling shell instead of reporting success
            exitCode = -1;
        }
        System.exit(exitCode);
    }