public boolean filter()

in core/src/main/java/org/apache/stormcrawler/filtering/regex/FastURLFilter.java [207:251]


    /**
     * Runs the URL through the configured rule scopes, from the most specific to the most
     * general, and stops at the first scope for which {@code checkScope} returns {@code true}.
     *
     * <p>Scopes are consulted in this order:
     *
     * <ol>
     *   <li>the entry of {@code hostNameRules} for the URL's full host name;
     *   <li>the entries of {@code domainRules} for every suffix of the host name, built label
     *       by label from the rightmost one (e.g. {@code com}, {@code example.com}, {@code
     *       www.example.com});
     *   <li>each scope in {@code metadataRules} whose value equals, ignoring case, one of the
     *       values stored in {@code metadata} under the scope's key;
     *   <li>{@code globalRules}.
     * </ol>
     *
     * @param url the URL to test; it is parsed with {@link URL#URL(String)}
     * @param metadata metadata whose values are compared against the metadata scopes
     * @return {@code true} if {@code checkScope} returned {@code true} for at least one of the
     *     scopes above, {@code false} otherwise (presumably {@code true} means the URL is to be
     *     filtered out; confirm against {@code checkScope} and the callers)
     * @throws MalformedURLException if {@code url} cannot be parsed as a {@link URL}
     */
    public boolean filter(String url, Metadata metadata) throws MalformedURLException {
        final URL parsed = new URL(url);
        final String host = parsed.getHost();

        // most specific scope first: rules registered for the exact host name
        if (checkScope(hostNameRules.get(host), parsed)) {
            return true;
        }

        // walk the host's labels from right to left, growing the suffix by one label per step
        final String[] labels = host.split("\\.");
        String suffix = null;
        int idx = labels.length;
        while (--idx >= 0) {
            suffix = (suffix == null) ? labels[idx] : labels[idx] + "." + suffix;
            if (checkScope(domainRules.get(suffix), parsed)) {
                return true;
            }
        }

        // scopes selected by a key/value pair found in the supplied metadata
        for (MDScope rule : metadataRules) {
            final String[] candidates = metadata.getValues(rule.getKey());
            if (candidates != null) {
                for (String candidate : candidates) {
                    if (!candidate.equalsIgnoreCase(rule.getValue())) {
                        continue;
                    }
                    FastURLFilter.LOG.debug(
                            "Filtering {} matching metadata {}:{}",
                            url,
                            rule.getKey(),
                            rule.getValue());
                    if (checkScope(rule, parsed)) {
                        return true;
                    }
                }
            }
        }

        // last resort: the rules that apply regardless of host or metadata
        return checkScope(globalRules, parsed);
    }