in core/src/main/java/org/apache/stormcrawler/protocol/DelegatorProtocol.java [167:218]
/**
 * Tells whether the given URL and metadata satisfy the conditions held by this instance.
 *
 * <p>When neither metadata filters nor URL patterns are configured, anything is accepted.
 * Otherwise each filter and each URL pattern counts as one condition and the conditions are
 * combined with {@code operator}: under AND the first failing condition rejects, under OR
 * the first satisfied condition accepts.
 *
 * @param url the URL searched for an occurrence of each configured pattern
 * @param metadata the metadata checked against each configured filter
 * @return {@code true} if the conditions hold under the configured operator, {@code false}
 *     otherwise
 */
boolean isMatch(final String url, final Metadata metadata) {
    // nothing configured at all: there is no condition that could reject the input
    if (filters.isEmpty() && urlPatterns.isEmpty()) {
        return true;
    }

    // metadata conditions come first
    for (Filter filter : filters) {
        final boolean satisfied;
        if (filter.value == null || filter.value.equals("")) {
            // no value configured: the mere presence of the key is enough
            satisfied = metadata.containsKey(filter.key);
            if (!satisfied) {
                LOG.trace("Key {} not found in metadata {}", filter.key, metadata);
            }
        } else {
            // a value is configured: the key must carry that exact value
            satisfied = metadata.containsKeyWithValue(filter.key, filter.value);
            if (!satisfied) {
                LOG.trace(
                        "Key {} not found with value {} in metadata {}",
                        filter.key,
                        filter.value,
                        metadata);
            }
        }

        // short-circuit: one hit settles OR, one miss settles AND
        if (satisfied) {
            if (operator.equals(Operator.OR)) {
                return true;
            }
        } else if (operator.equals(Operator.AND)) {
            return false;
        }
    }

    // URL conditions follow the very same rules
    for (Pattern pattern : urlPatterns) {
        // find() looks for the pattern anywhere in the URL, not for a full match
        final boolean satisfied = pattern.matcher(url).find();

        if (satisfied) {
            if (operator.equals(Operator.OR)) {
                return true;
            }
        } else if (operator.equals(Operator.AND)) {
            return false;
        }
    }

    // Getting here under OR means no condition was satisfied (a hit would have returned
    // above), so the answer is false; for any other operator, AND in particular, nothing
    // caused a rejection and the answer is true.
    return !operator.equals(Operator.OR);
}