in core/src/main/java/org/apache/stormcrawler/filtering/regex/FastURLFilter.java [108:165]
/**
 * Parses a JSON rule file and replaces the currently active rules with its content.
 *
 * <p>The document is expected to be a collection of entries, each having a {@code "scope"}
 * field and a {@code "patterns"} field. If the root is an object with exactly one field, the
 * value of that field is used as the collection instead (see
 * https://github.com/apache/incubator-stormcrawler/issues/1013).
 *
 * <p>Accepted scope values are {@code GLOBAL}, {@code domain:<value>}, {@code host:<value>}
 * and {@code metadata:<value>}. The scope string is trimmed before being interpreted.
 *
 * <p>{@code this.rules} is only assigned once the whole document has been processed, so an
 * exception thrown while parsing leaves the previously loaded rules in place.
 *
 * @param inputStream stream holding the JSON document; it is not closed by this method
 * @throws JsonParseException if the content is not valid JSON
 * @throws JsonMappingException if the JSON structure cannot be mapped to a tree
 * @throws IOException if the stream cannot be read
 * @throws RuntimeException if an entry has no scope, an unrecognised scope, or no patterns
 */
public void loadJSONResources(InputStream inputStream)
        throws JsonParseException, JsonMappingException, IOException {
    final String domainPrefix = "domain:";
    final String hostPrefix = "host:";
    final String metadataPrefix = "metadata:";

    JsonNode rootNode = objectMapper.readTree(inputStream);
    // if it contains a single object
    // jump directly to its content
    // https://github.com/apache/incubator-stormcrawler/issues/1013
    if (rootNode.size() == 1 && rootNode.isObject()) {
        rootNode = rootNode.fields().next().getValue();
    }

    final Rules rules = new Rules();
    // iterating a JsonNode walks its child elements
    for (JsonNode current : rootNode) {
        // get() returns null for an absent field: fail with a clear message
        // instead of a NullPointerException
        final JsonNode scopeNode = current.get("scope");
        if (scopeNode == null) {
            throw new RuntimeException("Missing scope in " + current);
        }
        final String scopeval = scopeNode.asText().trim();

        // separate the type from the pattern
        final Scope.Type type;
        String value = null;
        if (scopeval.equals("GLOBAL")) {
            type = Scope.Type.GLOBAL;
        } else if (scopeval.startsWith(domainPrefix)) {
            type = Scope.Type.DOMAIN;
            value = scopeval.substring(domainPrefix.length());
        } else if (scopeval.startsWith(hostPrefix)) {
            type = Scope.Type.HOSTNAME;
            value = scopeval.substring(hostPrefix.length());
        } else if (scopeval.startsWith(metadataPrefix)) {
            type = Scope.Type.METADATA;
            value = scopeval.substring(metadataPrefix.length());
        } else {
            throw new RuntimeException("Invalid scope: " + scopeval);
        }

        final JsonNode patternsNode = current.get("patterns");
        if (patternsNode == null) {
            throw new RuntimeException("Missing patterns for scope " + scopeval);
        }

        // one Rule per entry of "patterns", kept in document order
        final List<Rule> rlist = new LinkedList<>();
        for (JsonNode patternNode : patternsNode) {
            rlist.add(new Rule(patternNode.asText()));
        }

        final Scope scope = new Scope();
        scope.setRules(rlist);
        rules.addScope(scope, type, value);
    }

    // publish only after the whole document was processed successfully
    this.rules = rules;
}