private List readConfiguration()

in core/src/main/java/org/apache/stormcrawler/filtering/regex/RegexURLNormalizer.java [164:219]


    private List<Rule> readConfiguration(Reader reader) {
        List<Rule> rules = new ArrayList<>();
        try {

            // borrowed heavily from code in Configuration.java
            Document doc =
                    DocumentBuilderFactory.newInstance()
                            .newDocumentBuilder()
                            .parse(new InputSource(reader));
            Element root = doc.getDocumentElement();
            if ((!"regex-normalize".equals(root.getTagName())) && (LOG.isErrorEnabled())) {
                LOG.error("bad conf file: top-level element not <regex-normalize>");
            }
            NodeList regexes = root.getChildNodes();
            for (int i = 0; i < regexes.getLength(); i++) {
                Node regexNode = regexes.item(i);
                if (!(regexNode instanceof Element)) {
                    continue;
                }
                Element regex = (Element) regexNode;
                if ((!"regex".equals(regex.getTagName())) && (LOG.isWarnEnabled())) {
                    LOG.warn("bad conf file: element not <regex>");
                }
                NodeList fields = regex.getChildNodes();
                String patternValue = null;
                String subValue = null;
                for (int j = 0; j < fields.getLength(); j++) {
                    Node fieldNode = fields.item(j);
                    if (!(fieldNode instanceof Element)) {
                        continue;
                    }
                    Element field = (Element) fieldNode;
                    if ("pattern".equals(field.getTagName()) && field.hasChildNodes()) {
                        patternValue = ((Text) field.getFirstChild()).getData();
                    }
                    if ("substitution".equals(field.getTagName()) && field.hasChildNodes()) {
                        subValue = ((Text) field.getFirstChild()).getData();
                    }
                    if (!field.hasChildNodes()) {
                        subValue = "";
                    }
                }
                if (patternValue != null && subValue != null) {
                    Rule rule = createRule(patternValue, subValue);
                    rules.add(rule);
                }
            }
        } catch (Exception e) {
            LOG.error("error parsing conf file", e);
            return EMPTY_RULES;
        }
        if (rules.size() == 0) {
            return EMPTY_RULES;
        }
        return rules;
    }