public void extractMetaTags()

in core/src/main/java/org/apache/stormcrawler/util/RobotsTags.java [84:116]


    /**
     * Scans the nodes selected by {@code expression} for a robots meta tag and forwards its
     * directives to {@code parseValues}.
     *
     * <p>A node qualifies when it carries an attribute named {@code name} whose value is {@code
     * robots} and an attribute named {@code content}; both attribute names and the {@code
     * robots} value are matched case-insensitively. The content of the first qualifying node is
     * split on commas and handed to {@code parseValues}; the scan then stops.
     *
     * @param doc the parsed document fragment the XPath expression is evaluated against
     * @throws XPathExpressionException if the XPath expression cannot be evaluated
     */
    public void extractMetaTags(DocumentFragment doc) throws XPathExpressionException {
        NodeList nodes = (NodeList) expression.evaluate(doc, XPathConstants.NODESET);
        if (nodes == null) {
            return;
        }
        int numNodes = nodes.getLength();
        for (int i = 0; i < numNodes; i++) {
            String content = findRobotsContent(nodes.item(i));
            if (content == null) {
                // not a robots meta tag, or it has no content attribute
                continue;
            }
            // Trim the whole value and absorb any whitespace (spaces, tabs, line breaks)
            // around the commas so that no token reaches parseValues with padding attached.
            parseValues(content.trim().split("\\s*,\\s*"));
            // NOTE(review): only the first robots meta tag is honoured; any later ones are
            // ignored. Confirm whether directives from several tags should be merged instead.
            return;
        }
    }

    /**
     * Returns the value of the {@code content} attribute of {@code node} if the node also has a
     * {@code name} attribute equal to {@code robots} (all comparisons ignore case).
     *
     * @param node the candidate node, may be {@code null}
     * @return the content value, or {@code null} if the node is not a robots meta tag, has no
     *     content attribute, or has no attributes at all
     */
    private static String findRobotsContent(Node node) {
        if (node == null) {
            return null;
        }
        NamedNodeMap attrs = node.getAttributes();
        if (attrs == null) {
            // getAttributes() is null for anything that is not an element
            return null;
        }
        boolean isRobots = false;
        String content = null;
        for (int att = 0; att < attrs.getLength(); att++) {
            Node attr = attrs.item(att);
            String attrName = attr.getNodeName();
            if ("name".equalsIgnoreCase(attrName)) {
                if ("robots".equalsIgnoreCase(attr.getNodeValue())) {
                    isRobots = true;
                }
            } else if ("content".equalsIgnoreCase(attrName)) {
                content = attr.getNodeValue();
            }
        }
        return isRobots ? content : null;
    }