src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java [33:72]: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - public class HTMLMetaProcessor { /** * Utility class with indicators for the robots directives "noindex" and * "nofollow", and HTTP-EQUIV/no-cache */ /** * Sets the indicators in robotsMeta to appropriate values, based * on any META tags found under the given node. * @param metaTags a {@link HTMLMetaTags} to populate with tags discovered in the * given Node * @param node a DOM {@link Node} to process and extract metadata from * @param currURL the canonical URL associated with the metatags and Node */ public static final void getMetaTags(HTMLMetaTags metaTags, Node node, URL currURL) { metaTags.reset(); getMetaTagsHelper(metaTags, node, currURL); } private static final void getMetaTagsHelper(HTMLMetaTags metaTags, Node node, URL currURL) { if (node.getNodeType() == Node.ELEMENT_NODE) { if ("body".equalsIgnoreCase(node.getNodeName())) { // META tags should not be under body return; } if ("meta".equalsIgnoreCase(node.getNodeName())) { NamedNodeMap attrs = node.getAttributes(); Node nameNode = null; Node equivNode = null; Node contentNode = null; // Retrieves name, http-equiv and content attributes for (int i = 0; i < attrs.getLength(); i++) { Node attr = attrs.item(i); - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/HTMLMetaProcessor.java [35:74]: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - public class HTMLMetaProcessor { /** * Utility class with indicators for the robots directives "noindex" and * "nofollow", and HTTP-EQUIV/no-cache */ /** * Sets the indicators in robotsMeta to appropriate values, based * on any META tags found under the given node. 
* @param metaTags a {@link HTMLMetaTags} to populate with tags discovered in the * given Node * @param node a DOM {@link Node} to process and extract metadata from * @param currURL the canonical URL associated with the metatags and Node */ public static final void getMetaTags(HTMLMetaTags metaTags, Node node, URL currURL) { metaTags.reset(); getMetaTagsHelper(metaTags, node, currURL); } private static final void getMetaTagsHelper(HTMLMetaTags metaTags, Node node, URL currURL) { if (node.getNodeType() == Node.ELEMENT_NODE) { if ("body".equalsIgnoreCase(node.getNodeName())) { // META tags should not be under body return; } if ("meta".equalsIgnoreCase(node.getNodeName())) { NamedNodeMap attrs = node.getAttributes(); Node nameNode = null; Node equivNode = null; Node contentNode = null; // Retrieves name, http-equiv and content attributes for (int i = 0; i < attrs.getLength(); i++) { Node attr = attrs.item(i); - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -