src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java [33:72]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
public class HTMLMetaProcessor {

  /**
   * Utility class with indicators for the robots directives "noindex" and
   * "nofollow", and HTTP-EQUIV/no-cache
   */

  /**
   * Sets the indicators in <code>robotsMeta</code> to appropriate values, based
   * on any META tags found under the given <code>node</code>.
   * @param metaTags a {@link HTMLMetaTags} to populate with tags discovered in the 
   * given Node
   * @param node a DOM {@link Node} to process and extract metadata from
   * @param currURL the cononical URL associated with the metatags and Node
   */
  public static final void getMetaTags(HTMLMetaTags metaTags, Node node,
      URL currURL) {

    metaTags.reset();
    getMetaTagsHelper(metaTags, node, currURL);
  }

  private static final void getMetaTagsHelper(HTMLMetaTags metaTags, Node node,
      URL currURL) {

    if (node.getNodeType() == Node.ELEMENT_NODE) {

      if ("body".equalsIgnoreCase(node.getNodeName())) {
        // META tags should not be under body
        return;
      }

      if ("meta".equalsIgnoreCase(node.getNodeName())) {
        NamedNodeMap attrs = node.getAttributes();
        Node nameNode = null;
        Node equivNode = null;
        Node contentNode = null;
        // Retrieves name, http-equiv and content attribues
        for (int i = 0; i < attrs.getLength(); i++) {
          Node attr = attrs.item(i);
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/HTMLMetaProcessor.java [35:74]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
public class HTMLMetaProcessor {

  /**
   * Utility class with indicators for the robots directives "noindex" and
   * "nofollow", and HTTP-EQUIV/no-cache
   */

  /**
   * Sets the indicators in <code>robotsMeta</code> to appropriate values, based
   * on any META tags found under the given <code>node</code>.
   * @param metaTags a {@link HTMLMetaTags} to populate with tags discovered in the 
   * given Node
   * @param node a DOM {@link Node} to process and extract metadata from
   * @param currURL the cononical URL associated with the metatags and Node
   */
  public static final void getMetaTags(HTMLMetaTags metaTags, Node node,
      URL currURL) {

    metaTags.reset();
    getMetaTagsHelper(metaTags, node, currURL);
  }

  private static final void getMetaTagsHelper(HTMLMetaTags metaTags, Node node,
      URL currURL) {

    if (node.getNodeType() == Node.ELEMENT_NODE) {

      if ("body".equalsIgnoreCase(node.getNodeName())) {
        // META tags should not be under body
        return;
      }

      if ("meta".equalsIgnoreCase(node.getNodeName())) {
        NamedNodeMap attrs = node.getAttributes();
        Node nameNode = null;
        Node equivNode = null;
        Node contentNode = null;
        // Retrieves name, http-equiv and content attribues
        for (int i = 0; i < attrs.getLength(); i++) {
          Node attr = attrs.item(i);
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



