src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java [125:266]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    return null;
  }

  /**
   * Append a node to the current container.
   * 
   * @param newNode
   *          New node to append
   * @throws org.xml.sax.SAXException if non-whitespace text is found before
   * the document element, or if a second root element is appended
   */
  protected void append(Node newNode) throws org.xml.sax.SAXException {

    // First choice: attach below m_currentNode when one is set.
    final Node parent = m_currentNode;
    if (parent != null) {
      parent.appendChild(newNode);
      return;
    }

    // Second choice: attach to the document fragment when one is set.
    if (m_docFrag != null) {
      m_docFrag.appendChild(newNode);
      return;
    }

    // Otherwise the node would become a direct child of the document, which
    // restricts what may be appended.
    switch (newNode.getNodeType()) {
    case Node.TEXT_NODE:
      String text = newNode.getNodeValue();
      if (text != null && !text.trim().isEmpty()) {
        throw new org.xml.sax.SAXException(
            "Warning: can't output text before document element!  Ignoring...");
      }
      // Null or whitespace-only text at document level is silently dropped.
      return;
    case Node.ELEMENT_NODE:
      // Only one element may sit directly under the document.
      if (m_doc.getDocumentElement() != null) {
        throw new org.xml.sax.SAXException(
            "Can't have more than one root on a DOM!");
      }
      break;
    default:
      // Every other node type is appended without further checks.
      break;
    }

    m_doc.appendChild(newNode);
  }

  /**
   * Receive an object for locating the origin of SAX document events.
   * 
   * <p>
   * SAX parsers are strongly encouraged (though not absolutely required) to
   * supply a locator: if it does so, it must supply the locator to the
   * application by invoking this method before invoking any of the other
   * methods in the ContentHandler interface.
   * </p>
   * 
   * <p>
   * The locator allows the application to determine the end position of any
   * document-related event, even if the parser is not reporting an error.
   * Typically, the application will use this information for reporting its own
   * errors (such as character content that does not match an application's
   * business rules). The information returned by the locator is probably not
   * sufficient for use with a search engine.
   * </p>
   * 
   * <p>
   * Note that the locator will return correct information only during the
   * invocation of the events in this interface. The application should not
   * attempt to use it at any other time.
   * </p>
   * 
   * @param locator
   *          An object that can return the location of any SAX document event.
   * @see org.xml.sax.Locator
   */
  public void setDocumentLocator(Locator locator) {
    // Intentionally empty: the supplied locator is neither stored nor used.
  }

  /**
   * Receive notification of the beginning of a document.
   * 
   * <p>
   * The SAX parser will invoke this method only once, before any other methods
   * in this interface or in DTDHandler (except for setDocumentLocator).
   * </p>
   */
  public void startDocument() throws org.xml.sax.SAXException {
    // Intentionally empty: nothing is done when the document starts.
  }

  /**
   * Receive notification of the end of a document.
   * 
   * <p>
   * The SAX parser will invoke this method only once, and it will be the last
   * method invoked during the parse. The parser shall not invoke this method
   * until it has either abandoned parsing (because of an unrecoverable error)
   * or reached the end of input.
   * </p>
   */
  public void endDocument() throws org.xml.sax.SAXException {
    // Intentionally empty: nothing is done when the document ends.
  }

  /**
   * Receive notification of the beginning of an element.
   * 
   * <p>
   * The Parser will invoke this method at the beginning of every element in the
   * XML document; there will be a corresponding endElement() event for every
   * startElement() event (even when the element is empty). All of the element's
   * content will be reported, in order, before the corresponding endElement()
   * event.
   * </p>
   * 
   * <p>
   * If the element name has a namespace prefix, the prefix will still be
   * attached. Note that the attribute list provided will contain only
   * attributes with explicit values (specified or defaulted): #IMPLIED
   * attributes will be omitted.
   * </p>
   * 
   * 
   * @param ns
   *          The namespace of the node
   * @param localName
   *          The local part of the qualified name
   * @param name
   *          The element name.
   * @param atts
   *          The attributes attached to the element, if any.
   * @see #endElement
   * @see org.xml.sax.Attributes
   */
  public void startElement(String ns, String localName, String name,
      Attributes atts) throws org.xml.sax.SAXException {

    Element elem;
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java [132:271]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    return null;
  }

  /**
   * Append a node to the current container.
   * 
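   * @param newNode
   *          New node to append
   * @throws org.xml.sax.SAXException if non-whitespace text is found before
   * the document element, or if a second root element is appended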
   */
  protected void append(Node newNode) throws org.xml.sax.SAXException {

    // First choice: attach below m_currentNode when one is set.
    final Node parent = m_currentNode;
    if (parent != null) {
      parent.appendChild(newNode);
      return;
    }

    // Second choice: attach to the document fragment when one is set.
    if (m_docFrag != null) {
      m_docFrag.appendChild(newNode);
      return;
    }

    // Otherwise the node would become a direct child of the document, which
    // restricts what may be appended.
    switch (newNode.getNodeType()) {
    case Node.TEXT_NODE:
      String text = newNode.getNodeValue();
      if (text != null && !text.trim().isEmpty()) {
        throw new org.xml.sax.SAXException(
            "Warning: can't output text before document element!  Ignoring...");
      }
      // Null or whitespace-only text at document level is silently dropped.
      return;
    case Node.ELEMENT_NODE:
      // Only one element may sit directly under the document.
      if (m_doc.getDocumentElement() != null) {
        throw new org.xml.sax.SAXException(
            "Can't have more than one root on a DOM!");
      }
      break;
    default:
      // Every other node type is appended without further checks.
      break;
    }

    m_doc.appendChild(newNode);
  }

  /**
   * Receive an object for locating the origin of SAX document events.
   * 
   * <p>
   * SAX parsers are strongly encouraged (though not absolutely required) to
   * supply a locator: if it does so, it must supply the locator to the
   * application by invoking this method before invoking any of the other
   * methods in the ContentHandler interface.
   * </p>
   * 
   * <p>
   * The locator allows the application to determine the end position of any
   * document-related event, even if the parser is not reporting an error.
   * Typically, the application will use this information for reporting its own
   * errors (such as character content that does not match an application's
   * business rules). The information returned by the locator is probably not
   * sufficient for use with a search engine.
   * </p>
   * 
   * <p>
   * Note that the locator will return correct information only during the
   * invocation of the events in this interface. The application should not
   * attempt to use it at any other time.
   * </p>
   * 
   * @param locator
   *          An object that can return the location of any SAX document event.
   * @see org.xml.sax.Locator
   */
  public void setDocumentLocator(Locator locator) {
    // Intentionally empty: the supplied locator is neither stored nor used.
  }

  /**
   * Receive notification of the beginning of a document.
   * 
   * <p>
   * The SAX parser will invoke this method only once, before any other methods
   * in this interface or in DTDHandler (except for setDocumentLocator).
   * </p>
   */
  public void startDocument() throws org.xml.sax.SAXException {
    // Intentionally empty: nothing is done when the document starts.
  }

  /**
   * Receive notification of the end of a document.
   * 
   * <p>
   * The SAX parser will invoke this method only once, and it will be the last
   * method invoked during the parse. The parser shall not invoke this method
   * until it has either abandoned parsing (because of an unrecoverable error)
   * or reached the end of input.
   * </p>
   */
  public void endDocument() throws org.xml.sax.SAXException {
    // Intentionally empty: nothing is done when the document ends.
  }

  /**
   * Receive notification of the beginning of an element.
   * 
   * <p>
   * The Parser will invoke this method at the beginning of every element in the
   * XML document; there will be a corresponding endElement() event for every
   * startElement() event (even when the element is empty). All of the element's
   * content will be reported, in order, before the corresponding endElement()
   * event.
   * </p>
   * 
   * <p>
   * If the element name has a namespace prefix, the prefix will still be
   * attached. Note that the attribute list provided will contain only
   * attributes with explicit values (specified or defaulted): #IMPLIED
   * attributes will be omitted.
   * </p>
   * 
   * 
   * @param ns
   *          The namespace of the node
   * @param localName
   *          The local part of the qualified name
   * @param name
   *          The element name.
   * @param atts
   *          The attributes attached to the element, if any.
   * @see #endElement
   * @see org.xml.sax.Attributes
   */
  public void startElement(String ns, String localName, String name,
      Attributes atts) throws org.xml.sax.SAXException {

    Element elem;
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



