public static void traverse()

in core/src/main/java/org/apache/stormcrawler/parse/TextExtractor.java [160:208]


    /**
     * Walks the node tree rooted at {@code root} depth-first without recursion, invoking {@code
     * visitor.head} when a node is reached and {@code visitor.tail} when it is left.
     *
     * <p>When {@code maxSize} is positive, the length of {@code builder} is checked before each
     * node is visited and the walk returns immediately once it has reached {@code maxSize}; no
     * further {@code tail} callbacks are issued in that case. The builder is only read here
     * (presumably it is the buffer the visitor appends to -- confirm against the caller).
     *
     * <p>The visitor may remove or replace the node it is handed in {@code head}; the walk
     * detects this by comparing the parent's child count before and after the callback.
     *
     * @param visitor callbacks to invoke; checked with {@code Validate.notNull}
     * @param root node to start from; checked with {@code Validate.notNull}
     * @param maxSize builder length at which the walk stops; values {@code <= 0} disable the check
     * @param builder buffer whose length is compared against {@code maxSize}
     */
    public static void traverse(
            NodeVisitor visitor, Node root, int maxSize, StringBuilder builder) {
        Validate.notNull(visitor, "null visitor in traverse method");
        Validate.notNull(root, "null root node in traverse method");

        final boolean limited = maxSize > 0;
        Node current = root;
        int level = 0;

        while (current != null) {
            // stop as soon as enough text has been produced
            if (limited && builder.length() >= maxSize) {
                return;
            }

            // Snapshot the surroundings before the callback, which is allowed to detach the node.
            final Node priorParent = current.parentNode();
            final int priorSize = (priorParent == null) ? 0 : priorParent.childNodeSize();
            final Node following = current.nextSibling();

            visitor.head(current, level);

            final boolean detached = priorParent != null && !current.hasParent();
            if (detached) {
                if (priorSize != priorParent.childNodeSize()) {
                    // Child count changed: the node was removed, so it gets no tail call.
                    if (following != null) {
                        current = following;
                    } else {
                        // It was the last child: resume from the parent, one level up.
                        // NOTE(review): the parent's head callback runs again on the next
                        // iteration -- confirm this is intended.
                        current = priorParent;
                        level--;
                    }
                    continue;
                }
                // Same child count: the node was replaced. The detached node still reports its
                // old sibling index, which locates the replacement under the parent.
                current = priorParent.childNode(current.siblingIndex());
            }

            if (current.childNodeSize() > 0) {
                // go down to the first child
                current = current.childNode(0);
                level++;
                continue;
            }

            // Leaf: close every node that has no further sibling while climbing back up.
            while (current.nextSibling() == null && level > 0) {
                visitor.tail(current, level);
                current = current.parentNode();
                level--;
            }
            visitor.tail(current, level);
            if (current == root) {
                return; // back at the starting node, the walk is complete
            }
            current = current.nextSibling();
        }
    }