self.node_descendant_text

in lib/crawler/content_engine/utils.rb [33:84]


      # Gathers the text found at and beneath +node+ into one whitespace-normalised
      # string, walking the tree iteratively (explicit stack) rather than recursively.
      #
      # Per visited node, in this order of precedence:
      # * a node whose +name+ is listed in +ignore_tags+ is dropped together with
      #   its whole subtree;
      # * a node for which +replace_with_whitespace?+ is true contributes a single
      #   space and its children are not visited;
      # * a text node contributes its squished +content+ (nothing if content is nil);
      # * any other node has its children visited in their original order, with a
      #   space marker emitted before the first child and after the last one.
      #
      # NOTE(review): +present?+ and +squish+ are presumably the ActiveSupport
      # extensions — confirm against the file's requires.
      #
      # @param node [#children, #name, #text?, nil] root of the subtree to extract from
      # @param ignore_tags [#include?] node names whose subtrees are skipped
      # @return [String] the extracted text; '' when +node+ is nil or not present
      # @raise [ArgumentError] when +node+ lacks +children+, +name+ or +text?+
      def self.node_descendant_text(node, ignore_tags = NON_CONTENT_TAGS)
        return '' unless node&.present?

        node_like = %i[children name text?].all? { |message| node.respond_to?(message) }
        raise ArgumentError, "Expecting something node-like but got a #{node.class}" unless node_like

        # The stack holds both nodes still to visit and literal ' ' markers that
        # stand for element boundaries.
        pending = [node]
        fragments = []

        while (current = pending.pop)
          if current.is_a?(String)
            # Boundary marker: never emit two single spaces in a row.
            repeated_space = current == ' ' && fragments.last == ' '
            fragments << current unless repeated_space
          elsif ignore_tags.include?(current.name)
            # Ignored tag: neither the node nor its descendants contribute.
            next
          elsif replace_with_whitespace?(current)
            fragments << ' ' unless fragments.last == ' '
          elsif current.text?
            raw = current.content
            fragments << raw.squish if raw
          else
            # Pushed as: marker, children reversed, marker. Because the stack is
            # LIFO, they pop as: marker, children in original order, marker.
            pending.push(' ')
            current.children.reverse_each { |child| pending.push(child) }
            pending.push(' ')
          end
        end

        # Collapse leftover whitespace runs and trim both ends.
        fragments.join.squish
      end