self.add

in _plugins/search-indexer.rb [45:108]


  # Builds a search-index entry for +page+ and appends it to @data.
  #
  # The page is skipped (nothing is appended, nil is returned) when its URL
  # matches @excluded_paths, when its front matter sets 'omit_from_search',
  # or when no text remains after the HTML has been stripped.
  #
  # @param page [#url, #data, #content] page-like object; when it is exactly a
  #   Jekyll::Document its collection label is mapped to a display type
  # @return [Object, nil] the result of @data.push for an indexed page,
  #   nil for a skipped one
  def self.add(page)
    return if @excluded_paths.match(page.url)
    return if page.data['omit_from_search']

    content = page.content
                  .gsub(@html_excluded_tags, ' ')            # drop whatever @html_excluded_tags matches
                  .gsub(@html_block_tags, "\n")              # @html_block_tags matches become line breaks
                  .gsub(/\s*<[?\/!]?[a-z]+.*?>\s*/im, ' ')   # any remaining tag (and padding) becomes one space
                  .gsub(/\s*[\r\n]+\s*/, "\n")               # collapse line-break runs and their padding
                  .gsub(/\s{2,}/, ' ')                       # collapse remaining whitespace runs
                  .gsub(/\s+([.:;,)!\]?])/, '\1')            # no whitespace before closing punctuation
                  .strip

    return if content.empty?

    # Interpolation rather than `+`: "baseurl" may be nil when the site does
    # not configure it, and nil has no `+`.
    url = "#{@site.config['baseurl']}#{page.url}"

    type = nil
    if page.instance_of?(Jekyll::Document)
      label = page.collection&.label
      type =
        case label
        when 'posts'        then 'News'
        when 'authors'      then 'Authors'
        when 'events'       then 'Events'
        when 'versions'     then 'Downloads'
        when 'testimonials' then 'Testimonials'
        when 'tutorials'    then 'Tutorials'
        else
          # Interpolation keeps this diagnostic from raising on a nil label.
          puts "Unknown type: #{label}"
          nil
        end
    end

    # Array() accepts nil, a single scalar value, or a list, so a front-matter
    # entry such as `keywords: "a, b"` becomes one keyword instead of raising.
    keywords = Array(page.data["categories"]) + Array(page.data["keywords"])

    title = page.data["title"]
    title = page.data["primary_title"] if title.nil? || title.empty?
    if title.nil? || title.empty?
      # Last resort: take the link text of the <h1> inside the page's
      # "copy-banner" > "container" markup.
      if /<div.+?class="[^"]*copy-banner[^"]*".*?>.*?<div.+?class="[^"]*container[^"]*".*?>.*?<h1.*?><a.*?>\s*([^<]+)\s*<\/a>.*?<\/h1>/m =~ page.content
        # The greedy capture also swallows whitespace before </a>; strip it.
        title = Regexp.last_match(1).strip
      end
    end

    @data.push(
      url: url,
      title: title,
      content: content,
      keywords: keywords,
      type: type
    )
  end