module Jekyll
  # Liquid filter module providing regular-expression based replacement.
  module RegexFilter
    # Replaces every match of a regular expression in +input+.
    #
    # input    - the String to search; nil is passed through as nil so the
    #            filter can be applied to unset template variables
    # reg_str  - the regular expression source; it is compiled with
    #            Regexp::MULTILINE, so "." also matches newlines
    # repl_str - the replacement String; back-references such as \1 are
    #            honoured by String#gsub
    #
    # Returns a new String with all matches replaced, or nil for nil input.
    def replace_regex(input, reg_str, repl_str)
      return nil if input.nil?

      pattern = Regexp.new(reg_str, Regexp::MULTILINE)
      input.gsub(pattern, repl_str)
    end
  end
end

# Register the RegexFilter module with Liquid so that its methods
# (replace_regex) can be used as filters in templates.
Liquid::Template.register_filter(Jekyll::RegexFilter)


#######
# This function rewrites a link in the following manner
#
# 1) If the link is fully external leave it as an unaltered link
# 2) If the link is an anchor, convert to the anchor scheme used in PDF generation
# 3) If the link target is in the PDF, change the link to point at the anchor in the PDF
# 4) If the link is pointing at somewhere on the brooklyn site which is not included in this PDF, point to the website with a specific version, so https://brooklyn.apache.org/v/0.9.0-SNAPSHOT/start/concept-quickstart.html for instance
#
# * input - the document body, reg_str - the regex source used to find links (its first capturing group must be the URL), site - the jekyll site object, pages - all pages, availablePages - ones included in this merge, mergePage - the root merge page, currentPage - the current page being merged
# Liquid filter that rewrites the links of a page so they keep working once
# the page has been merged into a single PDF document.
module RefactorURL
  # Document id used for pages that have no title.
  UNDEFINED_PAGE_ID = 'id-undefined'

  # Links that already point outside the site: anything carrying a URI scheme
  # (http:, https:, mailto:, ...) or a protocol-relative "//host" reference.
  EXTERNAL_LINK = %r{\A(?:[a-z][a-z0-9+.\-]*:|//)}i

  # Rewrites element ids and links in a page body for PDF generation.
  #
  # input          - the document body (String); nil is returned unchanged.
  #                  The string itself is never modified.
  # reg_str        - regex source matching a whole href attribute; its first
  #                  capturing group must be the URL
  # site           - the site object; read for 'pdf-rewrite-prefixes',
  #                  'pdf-default-base-url' and
  #                  'pdf-default-versioned-url-subpath'
  # pages          - all pages (unused, kept for interface compatibility)
  # availablePages - the pages included in this merge; each is read for
  #                  'url' and 'title'
  # mergePage      - the root merge page (unused, kept for compatibility)
  # currentPage    - the page being merged; read for 'title', 'dir', 'path'
  #
  # Each matched link is replaced as follows:
  # 1) external links are kept as they are
  # 2) "#anchor" links become "#internalLink_<page id>_<anchor>"
  # 3) links to a page in availablePages become "#contentsLink-<page id>"
  # 4) any other site link becomes an absolute URL on the website
  #
  # Returns the rewritten body as a new String.
  def refactorURL(input, reg_str, site, pages, availablePages, mergePage, currentPage)
    return nil if input.nil?

    pid = RefactorURL.page_id(currentPage)

    # Prefix every id with the page id so ids stay unique in the merged
    # document. The block form keeps backslashes in the page id literal.
    body = input.gsub('id="') { "id=\"internalLink_#{pid}_" }
    # Links opening in a new tab would break the in-document anchors.
    body = body.gsub(' target="_blank"', '')

    link_pattern = Regexp.new(reg_str, Regexp::MULTILINE)
    body.gsub(link_pattern) do
      url = Regexp.last_match(1)
      # Without a captured URL there is nothing to rewrite: keep the text.
      next Regexp.last_match(0) if url.nil?

      new_link =
        if url =~ EXTERNAL_LINK
          url
        elsif url.start_with?('#')
          "#internalLink_#{pid}_#{url.delete('#')}"
        else
          RefactorURL.site_link(url, site, availablePages, currentPage)
        end

      "href=\"#{new_link}\""
    end
  end

  # Internal: derives the anchor id of a page from its title (lower-cased,
  # slashes removed, whitespace runs turned into "-").
  # NOTE(review): for an untitled page in availablePages this yields
  # "id-undefined", mirroring the current-page fallback - confirm the
  # contents template generates the same anchor.
  def self.page_id(page)
    title = page['title']
    return UNDEFINED_PAGE_ID if title.nil?

    title.downcase.delete('/').gsub(/\s+/, '-')
  end

  # Internal: rewrites a link that points somewhere inside the site, either
  # to an anchor within the PDF or to an absolute website URL.
  def self.site_link(url, site, available_pages, current_page)
    path = site_path(url, current_page['dir'])

    # When several pages share the URL the last one wins.
    target = nil
    (available_pages || []).each { |page| target = page if page['url'] == path }
    return "#contentsLink-#{page_id(target)}" if target

    external_url(path, site, current_page)
  end

  # Internal: normalises a site link into an absolute site path: the
  # "#fragment" is dropped, relative links are resolved against current_dir
  # and folder links get "index.html" appended.
  def self.site_path(url, current_dir)
    path = url.sub(/#.*\z/m, '')
    unless path.start_with?('/')
      relative = path.start_with?('./') ? path[2..-1] : path
      # Strip trailing slashes from the directory so joining never yields "//".
      path = "#{current_dir.to_s.sub(%r{/+\z}, '')}/#{relative}"
    end
    path.end_with?('/') ? "#{path}index.html" : path
  end

  # Internal: builds the absolute website URL for a page that is not part of
  # the PDF, using the configured prefix rewrites (the last matching prefix
  # wins) and falling back to the default versioned sub-path.
  def self.external_url(path, site, current_page)
    base = site['pdf-default-base-url']
    link = nil
    (site['pdf-rewrite-prefixes'] || []).each do |prefix|
      from = prefix[0]
      link = base + prefix[1] + path[from.length..-1] if path.start_with?(from)
    end
    return link if link

    link = base + site['pdf-default-versioned-url-subpath'] + path
    puts "PDF link to #{path} in #{current_page['path']} has unknown prefix, routing to #{link}"
    link
  end

  # Only register when Liquid is loaded, so the module can also be required
  # on its own (for example from a unit test).
  Liquid::Template.register_filter(self) if defined?(Liquid::Template)
end