jekyll/plugins/markdown_converter.rb (187 lines of code) (raw):
require 'kramdown'
require 'rouge'
require 'uri'
module Jekyll
  module Converters
    class Markdown
      # Markdown parser that renders content through Kramdown's +to_upsrc+
      # conversion (presumably resolved by Kramdown to Converter::Upsrc).
      class CustomKramdownParser
        # config - the site configuration hash. The 'kramdown', 'baseurl' and
        #          'upsource' entries are read by #convert.
        def initialize(config)
          @config = config
        end

        # Converts Markdown +content+ and returns the converter's output.
        #
        # The Kramdown options are the symbolized 'kramdown' section plus two
        # extra entries consumed by the converter: :baseurl and :upsource.
        # Missing sections are treated as empty so that a site without an
        # 'upsource' (or 'kramdown') block, or without a baseurl, does not
        # crash on nil before any document is rendered.
        def convert(content)
          options = Utils.symbolize_hash_keys(@config['kramdown'] || {})
          options[:baseurl] = @config['baseurl'].to_s
          options[:upsource] = Utils.symbolize_hash_keys(@config['upsource'] || {})
          Kramdown::Document.new(content, options).to_upsrc
        end
      end
    end
  end
end
module Kramdown
module Converter
class Upsrc < Html
# Renders a header as a named anchor (<a name=... class="elem-anchor">)
# immediately followed by the heading element itself.
#
# The generated id is always computed; it is copied to the heading's 'id'
# attribute only when :auto_ids is on and no explicit id was given. Headings
# at output level 3 or above additionally get an 'anchor-link' child linking
# to that id. NOTE: that child is pushed onto el.children in place, so the
# element tree is modified by the conversion.
def convert_header(el, indent)
  heading_attr = el.attr.dup
  heading_id = generate_id(el.options[:raw_text])
  heading_attr['id'] = heading_id if @options[:auto_ids] && !heading_attr['id']

  if heading_id && in_toc?(el)
    @toc << [el.options[:level], heading_id, el.children]
  end

  level = output_header_level(el.options[:level])
  if level <= 3
    el.children.push(Element.new(:a, nil, {'href' => '#' + heading_id, 'class' => 'anchor-link'}))
  end

  target_attr = {'name' => heading_id, 'class' => 'elem-anchor'}
  target = format_as_block_html("a", target_attr, inner(Element.new(:a, nil), indent), indent)
  heading = format_as_block_html("h#{level}", heading_attr, inner(el, indent), indent)
  target + heading
end
# Convert a <code> block
# Overrides Html#convert_codeblock to support highlighting lines in the class
# attribute (class="csharp{1-3}") and to change the generated elements and attributes
# to use expected webhelp values
#
# Output shape: <div class="code-block" data-lang=LANG> wrapping a
# <code class="code-block__wrapper">. LANG defaults to 'text' when the class
# attribute names no language. The block is always emitted at indent 0.
#
# When highlight_code yields nothing, the HTML-escaped source is emitted
# instead, so the code is never silently dropped from the page.
def convert_codeblock(el, indent)
  attr = el.attr.dup
  lang = extract_code_language(attr) || 'text'

  # The optional "{1-3,5}" suffix of the class attribute, braces included.
  highlight_lines = attr['class'].to_s[/\{[\d\-,]+\}/] || ''

  code = highlight_code(el.value, lang, :block, { :highlight_lines => highlight_lines })
  code ||= escape_html(el.value)

  div_attr = {'class' => 'code-block', 'data-lang' => lang}
  code_attr = {'class' => 'code-block__wrapper'}
  format_as_block_html('div', div_attr, format_as_span_html('code', code_attr, code), 0)
end
# Extract the code block/span language from the class attribute, if specified.
# Skip any {} chars (used for highlighting lines)
#
# attr - the element's attribute hash; only 'class' is read.
#
# Returns the word following "language-" in the part of the class attribute
# before the first '{', or nil when there is no class attribute or it names
# no language (previously the latter raised NoMethodError).
def extract_code_language(attr)
  class_attr = attr['class']
  return nil unless class_attr

  # Everything from the first '{' on is the line-highlight spec, not a class.
  class_names = class_attr.split('{').first.to_s
  class_names[/\blanguage-(\w+)\b/, 1]
end
# Convert a code span element
# Overrides Html#convert_codespan to provide different class attributes, as the default
# implementation only provides 'highlighter-pygments'
#
# The span's class is always replaced with "code highlight language-LANG",
# where LANG falls back to 'text'. When highlight_code yields nothing, the
# HTML-escaped source text is used so the span is never rendered empty.
def convert_codespan(el, indent)
  attr = el.attr.dup
  lang = extract_code_language!(attr) || 'text'
  result = highlight_code(el.value, lang, :span) || escape_html(el.value)
  attr['class'] = "code highlight language-#{lang}"
  format_as_span_html('code', attr, result)
end
# Override Html#convert_a to identify external links. Also converts .md links to .html
#
# * The href first goes through convert_href (upsource:// expansion).
# * mailto: links get an obfuscated href (and obfuscated text when the text
#   is the address itself). They are returned as-is after that: the URI
#   rewriting below must not run for them, otherwise it would overwrite the
#   obfuscated href with the plain address.
# * External links (http, https, ftp or protocol-relative) get
#   data-bypass="yes" and target="_blank".
# * Root-relative links have their leading slash replaced by the configured
#   :baseurl (a missing baseurl is treated as empty).
# * Non-external links whose path ends in .md are pointed at .html instead.
#
# The link text is always wrapped in a <span>.
def convert_a(el, indent)
  res = inner(el, indent)
  attr = el.attr.dup
  attr['href'] = '' if attr['href'].nil?
  href = convert_href(attr['href'])

  if href.start_with?('mailto:')
    mail_addr = href[7..-1]
    attr['href'] = "#{obfuscate('mailto')}:#{obfuscate(mail_addr)}"
    res = obfuscate(res) if res == mail_addr
    return format_as_span_html(el.type, attr, "<span>#{res}</span>")
  end

  is_external = href.start_with?('http://', 'https://', 'ftp://', '//')
  if is_external
    attr['data-bypass'] = 'yes'
    attr['target'] = '_blank'
  end

  # '//' hrefs are external, so this only touches root-relative paths.
  href = @options[:baseurl].to_s + href[1..-1] if href.start_with?('/') && !is_external

  uri = URI(href)
  if !is_external && !uri.path.nil? && File.extname(uri.path) == '.md'
    uri.path = uri.path.chomp('.md') + '.html'
  end
  attr['href'] = uri.to_s

  format_as_span_html(el.type, attr, "<span>#{res}</span>")
end
# Expands an upsource:// href into a full https URL on the configured
# Upsource server; every other href is returned unchanged.
#
# TODO: This doesn't really belong here. The rest of the class converts the
# document, whereas this resolves a link target against Upsource. There is no
# obviously better home for it short of a new extension point, which would be
# overkill.
#
# The upsource: scheme is treated as upsource://host/path, where the host would
# carry server, repo and commit SHA. As with file:, the host may be left out,
# in which case the values from config are used. Parsing a host is not
# implemented (it isn't needed yet), but the URL form leaves room for it.
# Consequently the path must begin with a slash, i.e. *three* slashes in the
# plain form: upsource:///path/to/file.java
#
# Resulting URLs look like
#   https://upsource.jetbrains.com/idea-ce/file/idea-ce-1731d054af4ca27aa827c03929e27eeb0e6a8366/platform/editor-ui-api/src/com/intellij/openapi/actionSystem/AnAction.java
# or, when the configured commit is 'HEAD', .../file/HEAD/platform/...
#
# Raises RuntimeError when the path after "upsource://" lacks the leading slash.
def convert_href(href)
  return href unless href.start_with?('upsource://')

  upsource = @options[:upsource]
  host = upsource[:server]
  repository = upsource[:repo]
  revision = upsource[:commit] == 'HEAD' ? 'HEAD' : "#{repository}-#{upsource[:commit]}"

  # Everything after the 11 characters of "upsource://".
  file_path = href[11..-1]
  unless file_path.start_with?('/')
    raise 'Upsource link must be in the form upsource:///path/to/file.java. Note the 3 slashes!'
  end

  'https://' + host + "/#{upsource[:repo]}/file/#{revision}" + file_path
end
# Renders an <img /> tag, replacing the leading slash of a root-relative
# 'src' with the configured :baseurl.
#
# Protocol-relative ('//...') and other sources are left untouched. A missing
# 'src' attribute or a missing baseurl no longer raises: the former is simply
# not rewritten, the latter is treated as an empty prefix.
def convert_img(el, indent)
  attr = el.attr.dup
  src = attr['src']
  if src && src.start_with?('/') && !src.start_with?('//')
    attr['src'] = @options[:baseurl].to_s + src[1..-1]
  end
  "<img#{html_attributes(attr)} />"
end
# Renders a blockquote as an <aside>, using a leading bold label as its type.
#
# When the first child is a paragraph that starts with a :strong element, the
# label's text (lower-cased, whitespace removed) is removed from the paragraph
# and becomes the aside's class, followed by any class already present on the
# blockquote. A resulting class of exactly 'seealso' is rendered through
# format_seealso instead.
#
# NOTE: both the paragraph's children and el.attr are modified in place.
#
# Blockquotes with no children, or whose first paragraph is empty, are
# rendered as a plain <aside> rather than raising on nil.
def convert_blockquote(el, indent)
  first_child = el.children[0]
  label = first_child.children[0] if first_child && first_child.type == :p

  if label && label.type == :strong
    type = inner_text(label, []).downcase.gsub(/\s+/, '')
    # Remove the "Note" bold text from the first paragraph
    first_child.children.slice!(0)
    type = type + ' ' + el.attr['class'] unless el.attr['class'].nil?
    el.attr['class'] = type
    return format_seealso(el, indent) if type == 'seealso'
  end

  format_as_indented_block_html('aside', el.attr, inner(el, indent), indent)
end
# Collects the text of +el+ and all of its descendants, depth first.
#
# el    - an element responding to #value and #children.
# stack - an array used to track the current path; each element is pushed
#         while it is being visited and popped afterwards, so the array is
#         left as it was found.
#
# Only String values contribute to the result. Other values (for example
# Symbols) are skipped, because appending them to a String raises TypeError.
#
# Returns a new String.
def inner_text(el, stack)
  result = String.new
  stack.push el
  result << el.value if el.value.is_a?(String)
  el.children.each do |inner_el|
    result << inner_text(inner_el, stack)
  end
  stack.pop
  result
end
# Renders a "See Also" blockquote as a <section> holding a header div and a
# content div.
#
# The children of +el+ are split into columns at :blank elements: each run of
# non-blank children is converted, concatenated and wrapped in a
# 'seealso__col' div. Runs that produce no output are dropped.
def format_seealso(el, indent)
  title = format_as_span_html('h2', [], 'See Also')
  header = format_as_indented_block_html('div', {'class' => 'seealso__header'}, title, indent)

  # One accumulated HTML string per column; a blank child starts a new one.
  groups = ['']
  el.children.each do |child|
    if child.type == :blank
      groups << ''
    else
      groups[-1] = groups[-1] + convert(child, indent)
    end
  end

  columns = ''
  groups.each do |html|
    next if html == ''
    columns += format_as_indented_block_html('div', {'class' => 'seealso__col'}, html, indent)
  end

  content = format_as_indented_block_html('div', {'class' => 'seealso__content'}, columns, indent)
  format_as_indented_block_html('section', el.attr, header + content, indent)
end
end
# Rouge doesn't support highlighting lines - see jneen/rouge#264
# If/when it does, rewrite this to override the default implementation
# from Kramdown, to pass in the line numbers to highlight. See
# kramdown/converters/syntax_highlighters/rouge.rb for the actual code
#
# module SyntaxHighlighter
# module Rouge
# ::Kramdown::Converter.add_syntax_highlighter(:rouge, Rouge)
#
# def self.call(converter, text, lang, type, code_opts)
# # TODO: Merge this with :options below
# opts = converter.options[:syntax_highlighter_opts].dup
#
# hl_lines = ''
# highlight_lines = code_opts[:highlight_lines] || ''
# if highlight_lines
# hl_lines = highlight_lines.gsub(/[{}]/, '').split(',').map do |ln|
# if matches = /(\d+)-(\d+)/.match(ln)
# ln = Range.new(matches[1], matches[2]).to_a.join(' ')
# end
# ln
# end.join(' ')
# end
#
# if lang
# ::Pygments.highlight(text,
# :lexer => lang || 'text',
# :options => {
# :encoding => 'utf-8',
# :nowrap => true,
# :hl_lines => hl_lines
# })
# else
# escape_html(text)
# end
# end
# end
# end
end
# GFM parser doesn't support indented code blocks, i.e. those that are inside a list
module Parser
class GFM2 < GFM
# Pass-through override: delegates straight to the inherited parse and adds
# no behaviour of its own.
def parse
super
end
# Note that we need to keep the same number of capturing groups
# The parsing code relies on this order
#
# Groups as used by parse_codeblock_fenced:
#   1 - leading spaces/tabs plus the opening fence (the closing fence must
#       repeat it exactly, via the \1 backreference)
#   2 - a single fence character
#   3 - the language together with an optional {1-3,5} line spec
#   4 - the bare language
#   5 - the code between the fences
#
# Only spaces and tabs may separate the fence from the language. Allowing any
# whitespace there lets the match run onto the next line, so a fence with no
# language whose first code line is a single word would have that word taken
# as the language and dropped from the code.
FENCED_CODEBLOCK_MATCH = /^([ \t]*([~`]){3,})[ \t]*((\w+)(?:\{[\,\d\-]+\})?)?\s*?\n(.*?)^\1.*?\n/m
# Registers the :codeblock_fenced_gfm_indented block parser. Its start pattern
# allows spaces/tabs before the fence; the last argument names the method
# (parse_codeblock_fenced) that presumably handles a match.
define_parser(:codeblock_fenced_gfm_indented, /^[ \t]*[~`]{3,}/, nil, 'parse_codeblock_fenced')
# Sets up the inherited parser, then swaps the stock fenced code block parser
# for the indentation-aware one at the same position in @block_parsers.
def initialize(source, options)
  super
  swaps = {:codeblock_fenced_gfm => :codeblock_fenced_gfm_indented}
  swaps.each_pair do |stock_parser, indented_parser|
    slot = @block_parsers.index(stock_parser)
    @block_parsers.delete(stock_parser)
    @block_parsers.insert(slot, indented_parser)
  end
end
# Parses a fenced code block at the current scanner position.
#
# On a match the scanner is advanced past the block and a :codeblock element
# holding the unindented code is appended to @tree. If a language was given,
# the full spec (including any {..} line list) is stored in options[:lang]
# and the bare language becomes the "language-..." class.
#
# Returns true when a block was consumed, false otherwise.
def parse_codeblock_fenced
  return false unless @src.check(self.class::FENCED_CODEBLOCK_MATCH)

  first_line = @src.current_line_number
  @src.pos += @src.matched_size
  block = new_block_el(:codeblock, unindent(@src[5]), nil, :location => first_line)

  language_spec = @src[3].to_s.strip
  if !language_spec.empty?
    block.options[:lang] = language_spec
    block.attr['class'] = "language-#{@src[4]}"
  end

  @tree.children << block
  true
end
# Strips a shared leading-whitespace prefix from the lines of +s+.
#
# The prefix is the smallest (by String comparison) run of spaces/tabs found
# in front of a non-blank line. Blank lines do not take part in choosing it.
# When no line has content, +s+ comes back unchanged.
def unindent(s)
  indents = s.scan(/^[ \t]*(?=\S)/)
  prefix = indents.min
  s.gsub(/^#{prefix}/, '')
end
end
end
end