in lib/rdoc/markup/parser.rb [480:563]
##
# Splits +input+ into tokens, appending each one to <tt>@tokens</tt>, and
# returns +self+.
#
# Every token is an Array of the form <tt>[type, data, *pos]</tt> where
# +pos+ is whatever <tt>@s.pos</tt> returned before the token was scanned
# (presumably a <tt>[column, line]</tt> pair -- confirm against the scanner
# created by +setup_scanner+).
#
# Token types and their data:
#
# :NEWLINE::    the matched line terminator
# :HEADER::     the number of <tt>=</tt> characters
# :TEXT::       the text itself
# :RULE::       the number of dashes minus two
# :BULLET::     the bullet character (<tt>*</tt> or <tt>-</tt>)
# :LALPHA::     the lower-case list label
# :UALPHA::     the upper-case list label
# :NUMBER::     the numeric list label
# :LABEL::      the text between the brackets
# :NOTE::       the text before <tt>::</tt>
# :BLOCKQUOTE:: the optional word following <tt>>>></tt>, or +nil+
# :BREAK::      the trailing space matched after a :TEXT token
#
# Raises ParseError if a list label is neither a letter nor a digit.

def tokenize input
  setup_scanner input

  until @s.eos? do
    pos = @s.pos

    # Runs of spaces never produce a token of their own; they only move the
    # scanner forward, which shows up in the position of the next token.
    next if @s.scan(/ +/)

    @tokens << case
               # [CR]LF => :NEWLINE
               when @s.scan(/\r?\n/) then
                 token = [:NEWLINE, @s.matched, *pos]
                 @s.newline!
                 token
               # === text => :HEADER, then :TEXT when the line has text
               when @s.scan(/(=+)(\s*)/) then
                 level = @s[1].length
                 header = [:HEADER, level, *pos]

                 # \s* also matches line terminators.  If it swallowed one,
                 # the heading has no text on its line: give the whitespace
                 # back so the newline is tokenized normally (and
                 # @s.newline! is called for it) instead of treating the
                 # following line as the heading text.
                 if @s[2].include? "\n" then
                   @s.unscan(@s[2])
                   header
                 else
                   pos = @s.pos
                   @s.scan(/.*/)
                   @tokens << header
                   [:TEXT, @s.matched.sub(/\r$/, ''), *pos]
                 end
               # --- (at least 3) and nothing else on the line => :RULE
               when @s.scan(/(-{3,}) *\r?$/) then
                 [:RULE, @s[1].length - 2, *pos]
               # * or - followed by spaces and text => :BULLET
               when @s.scan(/([*-]) +(\S)/) then
                 # The first character of the text was only a lookahead.
                 @s.unscan(@s[2])
                 [:BULLET, @s[1], *pos]
               # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
               when @s.scan(/([a-z]|\d+)\. +(\S)/i) then
                 list_label = @s[1]
                 # The first character of the text was only a lookahead.
                 @s.unscan(@s[2])
                 list_type =
                   case list_label
                   when /[a-z]/ then :LALPHA
                   when /[A-Z]/ then :UALPHA
                   when /\d/    then :NUMBER
                   else
                     raise ParseError, "BUG token #{list_label}"
                   end
                 [list_type, list_label, *pos]
               # [text] followed by spaces or end of line => :LABEL
               when @s.scan(/\[(.*?)\]( +|\r?$)/) then
                 [:LABEL, @s[1], *pos]
               # text:: followed by spaces or end of line => :NOTE
               when @s.scan(/(.*?)::( +|\r?$)/) then
                 [:NOTE, @s[1], *pos]
               # >>> followed by end of line => :BLOCKQUOTE
               # An optional CR is accepted before the end of the line, like
               # in the other end-of-line rules, so CRLF input works too.
               when @s.scan(/>>> *(\w+)?\r?$/) then
                 [:BLOCKQUOTE, @s[1], *pos]
               # anything else => :TEXT, optionally followed by :BREAK
               else
                 @s.scan(/(.*?)( )?\r?$/)
                 token = [:TEXT, @s[1], *pos]

                 if @s[2] then
                   @tokens << token
                   # The break starts right after the text on the same line.
                   [:BREAK, @s[2], pos[0] + @s[1].length, pos[1]]
                 else
                   token
                 end
               end
  end

  self
end