tokenize input

in lib/rdoc/markup/parser.rb [480:563]


  def tokenize input
    setup_scanner input

    until @s.eos? do
      pos = @s.pos

      
      
      next if @s.scan(/ +/)

      
      

      @tokens << case
                 
                 when @s.scan(/\r?\n/) then
                   token = [:NEWLINE, @s.matched, *pos]
                   @s.newline!
                   token
                 
                 when @s.scan(/(=+)(\s*)/) then
                   level = @s[1].length
                   header = [:HEADER, level, *pos]

                   if @s[2] =~ /^\r?\n/ then
                     @s.unscan(@s[2])
                     header
                   else
                     pos = @s.pos
                     @s.scan(/.*/)
                     @tokens << header
                     [:TEXT, @s.matched.sub(/\r$/, ''), *pos]
                   end
                 
                 when @s.scan(/(-{3,}) *\r?$/) then
                   [:RULE, @s[1].length - 2, *pos]
                 
                 when @s.scan(/([*-]) +(\S)/) then
                   @s.unscan(@s[2])
                   [:BULLET, @s[1], *pos]
                 
                 when @s.scan(/([a-z]|\d+)\. +(\S)/i) then
                   
                   
                   
                   
                   
                   list_label = @s[1]
                   @s.unscan(@s[2])
                   list_type =
                     case list_label
                     when /[a-z]/ then :LALPHA
                     when /[A-Z]/ then :UALPHA
                     when /\d/    then :NUMBER
                     else
                       raise ParseError, "BUG token #{list_label}"
                     end
                   [list_type, list_label, *pos]
                 
                 when @s.scan(/\[(.*?)\]( +|\r?$)/) then
                   [:LABEL, @s[1], *pos]
                 
                 when @s.scan(/(.*?)::( +|\r?$)/) then
                   [:NOTE, @s[1], *pos]
                 
                 when @s.scan(/>>> *(\w+)?$/) then
                   [:BLOCKQUOTE, @s[1], *pos]
                 
                 else
                   @s.scan(/(.*?)(  )?\r?$/)
                   token = [:TEXT, @s[1], *pos]

                   if @s[2] then
                     @tokens << token
                     [:BREAK, @s[2], pos[0] + @s[1].length, pos[1]]
                   else
                     token
                   end
                 end
    end

    self
  end