in lib/linguist/tokenizer.rb [158:196]
# Extract tokens from the text of an SGML-style tag.
#
# Emitted tokens are:
#   * the tag opener, normalised to end in ">" (e.g. "<a>" or "</a>"),
#   * each attribute name that is followed by "=" (e.g. "href="),
#   * each bare word (e.g. a valueless attribute such as "disabled").
# Attribute values are skipped, not emitted. Scanning stops at the first
# ">" encountered outside of a skipped value.
#
# data - String to scan.
#
# Examples
#
#   extract_sgml_tokens('<a href="" class="x">')
#   # => ["<a>", "href=", "class="]
#
# Returns an Array of token Strings.
def extract_sgml_tokens(data)
  s = StringScanner.new(data)
  tokens = []

  until s.eos?
    # Tag opener: "<name" or "</name"; a ">" is appended to the token.
    if (token = s.scan(/<\/?[^\s>]+/))
      tokens << "#{token}>"

    # Attribute name with its trailing "=".
    elsif (token = s.scan(/\w+=/))
      tokens << token

      # Skip over the attribute value. The closing quote is the first quote
      # not preceded by a backslash. A negative lookbehind is used (rather
      # than consuming a preceding character) so that an empty value such as
      # attr="" is closed by the quote sitting right at the scan position
      # instead of running on into the next attribute's quotes.
      # If no closing quote exists, skip_until returns nil without moving and
      # the remaining text is tokenized by the branches below.
      if s.scan(/"/)
        s.skip_until(/(?<!\\)"/)
      elsif s.scan(/'/)
        s.skip_until(/(?<!\\)'/)
      else
        # Unquoted value: skip through the next run of word characters.
        s.skip_until(/\w+/)
      end

    # Bare word (e.g. an attribute without a value).
    elsif (token = s.scan(/\w+/))
      tokens << token

    # End of the tag: stop scanning.
    elsif s.scan(/>/)
      s.terminate

    # Anything else (whitespace, punctuation): consume one character.
    else
      s.getch
    end
  end

  tokens
end