dump_all_tokens

in lib/linguist/classifier.rb [147:169]


      # Internal: Print a debugging table to stdout comparing, per token, the
      # value returned by token_probability for each language.
      #
      # tokens    - Array of tokens responding to #size (repeats are counted
      #             and shown in the count column).
      # languages - Array of languages; each one becomes a 10-wide column.
      #
      # A header row is always printed. Each distinct token then gets one row,
      # in sorted order, unless token_probability returns the same value for
      # every language (such rows carry no information and are skipped). In a
      # printed row the language(s) holding the minimum probability show "-";
      # every other cell shows count * (log(prob) - log(min)).
      #
      # NOTE(review): token_probability is defined outside this block; it is
      # assumed to return a non-negative Numeric - confirm against its
      # definition.
      #
      # Returns the sorted Array of [token, count] pairs that was iterated.
      def dump_all_tokens(tokens, languages)
        # nil when tokens is empty, which degrades the format to a plain "%s".
        maxlen = tokens.map { |tok| tok.size }.max

        printf "%#{maxlen}s", ""
        puts "    #" + languages.map { |lang| sprintf("%10s", lang) }.join

        tokmap = Hash.new(0)
        tokens.each { |tok| tokmap[tok] += 1 }

        tokmap.sort.each do |tok, count|
          probs = languages.map { |lang| token_probability(tok, lang) }

          # Skip rows where every language agrees. This is also true when
          # languages is empty, so the min/log below is never attempted on an
          # empty list (Math.log(nil) would raise TypeError).
          next if probs.all? { |prob| prob == probs.first }

          min = probs.min
          minlog = Math.log(min)

          printf "%#{maxlen}s%5d", tok, count
          puts probs.map { |prob|
            prob == min ? "         -" : sprintf("%10.3f", count * (Math.log(prob) - minlog))
          }.join
        end
      end