mbox2stats

in tools/mboxhdr2csv.rb [268:357]


  # Read an mbox via read_mbox and build one statistics hash per message.
  #
  # Each hash carries :order (1-based position of the message in the mbox),
  # :from, :subject, :listid, :date, :messageid, :inreplyto, :lines (count of
  # body lines that are neither quoted, blank, nor an "On ... wrote:" marker),
  # :links (how many of those lines match URIRX), anything
  # MailUtils.find_who_from adds, the date components :y/:m/:d/:w/:h/:z when
  # the date string parses, and :nondiscuss when the subject matches one of
  # the MailUtils::NONDISCUSSION_SUBJECTS patterns registered for the list id.
  # :parseerr is set only when reading the header values raised.
  #
  # @param f argument handed unchanged to read_mbox
  # @return [Array] a two-element array:
  #   * +[nil, exception]+ when read_mbox itself raised;
  #   * +[messages, errs]+ otherwise, where +errs+ is an array of
  #     +[exception, order]+ pairs for the messages that could not be processed.
  def mbox2stats(f)
    begin
      mails = read_mbox(f)
    rescue => e
      return nil, e
    end

    errs = []
    messages = []
    order = 0
    mails.each do |message|
      order += 1
      begin
        mdata = {}
        # Normalise every line ending to CRLF before handing the text to Mail.
        mail = Mail.read_from_string(message.gsub(/\r?\n/, "\r\n"))
        mdata[:order] = order
        begin
          mdata[:from] = mail[:from].value
          mdata[:subject] = mail[:subject].value
          mdata[:listid] = mail[:List_Id].value
          mdata[:date] = mail.date.to_s
        rescue => _e
          # Fall back to the raw header objects and keep the parser's errors.
          mdata[:from] = mail[:from]
          mdata[:subject] = mail[:subject]
          mdata[:listid] = mail[:List_Id]
          mdata[:date] = mail.date.to_s
          mdata[:parseerr] = mail.errors
        end
        mdata[:messageid] = mail.message_id
        mdata[:inreplyto] = mail.in_reply_to

        # NOTE(review): for a multipart mail without a text part, text_part is
        # presumably nil; the resulting error lands in errs - confirm intended.
        content = mail.multipart? ? mail.text_part : mail.body
        text_lines = content.decoded.split(/\r?\n/)

        ctr = 0 # lines of new (non-quoted) content
        links = 0 # new-content lines matching URIRX
        text_lines.each do |l|
          case l
          when /\A\s*>/
            # quoted text: not counted
          when /\A\s*\z/
            # blank line: not counted
          when /\AOn.*wrote:\z/
            # reply attribution line: not counted
          when /\A-----Original Message-----/
            # everything after this marker is ignored
            break
          else
            links += 1 if l =~ URIRX
            ctr += 1
          end
        end
        mdata[:lines] = ctr
        mdata[:links] = links

        # Presumably annotates mdata with sender details - see MailUtils.
        MailUtils.find_who_from(mdata)

        begin
          d = Time.parse(mdata[:date])
          mdata[:y] = d.year
          mdata[:m] = d.month
          mdata[:d] = d.day
          mdata[:w] = d.wday
          mdata[:h] = d.hour
          mdata[:z] = d.zone
        rescue => date_err
          # Best effort: keep the message, just without the date components.
          puts "DEBUG: #{date_err.message} parsing: #{mdata[:date]}"
        end

        # Tag the message with the first matching nondiscussion type, if any.
        rules = MailUtils::NONDISCUSSION_SUBJECTS[mdata[:listid]]
        if rules
          hit = rules.find { |_typ, rx| mdata[:subject] =~ rx }
          mdata[:nondiscuss] = hit.first if hit
        end

        messages << mdata
      rescue => e
        # Use the loop counter: mdata[:order] is still unset when the mail
        # could not be read at all.
        errs << [e, order]
      end
    end
    [messages, errs]
  end