in tools/mboxhdr2csv.rb [268:357]
# Read the mbox identified by +f+ and build one statistics Hash per message.
#
# For every message returned by read_mbox the method records:
# * :order      - 1-based position of the message in the mbox
# * :from, :subject, :listid, :date - header values (the raw header fields are
#   stored instead, together with :parseerr, when reading a +.value+ fails)
# * :messageid, :inreplyto
# * :lines, :links - count of non-quoted, non-blank body lines before an
#   "-----Original Message-----" marker, and how many of those match URIRX
# * :y, :m, :d, :w, :h, :z - date components, only when :date can be parsed
# * :nondiscuss - type key of the first MailUtils::NONDISCUSSION_SUBJECTS
#   pattern (looked up by :listid) that matches the subject
# MailUtils.find_who_from is also given the Hash before the date is split.
#
# @param f passed through unchanged to read_mbox
# @return [Array] +[messages, errs]+ where +errs+ is a list of
#   +[exception, order]+ pairs for messages that could not be processed;
#   +[nil, exception]+ when read_mbox itself raises
def mbox2stats(f)
  begin
    mails = read_mbox(f)
  rescue => e
    return nil, e
  end
  errs = []
  messages = []
  order = 0
  mails.each do |message|
    order += 1
    # Record the position up front so that a failure while parsing the raw
    # message can still be attributed to it in errs.
    mdata = { order: order }
    begin
      mail = Mail.read_from_string(message.gsub(/\r?\n/, "\r\n"))
      begin
        mdata[:from] = mail[:from].value
        mdata[:subject] = mail[:subject].value
        mdata[:listid] = mail[:List_Id].value
        mdata[:date] = mail.date.to_s
      rescue
        # Fall back to the header fields themselves and keep the parser's
        # error list alongside them.
        mdata[:from] = mail[:from]
        mdata[:subject] = mail[:subject]
        mdata[:listid] = mail[:List_Id]
        mdata[:date] = mail.date.to_s
        mdata[:parseerr] = mail.errors
      end
      mdata[:messageid] = mail.message_id
      mdata[:inreplyto] = mail.in_reply_to

      body = mail.multipart? ? mail.text_part : mail.body
      ctr = 0
      links = 0
      body.decoded.split(/\r?\n/).each do |l|
        case l
        when /\A\s*>/
          # quoted line: not counted
        when /\A\s*\z/
          # blank line: not counted
        when /\AOn.*wrote:\z/
          # attribution line: not counted (`when` does not fall through, so
          # scanning continues)
        when /\A-----Original Message-----/
          break # everything after this marker is ignored
        else
          links += 1 if l =~ URIRX
          ctr += 1
        end
      end
      mdata[:lines] = ctr
      mdata[:links] = links

      MailUtils.find_who_from(mdata)

      begin
        d = Time.parse(mdata[:date])
        mdata[:y] = d.year
        mdata[:m] = d.month
        mdata[:d] = d.day
        mdata[:w] = d.wday
        mdata[:h] = d.hour
        mdata[:z] = d.zone
      rescue => date_err
        # Use the exception raised here; an unparseable date only loses the
        # date components, the message itself is still reported.
        puts "DEBUG: #{date_err.message} parsing: #{mdata[:date]}"
      end

      patterns = MailUtils::NONDISCUSSION_SUBJECTS[mdata[:listid]]
      if patterns
        hit = patterns.find { |_typ, rx| mdata[:subject] =~ rx }
        mdata[:nondiscuss] = hit.first if hit
      end

      messages << mdata
    rescue => err
      errs << [err, order]
    end
  end
  return messages, errs
end