in tools/archiver.py [0:0]
def compute_updates(self, lid, private, msg):
    """Determine what needs to be sent to the archiver.

    :param lid: The list id; when empty it is derived from the message's
                List-Id header via normalize_lid()
    :param private: Whether privately archived email or not (bool)
    :param msg: The message object
    :return: Always a four-tuple (ojson, contents, msg_metadata, irt):
             ojson is the digested email as a dict, or None when the message
             has neither a usable body nor attachments;
             contents is the second value returned by self.msgfiles()
             (presumably the attachment contents -- confirm against msgfiles);
             msg_metadata maps every header named in self.keys to a string;
             irt is any in-reply-to data found ("" if none).
    """
    ojson = None
    if not lid:
        lid = normalize_lid(msg.get('list-id'))
    if self.cropout:
        crops = self.cropout.split(" ")
        if len(crops) == 2:
            # Two space-separated parts: treat as regex pattern + replacement
            lid = re.sub(crops[0], crops[1], lid)
        else:
            # Otherwise crop out the literal text
            lid = lid.replace(self.cropout, "")

    # Every header listed in self.keys ends up present as a string
    # (empty when the header is missing or falsy).
    msg_metadata = {}
    for k in self.keys:
        value = msg.get(k)
        msg_metadata[k] = str(value) if value else ""

    # Provisional MID; only kept if the configured generator fails below.
    mid_seed = "%s-%s" % (lid, msg_metadata['archived-at'])
    mid = hashlib.sha224(mid_seed.encode('utf-8')).hexdigest() + "@" + (lid if lid else "none")

    # Decode RFC 2047 encoded-words in the headers that get displayed.
    for key in ['to', 'from', 'subject', 'message-id']:
        try:
            hval = ""
            if msg_metadata.get(key):
                for chunk, charset in email.header.decode_header(msg_metadata[key]):
                    if charset is None or charset.find("8bit") != -1:
                        hval += chunk.decode('utf-8') if isinstance(chunk, bytes) else chunk
                    else:
                        hval += chunk.decode(charset, errors='ignore')
                # Only replace the raw value once the whole header decoded
                msg_metadata[key] = hval
        except Exception as err:
            print("Could not decode headers, ignoring..: %s" % err)

    # Work out the message date: Date header, then Archived-At, then "now".
    mdate = None
    try:
        mdate = email.utils.parsedate_tz(msg_metadata.get('date'))
    except Exception:
        pass
    if not mdate and msg_metadata.get('archived-at'):
        try:
            mdate = email.utils.parsedate_tz(msg_metadata.get('archived-at'))
        except Exception:
            mdate = None
    if not mdate:
        # Reached when neither header parses. Previously an unparseable
        # Archived-At skipped this fallback and mktime_tz(None) raised.
        print("Date (%s) seems totally wrong, setting to _now_ instead." % msg_metadata.get('date'))
        # Standard 9-tuple plus a fake TZ offset as the 10th element
        mdate = time.gmtime() + (0, )

    # mdate calculations are all done, prepare the index entry
    epoch = email.utils.mktime_tz(mdate)
    mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(epoch))

    is_flowed = 'content-type' in msg_metadata and msg_metadata['content-type'].find("flowed") != -1
    body = self.msgbody(msg)
    saved_body = None  # for format=flowed
    try:
        if is_flowed:
            saved_body = body  # so we can redo it properly later
            # N.B. the convertToWrapped call usually fails, because body is generally a string here
            # However sometimes body is bytes at this point in which case it works
            body = formatflowed.convertToWrapped(body, character_set="utf-8")
            # DO NOT FIX IT -- otherwise generated MIDs will change
            # The code now applies the formatting properly later
        if isinstance(body, str):
            body = body.encode('utf-8')
    except Exception:
        # Fallback chain, kept step-for-step because its result feeds the
        # MID generator: detected charset, then latin-1, then plain
        # utf-8 encoding of a str body, and finally give up.
        try:
            body = body.decode(chardet.detect(body)['encoding'])
        except Exception:
            try:
                body = body.decode('latin-1')
            except Exception:
                try:
                    if isinstance(body, str):
                        body = body.encode('utf-8')
                except Exception:
                    body = None

    attachments, contents = self.msgfiles(msg)
    irt = ""
    if body is not None or attachments:
        try:
            mid = generators.generate(self.generator, msg, body, lid, attachments)
        except Exception as err:
            # mid is untouched on failure, so the provisional value stays
            if logger:
                # N.B. use .get just in case there is no message-id
                logger.info("Could not generate MID using %s: %s. MSGID: %s", self.generator, err, msg_metadata.get('message-id', '?').strip())

        if 'in-reply-to' in msg_metadata:
            try:
                try:
                    irt = "".join(msg_metadata['in-reply-to'])
                except Exception:
                    irt = str(msg_metadata.get('in-reply-to'))
            except Exception:
                irt = ""

        if not self.skipff and is_flowed:
            if isinstance(saved_body, str):
                saved_body = saved_body.encode('utf-8', 'replace')
            try:
                # Allow wrapping to be done on the client display by unwrapping
                # to a single long line.
                # The value 2000 should be more than enough for most email paragraphs.
                # formatflowed requires CRLF line endings, but generates LF endings...
                # TEMP: conversion via to_crlf(saved_body) is disabled until the
                # tests can be fixed to cope with it.
                body = formatflowed.convertToWrapped(saved_body, width=2000, wrap_fixed=False, character_set="utf-8")
            except Exception:
                pass  # Don't try to recover

        ojson = {
            'from_raw': msg_metadata['from'],
            'from': msg_metadata['from'],
            'to': msg_metadata['to'],
            'subject': msg_metadata['subject'],
            'message-id': msg_metadata['message-id'],
            'mid': mid,
            'cc': msg_metadata.get('cc'),
            'epoch': epoch,
            'list': lid,
            'list_raw': lid,
            'date': mdatestring,
            'private': private,
            'references': msg_metadata['references'],
            'in-reply-to': irt,
            'body': body.decode('utf-8', 'replace') if isinstance(body, bytes) else body,
            'attachments': attachments,
        }

    return ojson, contents, msg_metadata, irt