def compute_updates()

in tools/archiver.py [0:0]


    def compute_updates(self, lid, private, msg):
        """Determine what needs to be sent to the archiver.

        :param lid: The list id; if empty it is derived from the message's list-id header
        :param private: Whether privately archived email or not (bool)
        :param msg: The message object

        :return A four-tuple consisting of: the digested email as a dict (None if the
                message has neither a usable body nor attachments), the contents value
                returned by self.msgfiles(msg), the metadata fields and any in-reply-to
                data found. Callers must check the first element for None.
        """

        ojson = None
        if not lid:
            lid = normalize_lid(msg.get('list-id'))
        if self.cropout:
            crops = self.cropout.split(" ")
            # Regex replace?
            if len(crops) == 2:
                lid = re.sub(crops[0], crops[1], lid)
            # Standard crop out?
            else:
                lid = lid.replace(self.cropout, "")

        # Every header listed in self.keys ends up in msg_metadata as a str;
        # missing or empty headers are stored as "".
        msg_metadata = {}
        for k in self.keys:
            value = msg.get(k)
            msg_metadata[k] = str(value) if value else ""

        # Fallback MID, used when the configured generator fails further down.
        # N.B. this expression must not change, otherwise fallback MIDs change.
        mid = hashlib.sha224(str("%s-%s" % (lid, msg_metadata['archived-at'])).encode('utf-8')).hexdigest() + "@" + (lid if lid else "none")

        # Decode RFC 2047 encoded-words in the headers that are displayed.
        for key in ['to', 'from', 'subject', 'message-id']:
            try:
                if msg_metadata.get(key):
                    hval = ""
                    for text, charset in email.header.decode_header(msg_metadata[key]):
                        if charset is None or charset.find("8bit") != -1:
                            hval += text.decode('utf-8') if isinstance(text, bytes) else text
                        else:
                            hval += text.decode(charset, errors='ignore')
                    # Only replace the raw value once the whole header decoded cleanly
                    msg_metadata[key] = hval
            except Exception as err:
                print("Could not decode headers, ignoring..: %s" % err)

        # Work out the message date: prefer Date, then Archived-At, then "now".
        mdate = None
        for date_key in ('date', 'archived-at'):
            raw_date = msg_metadata.get(date_key)
            if not raw_date:
                continue
            try:
                mdate = email.utils.parsedate_tz(raw_date)
            except Exception:
                mdate = None
            if mdate:
                break

        date_format = "%Y/%m/%d %H:%M:%S"
        epoch = None
        mdatestring = None
        if mdate:
            try:
                epoch = email.utils.mktime_tz(mdate)
                mdatestring = time.strftime(date_format, time.gmtime(epoch))
            except (TypeError, ValueError, OverflowError, OSError):
                # Parsed, but not representable as a timestamp
                epoch = None
        if epoch is None:
            # Neither header produced a usable date (parsedate_tz returns None
            # for unparsable input), so fall back to the current time.
            print("Date (%s) seems totally wrong, setting to _now_ instead." % msg_metadata.get('date'))
            mdate = time.gmtime() # Get a standard 9-tuple
            mdate = mdate + (0, ) # Fake a TZ (10th element)
            epoch = email.utils.mktime_tz(mdate)
            mdatestring = time.strftime(date_format, time.gmtime(epoch))

        # mdate calculations are all done, prepare the index entry
        is_flowed = "flowed" in msg_metadata.get('content-type', '')
        body = self.msgbody(msg)
        saved_body = None # for format=flowed
        try:
            if is_flowed:
                saved_body = body # so we can redo it properly later
                # N.B. the convertToWrapped call usually fails, because body is a generally a string here
                # However sometimes body is bytes at this point in which case it works
                body = formatflowed.convertToWrapped(body, character_set="utf-8")
                # DO NOT FIX IT -- otherwise generated MIDs will change
                # The code now applies the formatting properly later
            if isinstance(body, str):
                body = body.encode('utf-8')
        except Exception:
            # Recovery chain; its exact order determines the body handed to the
            # MID generator, so it is intentionally left as it was.
            try:
                body = body.decode(chardet.detect(body)['encoding'])
            except Exception:
                try:
                    body = body.decode('latin-1')
                except Exception:
                    try:
                        if isinstance(body, str):
                            body = body.encode('utf-8')
                    except Exception:
                        body = None

        attachments, contents = self.msgfiles(msg)
        irt = ""
        if body is not None or attachments:
            pmid = mid
            try:
                mid = generators.generate(self.generator, msg, body, lid, attachments)
            except Exception as err:
                if logger:
                    # N.B. use .get just in case there is no message-id
                    logger.info("Could not generate MID using %s: %s. MSGID: %s", self.generator, err, msg_metadata.get('message-id', '?').strip())
                mid = pmid

            if 'in-reply-to' in msg_metadata:
                # Metadata values are always str (see above), so no conversion is needed
                irt = msg_metadata['in-reply-to']

            if not self.skipff and is_flowed:
                if isinstance(saved_body, str):
                    saved_body = saved_body.encode('utf-8', 'replace')
                try:
                    # Allow wrapping to be done on the client display by unwrapping
                    # to a single long line.
                    # The value 2000 should be more than enough for most email paragraphs.
                    # formatflowed requires CRLF line endings, but generates LF endings...
                    # TEMP: CRLF conversion is disabled until the tests can be fixed
                    body = formatflowed.convertToWrapped(saved_body, width=2000, wrap_fixed=False, character_set="utf-8")
                except Exception:
                    pass # Best effort: keep the body computed above, don't try to recover

            ojson = {
                'from_raw': msg_metadata['from'],
                'from': msg_metadata['from'],
                'to': msg_metadata['to'],
                'subject': msg_metadata['subject'],
                'message-id': msg_metadata['message-id'],
                'mid': mid,
                'cc': msg_metadata.get('cc'),
                'epoch': epoch,
                'list': lid,
                'list_raw': lid,
                'date': mdatestring,
                'private': private,
                'references': msg_metadata['references'],
                'in-reply-to': irt,
                'body': body.decode('utf-8', 'replace') if isinstance(body, bytes) else body,
                'attachments': attachments
            }

        return ojson, contents, msg_metadata, irt