def msgbody()

in tools/archiver.py [0:0]


    def msgbody(self, msg):
        """Extract the preferred text body from an email message.

        Walks every MIME part of ``msg`` and keeps the first non-empty
        ``text/plain`` or ``text/enriched`` payload as the body. When
        ``self.html`` is set, the first non-empty ``text/html`` payload is
        remembered as well and, after conversion with ``self.html2text``,
        replaces the body if the body is missing, at most one character long,
        or contains ``self.ignore_body``.

        Parameters:
            msg: the parsed message; must provide ``walk()`` and
                ``get_charsets()`` (as ``email.message.Message`` does).

        Returns:
            ``str`` when a body was found and decoded (or produced from HTML),
            ``bytes`` when a body was found but no charset is declared by the
            body part or anywhere else in the message, and ``None`` when the
            message has no usable text part.
        """
        body = None
        body_charset = None  # charset declared by the part that supplied body
        first_html = None
        html_charset = None  # charset declared by the first HTML part
        for part in msg.walk():
            # can be called from importer
            if self.verbose:
                print("Content-Type: %s" % part.get_content_type())
            # Find the first body part and the first HTML part.
            # Note: cannot break out early because the HTML part is still
            # needed as a fallback if the body turns out to be unusable.
            try:
                content_type = part.get_content_type()
                if content_type in ('text/plain', 'text/enriched'):
                    if not body:
                        body = part.get_payload(decode=True)
                        body_charset = part.get_content_charset()
                elif self.html and not first_html and content_type == 'text/html':
                    first_html = part.get_payload(decode=True)
                    # Kept separate from body_charset: the HTML charset must
                    # not clobber the charset of an already chosen text body.
                    html_charset = part.get_content_charset()
            except Exception as err:
                print(err)

        # NOTE: str(body) on bytes yields its repr (b'...'), so ignore_body is
        # matched against the escaped representation of the raw payload.
        use_html = first_html and (
            not body
            or len(body) <= 1
            or (self.ignore_body and str(self.ignore_body) in str(body))
        )
        if use_html:
            if isinstance(first_html, bytes):
                try:
                    # Honour the charset declared by the HTML part, if any.
                    html_text = first_html.decode(html_charset or 'utf-8')
                except Exception:
                    # Unknown or wrong charset: best-effort UTF-8 decode.
                    html_text = first_html.decode('utf-8', 'ignore')
            else:
                html_text = first_html
            # this requires a GPL lib, user will have to install it themselves
            body = self.html2text(html_text)
            # at this point body can no longer be bytes

        # See issue#463 also #244
        # Try exactly one charset: the one declared by the body part if there
        # is one, otherwise the first charset declared anywhere in the message.
        # If that decode fails, fall back to utf-8 with replacement characters.
        if isinstance(body, bytes):
            charset = body_charset
            if charset is None:
                charset = next(
                    (c for c in msg.get_charsets() if c is not None), None
                )
            if charset is not None:
                try:
                    body = body.decode(charset)
                except Exception:
                    # Charset names come from untrusted mail headers, so any
                    # failure (unknown codec, invalid bytes) takes the fallback.
                    body = body.decode('utf-8', errors='replace')

        # At this point body may be None, bytes (no charset declared) or str
        return body