tools/plugins/generators.py (27 lines of code) (raw):

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This file contains the various newer generation ID generators for
Pony Mail's archivers.
For older ID generators, see generators_old.py
"""

import hashlib

# The module may be run as a plain script (no package context) or be
# imported as part of a package; pick the import form that matches.
if __package__:
    from . import dkim_id
else:
    import dkim_id


# DKIM-ID generator: uses DKIM canonicalisation
# Recommended as default for clusters
def dkimid(_msg, _body, lid, _attachments, raw_msg):
    r"""
    DKIM-ID generator: truncated SHA-256 HMAC of DKIM input

    We use the headers recommended in RFC 4871, plus DKIM-Signature

    Parameters:
    _msg - the parsed message (not used)
    _body - the parsed text content (not used)
    lid - list id; a str is encoded to UTF-8 bytes, any other value
          is handed on unchanged
    _attachments - list of attachments (not used)
    raw_msg - the original message bytes

    Returns: str "<dkimid>", a 32 lower char base32 encoded SHA-256 HMAC

    >>> dkimid(None, None, None, None, b"")
    '8fgp2do75oqo6qd08vs4p7dpp1gj4vjn'
    """
    # Only text list ids need converting; everything else (bytes, None, ...)
    # is passed straight through to dkim_id.dkim_id.
    list_id = lid.encode("utf-8", errors="replace") if isinstance(lid, str) else lid
    return dkim_id.dkim_id(raw_msg, list_id)


# Full generator: uses the entire email (including server-dependent data)
# Used by default until August 2020.
# See 'dkim' for recommended generation.
def full(msg, _body, lid, _attachments, _raw_msg):
    """
    Full generator: uses the entire email (including server-dependent data)

    The id is almost certainly unique, but different copies of the
    message are likely to have different headers, thus ids.

    WARNING: the archiver by default adds an archived-at header with the
    current time. This is included in the hash, so messages will get
    different Permalinks if reloaded from source.

    Note also that the policy (compat32, default, smtputf8) affects the
    contents returned by as_bytes() and thus the generated id.

    Parameters:
    msg - the parsed message
    _body - the parsed text content (not used)
    lid - list id
    _attachments - list of attachments (not used)
    _raw_msg - the original message bytes (not used)

    Returns: "<hash>@<lid>" where hash is sha224 of message bytes
    """
    digest = hashlib.sha224(msg.as_bytes()).hexdigest()
    return "%s@%s" % (digest, lid)


# Registry mapping each generator name to the function implementing it.
__GENERATORS = dict(
    dkim=dkimid,
    full=full,
)


def generator(name):
    """
    Look up an ID generator function by name.

    Parameters:
    name - the generator name, i.e. a key of the generator registry

    Returns: the function registered under that name; if the name is not
    registered, a warning is printed and dkimid is returned instead
    """
    if name in __GENERATORS:
        return __GENERATORS[name]
    print("WARN: generator %s not found, defaulting to 'dkim'" % name)
    return dkimid


def generate(name, msg, body, lid, attachments, raw_msg):
    """
    Generate an id using the generator registered under the given name.

    The remaining arguments are forwarded, in order, to the function
    returned by generator(name), and its result is returned unchanged.
    """
    make_id = generator(name)
    return make_id(msg, body, lid, attachments, raw_msg)


def generator_names():
    """Return the names of all registered generators as a new list."""
    return [*__GENERATORS]