in tools/archiver.py [0:0]
def _replace_or_add_header(msg, name, value):
    """Set header *name* on *msg* to *value*.

    Tries ``msg.replace_header`` first; if that raises ``KeyError`` (no such
    header present), falls back to ``msg.add_header``.
    """
    try:
        msg.replace_header(name, value)
    except KeyError:
        msg.add_header(name, value)


def main() -> None:
    """Command-line entry point: read one email from stdin and archive it.

    The raw message is read from binary stdin and parsed with
    ``parse_message``. Command-line options may then rewrite the List-ID
    (``--altheader``, ``--lid``), drop the message (``--ignore``), require a
    matching source IP in a Received header (``--allowfrom``) or overwrite
    the Date header (``--makedate``). If the message carries a list-id it is
    handed to ``Archiver.archive_message``; otherwise nothing is imported.

    Exits with status -1 when the initial parse fails, when ``--allowfrom``
    is given and no Received header matches, or when any other exception is
    raised and ``--quiet`` is not set. Exits with status 0 when the message
    is dropped by ``--ignore``.
    """
    parser = argparse.ArgumentParser(description="Command line options.")
    parser.add_argument(
        "--lid", dest="lid", type=str, nargs=1, help="Alternate specific list ID"
    )
    parser.add_argument(
        "--digest",
        dest="digest",
        action="store_true",
        help="Only digest the email and spit out the generated ID, do not archive",
    )
    parser.add_argument(
        "--altheader",
        dest="altheader",
        type=str,
        nargs=1,
        help="Alternate header for list ID",
    )
    parser.add_argument(
        "--allowfrom",
        dest="allowfrom",
        type=str,
        nargs=1,
        help="(optional) source IP (mail server) to allow posts from, ignore if no match",
    )
    parser.add_argument(
        "--ignore",
        dest="ignorefrom",
        type=str,
        nargs=1,
        help="Sender/list to ignore input from (owner etc)",
    )
    parser.add_argument(
        "--private",
        dest="private",
        action="store_true",
        help="This is a private archive",
    )
    parser.add_argument(
        "--makedate",
        dest="makedate",
        action="store_true",
        help="Use the archive timestamp as the email date instead of the Date header",
    )
    parser.add_argument(
        "--quiet",
        dest="quiet",
        action="store_true",
        help="Do not exit -1 if the email could not be parsed",
    )
    parser.add_argument(
        "--verbose",
        dest="verbose",
        action="store_true",
        help="Output additional log messages",
    )
    parser.add_argument(
        "--html2text",
        dest="html2text",
        action="store_true",
        help="Try to convert HTML to text if no text/plain message is found",
    )
    parser.add_argument(
        "--dry",
        dest="dry",
        action="store_true",
        help="Do not save emails to elasticsearch, only test parsing",
    )
    parser.add_argument(
        "--ignorebody",
        dest="ibody",
        type=str,
        nargs=1,
        help="Optional email bodies to treat as empty (in conjunction with --html2text)",
    )
    parser.add_argument(
        "--dumponfail",
        dest="dump",
        help="If pushing to ElasticSearch fails, dump documents in JSON format to this directory and "
        "fail silently.",
    )
    parser.add_argument(
        "--defaultepoch",
        dest="defaultepoch",
        help="If no date could be found in the email, use this epoch. Set to 'skip' to skip importing on bad date",
    )
    parser.add_argument("--generator", dest="generator", help="Override the generator.")
    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    archie = Archiver(
        generator=args.generator,
        parse_html=args.html2text,
        ignore_body=args.ibody,
        verbose=args.verbose,
    )

    # use binary input so parser can use appropriate charset
    input_stream = sys.stdin.buffer

    try:
        raw_message = input_stream.read()
        try:
            msg = parse_message(raw_message)
        except Exception as err:
            # NOTE(review): this exits -1 even when --quiet is set, although
            # the --quiet help text says it suppresses that - confirm intent.
            print("STDIN parser exception: %s" % err)
            sys.exit(-1)

        # Pick the alternate List-ID source header, if any. The second branch
        # is a fallback for a bare "altheader" word in argv, in which case the
        # last command-line argument is taken as the header name.
        alt_header = None
        if args.altheader:
            alt_header = args.altheader[0]
        elif "altheader" in sys.argv:
            alt_header = sys.argv[-1]
        if alt_header is not None and alt_header in msg:
            _replace_or_add_header(msg, "list-id", msg.get(alt_header))

        # Set specific LID? (values of 3 characters or fewer are ignored)
        if args.lid and len(args.lid[0]) > 3:
            _replace_or_add_header(msg, "list-id", args.lid[0])

        # Ignore based on --ignore flag?
        if args.ignorefrom:
            ignore_from = args.ignorefrom[0]
            sender = msg.get("from")
            list_id = msg.get("list-id")
            # Either header may be absent; fnmatch cannot take None, so only
            # test the headers that are actually present.
            if (sender and fnmatch.fnmatch(sender, ignore_from)) or (
                list_id and fnmatch.fnmatch(list_id, ignore_from)
            ):
                print("Ignoring message as instructed by --ignore flag")
                sys.exit(0)

        # Check CIDR if need be
        if args.allowfrom:
            allowed_net = netaddr.IPNetwork(args.allowfrom[0])
            allowed = False
            for received in msg.get_all("received") or []:
                m = re.search(r"from .+\[(.+)]", received)
                if not m:
                    continue
                try:
                    ip = netaddr.IPAddress(m.group(1))
                    if ip in allowed_net:
                        allowed = True
                        msg.add_header("ip-whitelisted", "yes")
                        break
                except (ValueError, netaddr.AddrFormatError):
                    # Bracketed text was not a usable IP; try the next header.
                    pass
            if not allowed:
                print("No whitelisted IP found in message, aborting")
                sys.exit(-1)

        # Replace date header with $now? Falls back to adding the header when
        # the message has no Date header at all.
        if args.makedate:
            _replace_or_add_header(msg, "date", email.utils.formatdate())

        is_public = not args.private

        if "list-id" in msg:
            list_id = msg.get("list-id")
            list_data = collections.namedtuple(
                "importmsg",
                [
                    "list_id",
                    "archive_public",
                    "archive_policy",
                    "list_name",
                    "description",
                ],
            )(
                list_id=list_id,
                archive_public=is_public,
                archive_policy=None,
                list_name=list_id,
                description=list_id,
            )

            try:
                lid, mid = archie.archive_message(
                    list_data,
                    msg,
                    raw_message,
                    args.dry,
                    args.dump,
                    args.defaultepoch,
                    args.digest,
                )
                if args.digest:
                    print(mid)
                else:
                    print(
                        "%s: Done archiving to %s as %s!"
                        % (email.utils.formatdate(), lid, mid)
                    )
            except Exception as err:
                if args.verbose:
                    traceback.print_exc()
                print("Archiving failed!: %s" % err)
                # Re-raised so the outer handler applies the --quiet policy.
                raise Exception("Archiving to ES failed") from err
        else:
            print("Nothing to import (no list-id found!)")
    except Exception as err:
        # extract the line number without using variables (which may cause issues?)
        # first entry of the traceback, 2nd field
        line = traceback.extract_tb(sys.exc_info()[2])[0][1]
        if args.quiet:
            print(
                "Could not parse email, but exiting quietly as --quiet is on: %s (@ %s)"
                % (err, line)
            )
        else:
            print("Could not parse email: %s (@ %s)" % (err, line))
            sys.exit(-1)