in tools/archiver.py [0:0]
def _set_header(msg, name, value):
    """Replace the first header called *name* in *msg*, adding it if absent.

    ``Message.replace_header`` raises ``KeyError`` when no header matches
    (matching is case-insensitive and the existing header keeps its original
    spelling); only that case falls back to ``add_header``.
    """
    try:
        msg.replace_header(name, value)
    except KeyError:
        msg.add_header(name, value)


def main():
    """Command line entry point: archive one email read from standard input.

    Parses the command line, reads the raw message bytes from STDIN, applies
    the optional List-ID / ignore / allowed-network / date overrides and then
    hands the message to ``Archiver.archive_message``.

    Exit behaviour:
      * ``sys.exit(0)`` when the message matches ``--ignore``.
      * ``sys.exit(-1)`` when ``--allowfrom`` is given and no Received header
        carries an address inside that network.
      * ``sys.exit(-1)`` when parsing or archiving raises, unless ``--quiet``
        is set, in which case the error is only printed.
    """
    parser = argparse.ArgumentParser(description='Command line options.')
    parser.add_argument('--lid', dest='lid', type=str, nargs=1,
                        help='Alternate specific list ID')
    parser.add_argument('--altheader', dest='altheader', type=str, nargs=1,
                        help='Alternate header for list ID')
    parser.add_argument('--allowfrom', dest='allowfrom', type=str, nargs=1,
                        help='(optional) source IP (mail server) to allow posts from, ignore if no match')
    parser.add_argument('--ignore', dest='ignorefrom', type=str, nargs=1,
                        help='Sender/list to ignore input from (owner etc)')
    parser.add_argument('--private', dest='private', action='store_true',
                        help='This is a private archive')
    parser.add_argument('--makedate', dest='makedate', action='store_true',
                        help='Use the archive timestamp as the email date instead of the Date header')
    parser.add_argument('--quiet', dest='quiet', action='store_true',
                        help='Do not exit -1 if the email could not be parsed')
    parser.add_argument('--verbose', dest='verbose', action='store_true',
                        help='Output additional log messages')
    parser.add_argument('--html2text', dest='html2text', action='store_true',
                        help='Try to convert HTML to text if no text/plain message is found')
    parser.add_argument('--dry', dest='dry', action='store_true',
                        help='Do not save emails to elasticsearch, only test parsing')
    parser.add_argument('--ignorebody', dest='ibody', type=str, nargs=1,
                        help='Optional email bodies to treat as empty (in conjunction with --html2text)')
    parser.add_argument('--dumponfail', dest='dump',
                        help='If pushing to ElasticSearch fails, dump documents in JSON format to this directory and '
                             'fail silently.')
    parser.add_argument('--generator', dest='generator',
                        help='Override the generator.')
    parser.add_argument('--skipff', dest='skipff', action='store_true',
                        help='Skip final format=flowed processing (mainly for unit-testing)')
    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    else:
        # elasticsearch logs lots of warnings on retries/connection failure
        # Also eliminates: 'Undecodable raw error response from server:' warning message
        logging.getLogger("elasticsearch").setLevel(logging.ERROR)

    archie = Archiver(generator=args.generator, parse_html=args.html2text, ignore_body=args.ibody,
                      verbose=args.verbose, dump_dir=args.dump)
    # use binary input so parser can use appropriate charset
    input_stream = sys.stdin.buffer

    try:
        raw_message = input_stream.read()
        try:
            msg = email.message_from_bytes(raw_message)
        except Exception as err:
            print("STDIN parser exception: %s" % err)
            # Without a parsed message nothing below can work; re-raise so the
            # outer handler reports the real parser error rather than a
            # NameError on the unbound 'msg'.
            raise

        # We're reading from STDIN, so let's fake an MM3 call
        alt_header = None
        if args.altheader:
            alt_header = args.altheader[0]
        elif 'altheader' in sys.argv:
            # Legacy form: a bare 'altheader' token on the command line, with
            # the header name taken from the last argument.
            alt_header = sys.argv[-1]
        if alt_header is not None and alt_header in msg:
            _set_header(msg, 'list-id', msg.get(alt_header))

        # Set specific LID?
        if args.lid and len(args.lid[0]) > 3:
            _set_header(msg, 'list-id', args.lid[0])

        # Ignore based on --ignore flag?
        if args.ignorefrom:
            ignore_from = args.ignorefrom[0]
            # Headers may be missing (None) or, for raw 8-bit content, a Header
            # object rather than str; fnmatch needs a plain string.
            candidates = (msg.get("from"), msg.get("list-id"))
            if any(value and fnmatch.fnmatch(str(value), ignore_from) for value in candidates):
                print("Ignoring message as instructed by --ignore flag")
                sys.exit(0)

        # Check CIDR if need be
        if args.allowfrom:
            allowed_net = netaddr.IPNetwork(args.allowfrom[0])
            good = False
            for received in msg.get_all('received') or []:
                m = re.search(r"from .+\[(.+)\]", str(received))
                if not m:
                    continue
                try:
                    ip = netaddr.IPAddress(m.group(1))
                except Exception:
                    # Bracketed text was not a usable IP address; best effort,
                    # so move on to the next Received header.
                    continue
                if ip in allowed_net:
                    good = True
                    msg.add_header("ip-whitelisted", "yes")
                    break
            if not good:
                print("No whitelisted IP found in message, aborting")
                sys.exit(-1)

        # Replace date header with $now? (adds the header if the message has none)
        if args.makedate:
            _set_header(msg, 'Date', email.utils.formatdate())

        is_public = not args.private
        if 'list-id' in msg:
            import_msg = collections.namedtuple('importmsg', ['list_id', 'archive_public'])
            list_data = import_msg(list_id=msg.get('list-id'), archive_public=is_public)
            try:
                lid, mid = archie.archive_message(args, list_data, msg, raw_message)
                print("%s: Done archiving to %s as %s!" % (email.utils.formatdate(), lid, mid))
            except Exception as err:
                if args.verbose:
                    traceback.print_exc()
                print("Archiving failed!: %s" % err)
                # Handled by the outer except below; keep the cause attached.
                raise Exception("Archiving to ES failed") from err
        else:
            print("Nothing to import (no list-id found!)")
    except Exception as err:
        # Extract the line number without using variables (which may cause issues?)
        #   first traceback entry, 2nd field (lineno)
        line = traceback.extract_tb(sys.exc_info()[2])[0][1]
        if args.quiet:
            print("Could not parse email, but exiting quietly as --quiet is on: %s (@ %s)" % (err, line))
        else:
            print("Could not parse email: %s (@ %s)" % (err, line))
            sys.exit(-1)