tools/setup.py (461 lines of code) (raw):

#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import importlib.util
import logging
import os
import os.path
import shutil
import sys

# sys.version_info is a 5-element tuple, so (3, 3, x, ...) compares GREATER
# than (3, 3); comparing against (3, 4) is the only way to reject all 3.3.x.
if sys.version_info < (3, 4):
    print("This script requires Python 3.4 or higher")
    sys.exit(-1)

# Check for all required python packages
wanted_pkgs = [
    'elasticsearch',  # used by setup.py, archiver.py and elastic.py
    'formatflowed',   # used by archiver.py
    'netaddr',        # used by archiver.py
    'certifi',        # used by archiver.py and elastic.py
]

# find_spec() returns None for a top-level package that cannot be located
missing_pkgs = [pkg for pkg in wanted_pkgs
                if importlib.util.find_spec(pkg) is None]

if missing_pkgs:
    print("It looks like you need to install some python modules first")
    print("The following packages are required: ")
    for pkg in missing_pkgs:
        print(" - %s" % pkg)
    print("You may use your package manager, or run the following command:")
    print("pip3 install %s" % " ".join(missing_pkgs))
    sys.exit(-1)

# at this point we can assume elasticsearch is present
from elasticsearch import Elasticsearch
from elasticsearch import ElasticsearchException
from elasticsearch import ConnectionError as ES_ConnectionError
from elasticsearch import VERSION as ES_VERSION

# Major version of the installed client library (first item of VERSION)
ES_MAJOR = ES_VERSION[0]

# CLI arg parsing
parser = argparse.ArgumentParser(description='Command line options.')
parser.add_argument('--defaults', dest='defaults', action='store_true',
                    help='Use default settings')
parser.add_argument('--dbprefix', dest='dbprefix')
parser.add_argument('--clobber', dest='clobber', action='store_true',
                    help='Allow overwrite of ponymail.cfg & ../site/api/lib/config.lua (default: create *.tmp if either exists)')
parser.add_argument('--dbhost', dest='dbhost', type=str,
                    help='ES backend hostname')
parser.add_argument('--dbport', dest='dbport', type=str,
                    help='DB port')
parser.add_argument('--dbname', dest='dbname', type=str,
                    help='ES DB name')
parser.add_argument('--dbshards', dest='dbshards', type=int,
                    help='DB Shard Count')
parser.add_argument('--dbreplicas', dest='dbreplicas', type=int,
                    help='DB Replica Count')
parser.add_argument('--mailserver', dest='mailserver', type=str,
                    help='Host name of outgoing mail server')
parser.add_argument('--mldom', dest='mldom', type=str,
                    help='Domains to accept mail for via UI')
parser.add_argument('--wordcloud', dest='wc', action='store_true',
                    help='Enable word cloud')
parser.add_argument('--skiponexist', dest='soe', action='store_true',
                    help='Skip setup if ES index exists')
parser.add_argument('--noindex', dest='noi', action='store_true',
                    help="Don't make an ES index, assume it exists")
parser.add_argument('--nocloud', dest='nwc', action='store_true',
                    help='Do not enable word cloud')
parser.add_argument('--generator', dest='generator', type=str,
                    help='Document ID Generator to use (legacy, medium, cluster, full)')
args = parser.parse_args()

print("Welcome to the Pony Mail setup script!")
print("Let's start by determining some settings...")
print("")

# Sentinel values: each setting below is prompted for interactively for as
# long as it still holds its sentinel ("" / 0 / -1 / None).
hostname = ""
port = 0
dbname = ""
mlserver = ""
mldom = ""
wc = ""          # raw word cloud answer; "" means "not decided yet"
genname = ""
wce = False      # word cloud enabled? (this is what ends up in config.lua)
shards = 0
replicas = -1
urlPrefix = None

# If called with --defaults (like from Docker), use default values
if args.defaults:
    hostname = "localhost"
    port = 9200
    dbname = "ponymail"
    mlserver = "localhost"
    mldom = "example.org"
    wc = "Y"
    wce = True
    shards = 1
    replicas = 0
    genname = "cluster"
    urlPrefix = ''

# Accept CLI args, copy them (these override --defaults).
# Options for which an empty/zero value is meaningful are tested against
# None so that e.g. "--dbreplicas 0" or "--dbprefix ''" are honoured
# instead of falling through to the interactive prompt.
if args.dbprefix is not None:
    urlPrefix = args.dbprefix
if args.dbhost:
    hostname = args.dbhost
if args.dbport:
    try:
        port = int(args.dbport)
    except ValueError:
        parser.error("--dbport must be an integer, got %r" % args.dbport)
if args.dbname:
    dbname = args.dbname
if args.mailserver:
    mlserver = args.mailserver
if args.mldom:
    mldom = args.mldom
# The word cloud flags must set both the answer (to suppress the prompt)
# and the effective boolean (which is what gets written to config.lua).
if args.wc:
    wc = "Y"
    wce = True
if args.nwc:  # --nocloud wins if both flags are given
    wc = "N"
    wce = False
if args.dbshards is not None:
    shards = args.dbshards
if args.dbreplicas is not None:
    replicas = args.dbreplicas
if args.generator:
    genname = args.generator

# Interactive prompts for anything still undecided
while hostname == "":
    hostname = input("What is the hostname of the ElasticSearch server? (e.g. localhost): ")

while urlPrefix is None:
    urlPrefix = input("Database URL prefix if any (hit enter if none): ")

while port < 1:
    try:
        port = int(input("What port is ElasticSearch listening on? (normally 9200): "))
    except ValueError:
        pass  # not a number: ask again

while dbname == "":
    dbname = input("What would you like to call the mail index (e.g. ponymail): ")

while mlserver == "":
    mlserver = input("What is the hostname of the outgoing mailserver? (e.g. mail.foo.org): ")

while mldom == "":
    mldom = input("Which domains would you accept mail to from web-replies? (e.g. foo.org or *): ")

while wc == "":
    wc = input("Would you like to enable the word cloud feature? (Y/N): ")
    if wc.lower() == "y":
        wce = True

gens = ['legacy', 'medium', 'cluster', 'full']
while genname == "":
    print ("Please select a document ID generator:")
    print("1 LEGACY: The original document generator for v/0.1-0.8 (no longer recommended)")
    print("2 MEDIUM: The medium comprehensive generator for v/0.9 (no longer recommended)")
    print("3 REDUNDANT: Near-full message digest, discard MTA trail (recommended for clustered setups)")
    print("4 FULL: Full message digest with MTA trail (recommended for single-node setups).")
    try:
        gno = int(input("Please select a generator [1-4]: "))
    except ValueError:
        continue  # not a number: ask again
    # Require a 1-based menu index; without the lower bound, 0 and negative
    # numbers would wrap around via negative list indexing (or raise
    # IndexError for values below -len(gens)+1).
    if 1 <= gno <= len(gens):
        genname = gens[gno - 1]

while shards < 1:
    try:
        shards = int(input("How many shards for the ElasticSearch index? "))
    except ValueError:
        pass  # not a number: ask again

while replicas < 0:
    try:
        replicas = int(input("How many replicas for each shard? "))
    except ValueError:
        pass  # not a number: ask again

print("Okay, I got all I need, setting up Pony Mail...")


def createIndex():
    """Create the mail index `dbname` with the Pony Mail mappings.

    Uses the module-level `es` client, `dbname`, `shards`, `replicas`,
    `DB_MAJOR` and `args`, all of which must be set before this is called.

    If the index already exists the process exits: with status 0 when
    --skiponexist was given, otherwise with status -1.  Errors raised by
    the Elasticsearch client are not caught here.
    """
    # Check if index already exists
    if es.indices.exists(dbname):
        if args.soe:
            print("ElasticSearch index '%s' already exists and SOE set, exiting quietly" % dbname)
            sys.exit(0)
        else:
            print("Error: ElasticSearch index '%s' already exists!" % dbname)
            sys.exit(-1)

    print("Creating index " + dbname)

    settings = {
        "number_of_shards": shards,
        "number_of_replicas": replicas
    }

    mappings = {
        "mbox": {
            "properties": {
                "@import_timestamp": {
                    "type": "date",
                    "format": "yyyy/MM/dd HH:mm:ss||yyyy/MM/dd"
                },
                "attachments": {
                    "properties": {
                        "content_type": {
                            "type": "string",
                            "index": "not_analyzed"
                        },
                        "filename": {
                            "type": "string",
                            "index": "not_analyzed"
                        },
                        "hash": {
                            "type": "string",
                            "index": "not_analyzed"
                        },
                        "size": {
                            "type": "long"
                        }
                    }
                },
                "body": {
                    "type": "string"
                },
                "cc": {
                    "type": "string"
                },
                "date": {
                    "type": "date",
                    "store": True,
                    "format": "yyyy/MM/dd HH:mm:ss",
                    "index": "not_analyzed"
                },
                "epoch": {  # number of seconds since the epoch
                    "type": "long",
                    "index": "not_analyzed"
                },
                "from": {
                    "type": "string"
                },
                "from_raw": {
                    "type": "string",
                    "index": "not_analyzed"
                },
                "in-reply-to": {
                    "type": "string",
                    "index": "not_analyzed"
                },
                "list": {
                    "type": "string"
                },
                "list_raw": {
                    "type": "string",
                    "index": "not_analyzed"
                },
                "message-id": {
                    "type": "string",
                    "index": "not_analyzed"
                },
                "mid": {
                    "type": "string"
                },
                "private": {
                    "type": "boolean"
                },
                "references": {
                    "type": "string"
                },
                "subject": {
                    "type": "string",
                    "fielddata": True  # dropped later if DB_MAJOR==2
                },
                "to": {
                    "type": "string"
                }
            }
        },
        "attachment": {
            "properties": {
                "source": {
                    "type": "binary"
                }
            }
        },
        "mbox_source": {
            "_all": {
                "enabled": False  # this doc type is not searchable
            },
            "properties": {
                "source": {
                    "type": "binary"
                },
                "message-id": {
                    "type": "string",
                    "index": "not_analyzed"
                },
                "mid": {
                    "type": "string"
                }
            }
        },
        "mailinglists": {
            "_all": {
                "enabled": False  # this doc type is not searchable
            },
            "properties": {
                "description": {
                    "type": "string",
                    "index": "not_analyzed"
                },
                "list": {
                    "type": "string",
#                    "index": "not_analyzed"
                },
                "name": {
                    "type": "string",
                    "index": "not_analyzed"
                }
            }
        },
        "account": {
            "_all": {
                "enabled": False  # this doc type is not searchable
            },
            "properties": {
                "cid": {
                    "type": "string",
                    "index": "not_analyzed"
                },
                "credentials": {
                    "properties": {
                        "altemail": {
                            "type": "object"
                        },
                        "email": {
                            "type": "string",
                            "index": "not_analyzed"
                        },
                        "fullname": {
                            "type": "string",
                            "index": "not_analyzed"
                        },
                        "uid": {
                            "type": "string",
                            "index": "not_analyzed"
                        }
                    }
                },
                "internal": {
                    "properties": {
                        "cookie": {
                            "type": "string",
                            "index": "not_analyzed"
                        },
                        "ip": {
                            "type": "string",
                            "index": "not_analyzed"
                        },
                        "oauth_used": {
                            "type": "string",
                            "index": "not_analyzed"
                        }
                    }
                },
                "request_id": {
                    "type": "string",
                    "index": "not_analyzed"
                }
            }
        },
        "notifications": {
            "_all": {
                "enabled": False  # this doc type is not searchable
            },
            "properties": {
                "date": {
                    "type": "date",
                    "store": True,
                    "format": "yyyy/MM/dd HH:mm:ss"
                },
                "epoch": {
                    "type": "long"
                },
                "from": {
                    "type": "string",
#                    "index": "not_analyzed"
                },
                "in-reply-to": {
                    "type": "string",
                    "index": "not_analyzed"
                },
                "list": {
                    "type": "string",
#                    "index": "not_analyzed"
                },
                "message-id": {
                    "type": "string",
                    "index": "not_analyzed"
                },
                "mid": {
                    "type": "string",
#                    "index": "not_analyzed"
                },
                "private": {
                    "type": "boolean"
                },
                "recipient": {
                    "type": "string",
                    "index": "not_analyzed"
                },
                "seen": {
                    "type": "long"
                },
                "subject": {
                    "type": "string",
                    "fielddata": True  # dropped later if DB_MAJOR==2
#                    "index": "not_analyzed"
                },
                "to": {
                    "type": "string",
#                    "index": "not_analyzed"
                },
                "type": {
                    "type": "string",
                    "index": "not_analyzed"
                }
            }
        }
    }

    if DB_MAJOR == 2:  # ES 2 handles fielddata differently
        del mappings['mbox']['properties']['subject']['fielddata']
        del mappings['notifications']['properties']['subject']['fielddata']

    res = es.indices.create(index=dbname, body={
        "mappings": mappings,
        "settings": settings
    })

    print("Index created! %s " % res)


# we need to connect to database to determine the engine version
es = Elasticsearch([
    {
        'host': hostname,
        'port': port,
        'use_ssl': False,
        'url_prefix': urlPrefix
    }],
    max_retries=5,
    retry_on_timeout=True
    )

# elasticsearch logs lots of warnings on retries/connection failure
logging.getLogger("elasticsearch").setLevel(logging.ERROR)

try:
    DB_VERSION = es.info()['version']['number']
except ES_ConnectionError:
    print("WARNING: Connection error: could not determine the engine version.")
    DB_VERSION = '0.0.0'  # placeholder; yields DB_MAJOR == 0 ("not known") below

DB_MAJOR = int(DB_VERSION.split('.')[0])
print("Versions: library %d (%s), engine %d (%s)" % (ES_MAJOR, '.'.join(map(str, ES_VERSION)), DB_MAJOR, DB_VERSION))

if not DB_MAJOR == ES_MAJOR:
    print("WARNING: library version does not agree with engine version!")

if DB_MAJOR == 0:  # not known
    if args.noi:
        # allow setup to be used without engine running
        print("Could not determine the engine version. Assume it is the same as the library version.")
        DB_MAJOR = ES_MAJOR
    else:
        # if we cannot connect to get the version, we cannot create the index later
        print("Could not connect to the engine. Fatal.")
        sys.exit(1)

if not args.noi:
    try:
        createIndex()
    except ElasticsearchException as e:
        print("Index creation failed: %s" % e)
        sys.exit(1)

# Write the importer/archiver configuration.  Unless --clobber is given, an
# existing file is left alone and the new content goes to a .tmp sibling.
ponymail_cfg = 'ponymail.cfg'
if not args.clobber and os.path.exists(ponymail_cfg):
    print("%s exists and clobber is not set" % ponymail_cfg)
    ponymail_cfg = 'ponymail.cfg.tmp'

print("Writing importer config (%s)" % ponymail_cfg)

with open(ponymail_cfg, "w") as f:
    f.write("""
###############################################################
# Pony Mail Configuration file

# Main ES configuration
[elasticsearch]
hostname:               %s
dbname:                 %s
port:                   %u
ssl:                    false

#uri:                   url_prefix
#user:                  username
#password:              password

#%s

#backup:                database name

[archiver]
generator:              %s

[debug]
#cropout:               string to crop from list-id

###############################################################
""" % (hostname, dbname, port,
       'wait:                  active shard count' if DB_MAJOR == 5 else 'write:                 consistency level (default quorum)',
       genname))

# Write the mod_lua configuration, with the same clobber/.tmp rule as above
config_path = "../site/api/lib"
config_file = "config.lua"
if not args.clobber and os.path.exists(os.path.join(config_path, config_file)):
    print("%s exists and clobber is not set" % config_file)
    config_file = "config.lua.tmp"

print("mod_lua configuration (%s)" % config_file)
with open(os.path.join(config_path, config_file), "w") as f:
    f.write("""
local config = {
    es_url = "http://%s:%u/%s/",
    mailserver = "%s",
--  mailport = 1025, -- override the default port (25)
    accepted_domains = "%s",
    wordcloud = %s,
    email_footer = nil, -- see the docs for how to set this up.
    full_headers = false,
    maxResults = 5000, -- max emails to return in one go. Might need to be bumped for large lists
--  stats_maxBody = 200, -- max size of body snippet returned by stats.lua
--  stats_wordExclude = ".|..|...", -- patterns to exclude from word cloud generated by stats.lua
    admin_oauth = {}, -- list of domains that may do administrative oauth (private list access)
                     -- add 'www.googleapis.com' to the list for google oauth to decide, for instance.
    oauth_fields = { -- used for specifying individual oauth handling parameters.
-- for example:
--        internal = {
--            email = 'CAS-EMAIL',
--            name = 'CAS-NAME',
--            uid = 'REMOTE-USER',
--            env = 'subprocess' -- use environment vars instead of request headers
--        }
    },
--  allow_insecure_cookie = true, -- override the default (false) - only use for test installations
--  no_association = {}, -- domains that are not allowed for email association
--  listsDisplay = 'regex', -- if defined, hide list names that don't match the regex
--  debug = false, -- whether to return debug information
    antispam = true  -- Whether or not to add anti-spam measures aimed at anonymous users.
}
return config
""" % (hostname, port, dbname, mlserver, mldom, "true" if wce else "false"))

print("Copying sample JS config to config.js (if needed)...")
# Never overwrite an existing config.js; only seed it from the sample
if not os.path.exists("../site/js/config.js") and os.path.exists("../site/js/config.js.sample"):
    shutil.copy("../site/js/config.js.sample", "../site/js/config.js")

print("All done, Pony Mail should...work now :)")
print("If you are using an external mail inbound server, \nmake sure to copy archiver.py and ponymail.cfg to it")