#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""" Modify lists and messages

This utility can be used to:
- rename a list
- make a list private
- make a list public
- update the description for a list
- delete mails from a list (does not delete mbox_source entries)
- obfuscate some fields (from, subject, body) in an mbox entry (does not obfuscate the raw source document)

"""

import sys
import time
import argparse

from elastic import Elastic

class options:
    """ Parsed and validated command-line settings for one edit run.

    Creating an instance parses the process command line with argparse and
    copies the results onto attributes:

    - sourceLID / mid: the list ID or Message-ID selecting what to edit
    - targetLID: new list ID for --rename
    - desc: new list description
    - obfuscate: text to blank out in matching mails
    - makePrivate / makePublic / deleteEmails: requested actions
    - wildcard, debug, notag, dryrun: behaviour switches
    - privacyChange / otherChange / anyChange: truthy when the matching
      kind of modification was requested

    If the combination of options makes no sense, a message and the help
    text are printed and the process exits with status -1.
    """
    def __init__(self):
        cli = argparse.ArgumentParser(description='Command line options.')

        # Select either a whole list or a single message, never both
        selection = cli.add_mutually_exclusive_group()
        selection.add_argument('--source', dest='source', type=str,
                               help='Source list to edit')
        selection.add_argument('--mid', dest='mid', type=str,
                               help='Source Message-ID to edit')
        cli.add_argument('--rename', dest='target', type=str,
                         help='(optional) new list ID')
        cli.add_argument('--desc', dest='desc', type=str,
                         help='(optional) new list description')
        cli.add_argument('--obfuscate', dest='obfuscate', type=str,
                         help='Things to obfuscate in body, if any')
        # Visibility can only be switched in one direction per run
        visibility = cli.add_mutually_exclusive_group()
        visibility.add_argument('--private', dest='private', action='store_true',
                                help='Make all emails in list private')
        visibility.add_argument('--public', dest='public', action='store_true',
                                help='Make all emails in list public')
        cli.add_argument('--delete', dest='delete', action='store_true',
                         help='Delete emails from this list')
        cli.add_argument('--wildcard', dest='glob', action='store_true',
                         help='Allow wildcards in --source')
        cli.add_argument('--debug', dest='debug', action='store_true',
                         help='Debug output - very noisy!')
        cli.add_argument('--notag', dest='notag', action='store_true',
                         help='List IDs do not have <> in them')
        cli.add_argument('--test', dest='test', action='store_true',
                         help='Only test for occurrences, do not run the chosen action (dry run)')

        parsed = cli.parse_args()

        def bail(message):
            """ Report a usage problem, show the help text and stop """
            print(message)
            cli.print_help()
            sys.exit(-1)

        # What to operate on
        self.sourceLID = parsed.source
        self.mid = parsed.mid
        # What to do with it
        self.targetLID = parsed.target
        self.desc = parsed.desc
        self.obfuscate = parsed.obfuscate
        self.makePrivate = parsed.private
        self.makePublic = parsed.public
        self.deleteEmails = parsed.delete
        # How to do it
        self.wildcard = parsed.glob
        self.notag = parsed.notag
        self.debug = parsed.debug
        self.dryrun = parsed.test

        # Summaries of the requested modifications (computed from the raw,
        # not yet normalised, option values)
        self.privacyChange = self.makePrivate or self.makePublic
        self.otherChange = self.targetLID or self.desc or self.obfuscate
        self.anyChange = self.privacyChange or self.otherChange

        # Sanity checks, reported in this order
        if not (self.sourceLID or self.mid):
            bail("No source list ID specified!")
        if not self.anyChange and not self.deleteEmails:
            bail("Nothing to do! No target list ID or action specified")
        if self.desc and not self.sourceLID:
            bail("No source list ID specified for description!")
        if self.deleteEmails and self.anyChange:
            bail("Cannot both change and delete emails in the same run")

        # TODO does it make sense to allow --rename with --mid?
        # i.e. rename the list for a single mid?

        # Normalise list IDs: '@' becomes '.', surrounding angle brackets are
        # dropped and then re-added (the source keeps them off with --notag)
        if self.sourceLID:
            bare_source = self.sourceLID.replace("@", ".").strip("<>")
            self.sourceLID = bare_source if self.notag else "<%s>" % bare_source
        if self.targetLID:
            bare_target = self.targetLID.replace("@", ".").strip("<>")
            self.targetLID = "<%s>" % bare_target


def process_hits(page, args, dbname):
    """ Processes each hit in a scroll search and proposes changes
        in the list returned

    page:   one page of search results; hits are read from
            page['hits']['hits'], each needing an '_id' and, when
            obfuscating, a '_source' dict with the fields to rewrite.
    args:   parsed options; reads obfuscate, targetLID, makePrivate,
            makePublic, deleteEmails and dryrun.
    dbname: index name to put into every proposed action.

    Returns one entry per hit: a bulk action dict ('delete' when
    args.deleteEmails is set, otherwise 'update' with the changed fields
    under 'doc'), or an empty dict per hit in dry-run mode.
    """
    changes = []
    if 'hits' not in page or 'hits' not in page['hits']:
        return changes

    for hit in page['hits']['hits']:
        if args.dryrun:
            # Empty action for counting if dryrun, so we never accidentally run it.
            changes.append({})
            continue

        body = {}
        if args.obfuscate:
            # Only rewrite fields that are actually present as text; a mail
            # lacking one of them must not abort the whole run.
            source = hit.get('_source') or {}
            for field in ('body', 'subject', 'from'):
                value = source.get(field)
                if isinstance(value, str):
                    body[field] = value.replace(args.obfuscate, "...")
        if args.targetLID:
            body['list_raw'] = args.targetLID
            body['list'] = args.targetLID
        if args.makePrivate:
            body['private'] = True
        if args.makePublic:
            body['private'] = False

        changes.append({
            '_op_type': 'delete' if args.deleteEmails else 'update',
            '_index': dbname,
            '_type': 'mbox',
            '_id': hit['_id'],
            'doc': body,
        })
    return changes

# Number of proposed changes submitted per bulk request
_BULK_BATCH_SIZE = 500


def _build_query(args):
    """ Builds the search body selecting the documents to work on

    Matches on the Message-ID when args.mid is set, otherwise on the raw
    list ID (as a wildcard match if args.wildcard is set). The mail fields
    are only fetched when they are needed for obfuscation.
    """
    if args.mid:
        selector = {'term': {'mid': args.mid}}
    else:
        match_type = 'wildcard' if args.wildcard else 'term'
        selector = {match_type: {'list_raw': args.sourceLID}}
    return {
        '_source': ['body', 'subject', 'from'] if args.obfuscate else False,
        'query': {
            'bool': {
                'must': [
                    selector
                ]
            }
        }
    }


def main():
    """ Runs the edit requested on the command line

    Prints a summary of the requested actions, stores a new list
    description if one was given, then scans for the matching documents and
    submits the proposed changes in batches. With --test nothing is written;
    the matching documents are only counted.
    """
    # get config and set up default database
    es = Elastic()
    # default database name
    dbname = es.getdbname()

    args = options()

    print("Beginning list edit:")
    if args.sourceLID:
        print("  - List ID: %s" % args.sourceLID)
    else:
        print("  - MID: %s" % args.mid)
    if args.targetLID:
        print("  - Target ID: %s" % args.targetLID)
    if args.makePublic:
        print("  - Action: Mark all emails public")
    if args.makePrivate:
        print("  - Action: Mark all emails private")
    if args.deleteEmails:
        print("  - Action: Delete emails (sources will be kept!)")
    if args.obfuscate:
        print("  - Action: Obfuscate parts of email containing: %s" % args.obfuscate)

    if args.desc:
        print("  - Action: add description: %s" % args.desc)
        if args.dryrun:
            print("DRY RUN - NO CHANGES WILL BE MADE")
        else:
            # When renaming in the same run, describe the list under its new ID
            LID = args.targetLID if args.targetLID else args.sourceLID
            es.index(
                doc_type="mailinglists",
                id=LID,
                body = {
                    'list': LID,
                    'name': LID,
                    'description':args.desc
                }
            )
            print("All done, updated description.")

    if args.targetLID or args.makePrivate or args.makePublic or args.deleteEmails or args.mid or args.obfuscate:
        if args.dryrun:
            print("DRY RUN - NO CHANGES WILL BE MADE")
        print("Updating docs...")
        then = time.time()

        # Collect every proposed change first, so nothing is written until
        # the whole scan has completed
        proposed_changes = []
        for page in es.scan_and_scroll(body = _build_query(args)):
            proposed_changes.extend(process_hits(page, args, dbname))

        # Handle proposed changes in batches; slicing avoids the repeated
        # front-of-list pops, which get slow for large lists
        count = len(proposed_changes)
        for start in range(0, count, _BULK_BATCH_SIZE):
            batch = proposed_changes[start:start + _BULK_BATCH_SIZE]
            if not args.dryrun:
                es.bulk(batch)
            print("Processed %u documents..." % (start + len(batch)))

        print("All done, processed %u docs in %u seconds" % (count, time.time() - then))

# Run the edit only when executed as a script, not when imported
if __name__ == '__main__':
    main()
