def start()

in server/api/plugins/worker.py [0:0]
97 lines of code
4 McCabe index (conditional complexity)

def _recent_loggy_indices(days=3):
    """Return the comma-joined ``loggy-YYYY.MM.DD`` index names for the past *days* days (UTC)."""
    day = datetime.datetime.now(datetime.timezone.utc)
    names = []
    for _ in range(days):
        names.append(day.strftime("loggy-%Y.%m.%d"))
        day -= datetime.timedelta(days=1)
    return ",".join(names)


def _rule_offenders(DB, rule, indices):
    """Run a single rule against *indices* and return the list of offending IP records.

    A rule is skipped (empty list returned) when it lacks a ``limit``,
    ``span`` or ``query``, when ``construct_query`` yields nothing, when the
    search response has no aggregations, or when its ``type`` is neither
    ``httpd_visits`` nor ``httpd_traffic``.
    """
    limit = rule.get('limit')
    span = rule.get('span')
    doctype = rule.get('type')
    name = rule.get('name', 'Generic rule')
    query = rule.get('query')
    rid = rule.get('rid', 'null')

    offenders = []
    if not (limit and span and query):
        return offenders
    print("Running rule '%s'..." % name)

    # Start with timestamp terms: only look at the last `span` hours
    terms = [{
        "range": {
            "@timestamp": {
                "gt": ("now-%uh" % span)
            }
        }
    }]

    # For each term in our blocky query, convert to ES terms
    q = construct_query(doctype, query, terms)
    if not q:
        return offenders

    res = DB.ES.search(index=indices, request_timeout=90, body=q)
    if not res or 'aggregations' not in res:
        return offenders

    # The two supported rule types differ only in which bucket field is
    # compared against the limit.
    if doctype == 'httpd_visits':
        def measure(bucket):
            return bucket['doc_count']
    elif doctype == 'httpd_traffic':
        def measure(bucket):
            return bucket['traffic']['value']
    else:
        return offenders

    for suspect in res['aggregations']['byip']['clients']['buckets']:
        count = measure(suspect)
        ip = suspect['key']
        # NOTE(review): the comparison is strictly-greater while the reason
        # text says ">=" - confirm which one is intended.
        if count > limit:
            reason = "%s (%u >= limit of %u)" % (name, count, limit)
            offenders.append({'ip': ip, 'reason': reason, 'target': '*', 'rid': rid, 'epoch': int(time.time())})
            print("Found offender: %s; %s" % (ip, reason))
    return offenders


def start(DB, config, pidfile):
    """Run the Blocky/2 worker loop for as long as *pidfile* exists.

    Each cycle fetches all rules, evaluates them against the last three
    daily ``loggy-*`` indices, drops offenders covered by the whitelist and
    records a ban document for every remaining IP that is not already
    banned. Ban documents and notes are only written when the module-level
    ``DEBUG`` flag is false.

    Args:
        DB: database handle exposing ``ES`` (the search client) and ``dbname``.
        config: not used by this function.
        pidfile: path whose existence keeps the loop running; the loop exits
            at the start of the next cycle once the file is gone.
    """
    addnote(DB, 'system', "(re)starting Blocky/2...")
    time.sleep(15)
    # First, check that indices are present
    # We're assuming ES6 here...
    # NOTE(review): the names checked here ('banlist', 'rules') differ from
    # the doc types used further down ('ban', 'rule') - confirm intended.
    if type(DB.ES) is plugins.database.BlockyESWrapper:
        for el in ['whitelist', 'banlist', 'rules', 'notes']:
            if not DB.ES.exists_doctype(index=DB.dbname, doc_type=el):
                print("DB index %s does not exist, creating.." % el)
                DB.ES.create(index=DB.dbname, doc_type = el)

    # Okay, now run the rule-set every once in a while
    while os.path.exists(pidfile):
        print("Running rulesets against database...")
        now = time.time()

        # First, fetch all rules (at most 5000), remembering each rule's id
        res = DB.ES.search(
                index=DB.dbname,
                doc_type="rule",
                size = 5000,
                body = {
                    'query': {
                        'match_all': {}
                    }
                }
            )
        rules = []
        for hit in res['hits']['hits']:
            doc = hit['_source']
            doc['rid'] = hit['_id']
            rules.append(doc)

        # Prep list of indices to check against, for performance reasons
        threes = _recent_loggy_indices(3) # Past three days

        # Now, run each rule and collect the bad IPs to block
        bad_ips = []
        for rule in rules:
            bad_ips.extend(_rule_offenders(DB, rule, threes))

        print("Done with rules after %u seconds, found %u offenders" % (time.time() - now, len(bad_ips)))
        # Now we have a list of bad IPs.
        # Compare against whitelist, filter out any that are there
        # Block the rest

        # Fetch whitelist
        whitelist = get_whitelist(DB)

        # For each baddie, compare against whitelist
        to_ban = []
        for bad_ip in bad_ips:
            ipaddress = netaddr.IPAddress(bad_ip['ip'])
            for whiteblock in whitelist:
                if ipaddress in whiteblock:
                    print("%s is whitelisted as %s, ignoring offenses..." % (ipaddress, whiteblock))
                    break
            else:
                # No whitelist entry matched
                to_ban.append(bad_ip)

        # For each IP we should be banning, ban if not already banned
        for bad_ip in to_ban:
            banid = make_sha1(bad_ip['ip'])
            if DB.ES.exists(index=DB.dbname, doc_type = 'ban', id = banid):
                continue
            # Reverse DNS is best-effort: fall back to the bare IP on any
            # lookup failure, but let KeyboardInterrupt/SystemExit through.
            try:
                rdns = socket.gethostbyaddr(bad_ip['ip'])[0]
            except Exception:
                rdns = None
            bad_ip['dns'] = rdns if rdns else bad_ip['ip']
            print("Banning %s as %s" % (bad_ip['ip'], banid))
            if not DEBUG:
                addnote(DB, 'autoban', "Banning %s on %s as %s (%s)" % (bad_ip['ip'], bad_ip['target'], banid, bad_ip['reason']))
                DB.ES.index(index=DB.dbname, doc_type = 'ban', id = banid, body = bad_ip)

        # Now sleep for two minutes (120 seconds) before the next cycle
        time.sleep(120)