def run()

in api/pages/mail/pony-timeseries.py [0:0]
109 lines of code
19 McCabe index (conditional complexity)

def run(API, environ, indata, session):
    
    # We need to be logged in for this!
    if not session.user:
        raise API.exception(403, "You must be logged in to use this API endpoint! %s")
    
    now = time.time()
    
    # First, fetch the view if we have such a thing enabled
    viewList = []
    if indata.get('view'):
        viewList = session.getView(indata.get('view'))
    if indata.get('subfilter'):
        viewList = session.subFilter(indata.get('subfilter'), view = viewList) 
    
    hl = indata.get('span', 24)
    tnow = datetime.date.today()
    nm = tnow.month - (tnow.month % 3)
    ny = tnow.year
    ts = []
    
    if nm < 1:
        nm += 12
        ny = ny - 1
    
    while ny > 1970:
        d = datetime.date(ny, nm, 1)
        t = time.mktime(d.timetuple())
        d = d - dateutil.relativedelta.relativedelta(months=hl)
        tf = time.mktime(d.timetuple())
        nm -= 3
        if nm < 1:
            nm += 12
            ny = ny - 1
        
        
        ####################################################################
        ####################################################################
        dOrg = session.user['defaultOrganisation'] or "apache"
        query = {
                    'query': {
                        'bool': {
                            'must': [
                                {'range':
                                    {
                                        'ts': {
                                            'from': tf,
                                            'to': t
                                        }
                                    }
                                },
                                {
                                    'term': {
                                        'organisation': dOrg
                                    }
                                }
                            ],
                            'must_not': [
                                {
                                    'match': {
                                        'sourceURL': 'commits*'
                                    }
                                }
                            ]
                        }
                    }
                }
        # Source-specific or view-specific??
        if indata.get('source'):
            query['query']['bool']['must'].append({'term': {'sourceID': indata.get('source')}})
        elif viewList:
            query['query']['bool']['must'].append({'terms': {'sourceID': viewList}})
        
        # Get an initial count of commits
        res = session.DB.ES.count(
                index=session.DB.dbname,
                doc_type="email",
                body = query
            )
        
        globcount = res['count']
        if globcount == 0:
            break
        
        # Get top 25 committers this period
        query['aggs'] = {
                'by_sender': {
                    'terms': {
                        'field': 'sender',
                        'size': 2500
                    }                
                }
            }
        res = session.DB.ES.search(
                index=session.DB.dbname,
                doc_type="email",
                size = 0,
                body = query
            )
        
        
        # PF for authors
        pf_author = 0
        pf_author_count = 0
        cpf = {}
        for bucket in res['aggregations']['by_sender']['buckets']:
            count = bucket['doc_count']
            # Assume anyone sending > 10 emails per day is a bot (or a commit list)!
            if count > (10*365*hl):
                globcount -= count
                continue
            pf_author += 1
            pf_author_count += count
            if '@' in bucket['key']:
                mldom = bucket['key'].lower().split('@')[-1]
                cpf[mldom] = True
            if pf_author_count > int(globcount/2):
                break
        ts.append({
            'date': t,
            'Pony Factor (authors)': pf_author,
            'Meta-Pony Factor': len(cpf)
        })
    
    ts = sorted(ts, key = lambda x: x['date'])
    
    JSON_OUT = {
        'text': "This shows Pony Factors as calculated over a %u month timespan. Authorship is a measure of the people it takes to make up the bulk of email traffic, and meta-pony is an estimation of how many organisations/companies are involved." % hl,
        'timeseries': ts,
        'okay': True,
        'responseTime': time.time() - now,
    }
    yield json.dumps(JSON_OUT)