# api/pages/code/committers.py

import hashlib
import json
import time
def run(API, environ, indata, session):
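    """
    Shows the top 25 committers (or authors) for a period, plus a
    timeseries of distinct committer/author counts per interval.

    Optional indata parameters, as consumed below: 'view', 'subfilter',
    'to', 'from', 'author', 'interval', 'source', 'email', 'pathfilter'.
    """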
    # We need to be logged in for this!
    if not session.user:
        raise API.exception(403, "You must be logged in to use this API endpoint!")
now = time.time()
# First, fetch the view if we have such a thing enabled
viewList = []
if indata.get('view'):
viewList = session.getView(indata.get('view'))
if indata.get('subfilter'):
        viewList = session.subFilter(indata.get('subfilter'), view=viewList)
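    # At this point, viewList holds the sourceIDs this request may query;
    # an empty list means no view restriction applies.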
    dateTo = indata.get('to', int(time.time()))
    dateFrom = indata.get('from', dateTo - (86400 * 30 * 6))  # Default to a six-month span (6 x 30 days)
which = 'committer_email'
role = 'committer'
if indata.get('author', False):
which = 'author_email'
role = 'author'
interval = indata.get('interval', 'month')
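    # 'interval' is passed to the date_histogram aggregation below;
    # Elasticsearch accepts calendar units such as 'day', 'week' or 'month'.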
    ####################################################################
    # Build the base commit query: time range + organisation filter   #
    ####################################################################
dOrg = session.user['defaultOrganisation'] or "apache"
query = {
'query': {
'bool': {
'must': [
{'range':
{
'tsday': {
'from': dateFrom,
'to': dateTo
}
}
},
{
'term': {
'organisation': dOrg
}
}
]
}
}
}
    # Restrict to a single source, or to the sources in a view?
if indata.get('source'):
query['query']['bool']['must'].append({'term': {'sourceID': indata.get('source')}})
elif viewList:
query['query']['bool']['must'].append({'terms': {'sourceID': viewList}})
if indata.get('email'):
        query['query']['bool']['should'] = [
            {'term': {'committer_email': indata.get('email')}},
            {'term': {'author_email': indata.get('email')}}
        ]
query['query']['bool']['minimum_should_match'] = 1
# Path filter?
if indata.get('pathfilter'):
pf = indata.get('pathfilter')
if '!' in pf:
pf = pf.replace('!', '')
query['query']['bool']['must_not'] = query['query']['bool'].get('must_not', [])
query['query']['bool']['must_not'].append({'regexp': {'files_changed': pf}})
else:
query['query']['bool']['must'].append({'regexp': {'files_changed': pf}})
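    # Illustrative usage (the 'docs/' value is a made-up example): a
    # pathfilter of 'docs/' keeps only commits whose files_changed matches
    # that regexp, while '!docs/' excludes matching commits instead.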
# Get top 25 committers this period
query['aggs'] = {
'committers': {
'terms': {
'field': which,
'size': 25
},
'aggs': {
'byinsertions': {
'terms': {
'field': which
},
'aggs': {
'stats': {
'sum': {
'field': "insertions"
}
}
}
},
'bydeletions': {
'terms': {
'field': which
},
'aggs': {
'stats': {
'sum': {
'field': "deletions"
}
}
}
},
}
},
    }
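    # Note: 'byinsertions' and 'bydeletions' nest a terms aggregation on the
    # same field as their parent bucket, so each committer gets exactly one
    # sub-bucket; buckets[0] in the loop below therefore holds their totals.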
    res = session.DB.ES.search(
        index=session.DB.dbname,
        doc_type="code_commit",
        size=0,
        body=query
    )
people = {}
for bucket in res['aggregations']['committers']['buckets']:
email = bucket['key']
count = bucket['doc_count']
        # Person docs are keyed by sha1(organisation + email)
        sha = hashlib.sha1(("%s%s" % (dOrg, email)).encode('utf-8')).hexdigest()
        if session.DB.ES.exists(index=session.DB.dbname, doc_type="person", id=sha):
            pres = session.DB.ES.get(
                index=session.DB.dbname,
                doc_type="person",
                id=sha
            )
person = pres['_source']
person['name'] = person.get('name', 'unknown')
people[email] = person
people[email]['gravatar'] = hashlib.md5(person.get('email', 'unknown').encode('utf-8')).hexdigest()
people[email]['count'] = count
people[email]['subcount'] = {
'insertions': int(bucket['byinsertions']['buckets'][0]['stats']['value']),
'deletions': int(bucket['bydeletions']['buckets'][0]['stats']['value'])
}
    # Sort the collected people by commit count, descending
    topN = sorted(people.values(), key=lambda x: x['count'], reverse=True)
# Get timeseries for this period
query['aggs'] = {
'per_interval': {
'date_histogram': {
'field': 'date',
'interval': interval
},
'aggs': {
'by_committer': {
'cardinality': {
'field': 'committer_email'
}
},
'by_author': {
'cardinality': {
'field': 'author_email'
}
}
}
}
}
    res = session.DB.ES.search(
        index=session.DB.dbname,
        doc_type="code_commit",
        size=0,
        body=query
    )
timeseries = []
for bucket in res['aggregations']['per_interval']['buckets']:
        ts = int(bucket['key'] / 1000)  # histogram keys are epoch milliseconds
ccount = bucket['by_committer']['value']
acount = bucket['by_author']['value']
timeseries.append({
'date': ts,
'committers': ccount,
'authors': acount
})
JSON_OUT = {
'topN': {
'denoter': 'commits',
'items': topN
},
'timeseries': timeseries,
'sorted': people,
'okay': True,
'responseTime': time.time() - now,
'widgetType': {
'chartType': 'bar'
}
}
yield json.dumps(JSON_OUT)
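
# Illustrative response shape (keys taken from JSON_OUT above; values are
# made-up sample data):
#
# {
#   "topN": {"denoter": "commits", "items": [{"name": "...", "count": 42, ...}]},
#   "timeseries": [{"date": 1500000000, "committers": 5, "authors": 7}],
#   "sorted": {"dev@example.org": {...}},
#   "okay": true,
#   "responseTime": 0.042,
#   "widgetType": {"chartType": "bar"}
# }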