def run()

in api/pages/mail/retention.py [0:0]
144 lines of code
34 McCabe index (conditional complexity)

def _format_experience(years):
    """Render a fractional number of years as e.g. "2 years, 3 months"."""
    # Round the *total* month count first, so a fraction that rounds up to
    # twelve months carries into the year figure instead of producing
    # "1 year, 12 months".
    whole_years, months = divmod(int(round(years * 12)), 12)
    parts = []
    if whole_years >= 1:
        parts.append("%u year%s" % (whole_years, "s" if whole_years != 1 else ""))
    if months > 0:
        parts.append("%u month%s" % (months, "s" if months != 1 else ""))
    # Never return an empty string: the caller embeds this in a sentence.
    return ", ".join(parts) or "less than a month"


def run(API, environ, indata, session):
    """Compute quarterly contributor-retention figures for email senders.

    Walks forward from 1970 in three-month steps. For every step it counts
    the "email" documents of the user's organisation (optionally restricted
    to one source or to a view) and, once the first non-empty quarter has
    been found, aggregates the senders seen in each quarter.

    Parameters:
        API: object providing ``exception(code, message)``, whose return
            value is raised when the session has no user.
        environ: unused by this function.
        indata: request parameters; the keys read are ``view``,
            ``subfilter``, ``source`` and ``span`` (number of months a
            sender stays "active" after their last email, default 12).
        session: provides ``user``, ``getView``, ``subFilter`` and
            ``DB.ES`` / ``DB.dbname``.

    Yields:
        A single JSON string with the keys ``text``, ``timeseries``,
        ``counts``, ``averageYears``, ``okay`` and ``responseTime``.

    Raises:
        Whatever ``API.exception(403, ...)`` returns, if not logged in.
    """
    # We need to be logged in for this!
    if not session.user:
        raise API.exception(403, "You must be logged in to use this API endpoint!")

    now = time.time()

    # First, fetch the view if we have such a thing enabled
    # (presumably a list of source IDs, since it feeds a 'terms' filter on
    # sourceID below -- confirm against session.getView).
    viewList = []
    if indata.get('view'):
        viewList = session.getView(indata.get('view'))
    if indata.get('subfilter'):
        viewList = session.subFilter(indata.get('subfilter'), view=viewList)

    # By default, a contributor counts as active if seen within the past year.
    hl = indata.get('span', 12)

    tnow = datetime.date.today()
    cy = tnow.year
    # Month on which the quarterly steps are aligned: the current month
    # rounded down to a multiple of three, with 0 wrapping to December.
    nm = tnow.month - (tnow.month % 3)
    if nm < 1:
        nm += 12
    ny = 1970

    # These do not change between quarters, so build them once.
    dOrg = session.user['defaultOrganisation'] or "apache"
    # Source-specific or view-specific??
    sourceFilter = None
    if indata.get('source'):
        sourceFilter = {'term': {'sourceID': indata.get('source')}}
    elif viewList:
        sourceFilter = {'terms': {'sourceID': viewList}}

    # sender -> end timestamp of the most recent quarter they were seen in.
    # A sender is dropped once that timestamp is older than the span.
    activePeople = {}
    # sender -> end timestamp of the first quarter they were ever seen in.
    allPeople = {}
    ts = []
    FoundSomething = False

    while ny < cy or (ny == cy and (nm + 3) <= tnow.month):
        t = time.mktime(datetime.date(ny, nm, 1).timetuple())
        nm += 3
        if nm > 12:
            nm -= 12
            ny += 1
        # Stop before a quarter that would end after the current month.
        if ny == cy and nm > tnow.month:
            break
        tf = time.mktime(datetime.date(ny, nm, 1).timetuple())

        must = [
            {'range': {'ts': {'from': t, 'to': tf}}},
            {'term': {'organisation': dOrg}},
        ]
        if sourceFilter is not None:
            must.append(sourceFilter)
        query = {'query': {'bool': {'must': must}}}

        # Get an initial count of emails in this quarter
        res = session.DB.ES.count(
            index=session.DB.dbname,
            doc_type="email",
            body=query
        )

        # Skip leading empty quarters; once data has been seen, empty
        # quarters are still processed so that people can drop out.
        if res['count'] == 0 and not FoundSomething:
            continue
        FoundSomething = True

        # Get the senders of this period (up to 200000 buckets)
        query['aggs'] = {
            'by_author': {
                'terms': {
                    'field': 'sender',
                    'size': 200000
                }
            }
        }
        res = session.DB.ES.search(
            index=session.DB.dbname,
            doc_type="email",
            size=0,
            body=query
        )

        added = 0
        for bucket in res['aggregations']['by_author']['buckets']:
            who = bucket['key']
            # Not currently active: either brand new or returning after
            # having been pruned earlier.
            if who not in activePeople:
                added += 1
            activePeople[who] = tf
            allPeople.setdefault(who, tf)

        # Drop everyone whose last activity is older than the span.
        cutoff = t - (hl * 30.45 * 86400)
        expired = [who for who, last in activePeople.items() if last < cutoff]
        for who in expired:
            del activePeople[who]
        lost = len(expired)
        retained = len(activePeople) - added

        ts.append({
            'date': tf,
            'People who (re)joined': added,
            'People who quit': lost,
            'People retained': retained,
            'Active people': added + retained
        })

    # Experience buckets as (label, minimum seconds), longest first so the
    # first match is the most specific one.
    groups = [
        ['More than 5 years', (5 * 365 * 86400) + 1],
        ['2 - 5 years', (2 * 365 * 86400) + 1],
        ['1 - 2 years', (365 * 86400)],
        ['Less than a year', 1]
    ]

    # Sample the clock once so the bucketing and the total use the same instant.
    reference = time.time()
    counts = {}
    totExp = 0
    for person in activePeople:
        experience = reference - allPeople[person]
        totExp += experience
        for label, minimum in groups:
            if experience >= minimum:
                counts[label] = counts.get(label, 0) + 1
                break
    # max(..., 1) avoids dividing by zero when nobody is active.
    avgyr = (totExp / (86400 * 365)) / max(len(activePeople), 1)

    ts = sorted(ts, key=lambda x: x['date'])

    avgm = _format_experience(avgyr)
    JSON_OUT = {
        'text': "This shows Contributor retention as calculated over a %u month timespan. The average experience of currently active people is %s." % (hl, avgm),
        'timeseries': ts,
        'counts': counts,
        'averageYears': avgyr,
        'okay': True,
        'responseTime': time.time() - now,
    }
    yield json.dumps(JSON_OUT)