def run()

in api/pages/mail/map.py [0:0]
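
Generates a node/link graph of email interactions for the user's default
organisation: each node is a person (keyed by email address), sized by the
number of replies they received, and each link connects two people who share
correspondents. Recognised indata parameters, as read by the code below:
view, subfilter, from, to, source, search, email, author, collapse and links.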


import copy
import hashlib
import json
import math
import re
import time

# Regex matching known automated senders. Assumed to be defined at module
# level alongside this handler; the exact pattern in the source may differ.
badBots = r"(JIRA|Hudson|jira|jenkins|GitHub|git@|dev@|bugzilla|gerrit)"


def run(API, environ, indata, session):
    
    # We need to be logged in for this!
    if not session.user:
        raise API.exception(403, "You must be logged in to use this API endpoint!")
    
    now = time.time()
    
    # First, fetch the view if we have such a thing enabled
    viewList = []
    if indata.get('view'):
        viewList = session.getView(indata.get('view'))
    if indata.get('subfilter'):
        viewList = session.subFilter(indata.get('subfilter'), view = viewList)  
    
    dateTo = indata.get('to', int(time.time()))
    dateFrom = indata.get('from', dateTo - (86400*30*6)) # Default to a 6 month span
    span = dateTo - dateFrom
    
    ####################################################################
    ####################################################################
    dOrg = session.user['defaultOrganisation'] or "apache"
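    # Base query: all emails within the requested time span for this organisation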
    query = {
        'query': {
            'bool': {
                'must': [
                    {
                        'range': {
                            'ts': {
                                'from': dateFrom,
                                'to': dateTo
                            }
                        }
                    },
                    {
                        'term': {
                            'organisation': dOrg
                        }
                    }
                ]
            }
        }
    }
    # Source-specific or view-specific?
    if indata.get('source'):
        query['query']['bool']['must'].append({'term': {'sourceID': indata.get('source')}})
    elif viewList:
        query['query']['bool']['must'].append({'terms': {'sourceID': viewList}})
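    # Optional subject filter; the search string is applied as an ES regexp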
    if indata.get('search'):
        query['query']['bool']['must'].append({'regexp': {'subject': indata.get('search')}})
    
    # If a specific email address was given, require it to appear either as
    # the list/recipient (replyto.keyword) or as the sender
    if indata.get('email'):
        query['query']['bool']['minimum_should_match'] = 1
        query['query']['bool']['should'] = [
            {'term': {'replyto.keyword': indata.get('email')}},
            {'term': {'sender': indata.get('email')}},
            ]
        
    # Count replies this period, per recipient (replyto.keyword), or per sender if 'author' is set
    query['aggs'] = {
        'per_ml': {
            'terms': {
                'field': 'replyto.keyword' if not indata.get('author') else 'sender',
                'size': 150
            }
        }
    }
    res = session.DB.ES.search(
        index=session.DB.dbname,
        doc_type="email",
        size=0,
        body=query
    )
    
    repos = {}          # address -> list of correspondents
    repo_commits = {}   # address -> reply count
    authorlinks = {}
    max_emails = 0      # busiest address, used for node sizing
    max_links = 0
    max_shared = 0      # strongest link, used for link thickness
    max_authors = 0
    minLinks = indata.get('links', 1)
    
    # Drop the email-specific clauses again; the per-person queries below
    # should cover all correspondents, not just the given address
    if indata.get('email'):
        del query['query']['bool']['should']
        del query['query']['bool']['minimum_should_match']
    
    # For each person, count replies and gather their correspondents
    for doc in res['aggregations']['per_ml']['buckets']:
        sourceID = doc['key']
        emails = doc['doc_count']
        if re.search(badBots, sourceID): # No bots
            continue
        if emails > (span/86400)*4: # More than 4/day and we consider you a bot!
            continue
        
            
        # Gather this person's unique correspondents: the senders who replied
        # to them (or, with 'author' set, the people they replied to)
        query['aggs'] = {
            'per_ml': {
                'terms': {
                    'field': 'sender' if not indata.get('author') else 'replyto.keyword',
                    'size': 5000
                }
            }
        }
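        # Copy the base query and constrain the copy to this one person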
        xquery = copy.deepcopy(query)
        
        xquery['query']['bool']['must'].append({'term': {'replyto.keyword' if not indata.get('author') else 'sender': sourceID}})
        xres = session.DB.ES.search(
            index=session.DB.dbname,
            doc_type="email",
            size=0,
            body=xquery
        )
        authors = []
        for person in xres['aggregations']['per_ml']['buckets']:
            pk = person['key']
            authors.append(pk)
        if emails > max_emails:
            max_emails = emails
        repos[sourceID] = authors
        repo_commits[sourceID] = emails
    
    # Now, figure out which people share the same correspondents
    repo_links = {}      # "A||B" -> shared correspondent count
    repo_notoriety = {}  # person -> set of connected people
    repodatas = {}       # person -> ES person document
    repo_authors = {}    # person -> set of correspondents

    # Grab the person record for each address (skip any we have no record of)
    for ID, repo in repos.items():
        hID = hashlib.sha1( ("%s%s" % (dOrg, ID)).encode('ascii', errors='replace')).hexdigest()
        if not session.DB.ES.exists(index=session.DB.dbname, doc_type="person", id = hID):
            continue
        repodatas[ID] = session.DB.ES.get(index=session.DB.dbname, doc_type="person", id = hID)
        
    # Walk every person, collapsing IDs if requested, and tally shared correspondents
    for ID, repo in repos.items():
        mylinks = {}
        if ID not in repodatas:
            continue
        repodata = repodatas[ID]
        oID = ID
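        # Optionally collapse several addresses into one node via the first
        # capture group of the supplied 'collapse' regex (e.g. grouping by domain)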
        if indata.get('collapse'):
            m = re.search(indata.get('collapse'), repodata['_source']['email'])
            if m:
                ID = m.group(1)
        xlinks = []
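        # Compare against every other person to find shared correspondents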
        for xID, xrepo in repos.items():
            if xID in repodatas:
                xrepodata = repodatas[xID]
                if indata.get('collapse'):
                    m = re.search(indata.get('collapse'), xrepodata['_source']['email'])
                    if m:
                        xID = m.group(1)
                if xID != ID:
                    
                    if ID in xrepo:
                        xlinks.append(xID)
                        lname = "%s||%s" % (ID, xID) # Link name
                        rname = "%s||%s" % (xID, ID) # Reverse link name
                        if len(xlinks) > 0 and rname not in repo_links and len(xlinks) >= minLinks:
                            mylinks[xID] = mylinks.get(xID, 0) + 1 # Note the connection to xID
                            repo_links[lname] = repo_links.get(lname, 0) + len(xlinks) # How many contributors in common between project A and B?
                            if repo_links[lname] > max_shared:
                                max_shared = repo_links[lname]
                        elif rname in repo_links:
                            repo_links[rname] = repo_links.get(rname, 0) + len(xlinks)
        if ID not in repo_notoriety:
            repo_notoriety[ID] = set()
        repo_notoriety[ID].update(mylinks.keys()) # Who is this person connected to?
        
        if ID not in repo_authors:
            repo_authors[ID] = set()
        repo_authors[ID].update(repo) # Unique correspondents for this person
        
        if ID != oID:
            repo_commits[ID] = repo_commits.get(ID, 0) + repo_commits[oID]
            if repo_commits[ID] > max_emails:
                max_emails = repo_commits[ID] # Used for calculating max node size
        if len(repo_notoriety[ID]) > max_links:
            max_links = len(repo_notoriety[ID])
        if len(repo_authors[ID]) > max_authors:
            max_authors = len(repo_authors[ID]) # Used for calculating max sphere size in charts
        
    # Now, pull it all together!
    nodes = []
    links = []
    existing_repos = []
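    # One node per person that survived the filters above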
    for sourceID, ns in repo_notoriety.items():
        lsize = 0
        for k in repo_links.keys():
            fr, to = k.split('||')
            if fr == sourceID or to == sourceID:
                lsize += 1
        asize = len(repo_authors[sourceID])
        doc = {
            'id': sourceID,
            'gravatar': hashlib.md5(sourceID.lower().encode('utf-8')).hexdigest(),
            'name': repodatas[sourceID]['_source'].get('name', sourceID),
            'replies': repo_commits[sourceID],
            'authors': asize,
            'links': lsize,
            'size': max(5, (1 - abs(math.log10(repo_commits[sourceID] / max_emails))) * 45),  # Log-scaled node size: 45 for the busiest address, floored at 5
            'tooltip': "%u connections, %u fellows, %u replies to" % (lsize, asize, repo_commits[sourceID])
        }
        nodes.append(doc)
        existing_repos.append(sourceID)
            
    # Emit a link document for each pair whose endpoints both became nodes
    for k, s in repo_links.items():
        size = s
        fr, to = k.split('||')
        if fr in existing_repos and to in existing_repos:
            doc = {
                'source': fr,
                'target': to,
                'value': max(1, (size/max_shared) * 5),  # Thickness, scaled against the strongest link
                'name': "%s ↔ %s" % (fr, to),
                'tooltip': "%u topics exchanged" % size
            }
            links.append(doc)
    
    JSON_OUT = {
        'maxLinks': max_links,
        'maxShared': max_shared,
        'widgetType': {
            'chartType': 'link'  # Recommendation for the UI
        },
        'links': links,
        'nodes': nodes,
        'okay': True,
        'responseTime': time.time() - now
    }
    yield json.dumps(JSON_OUT)
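
A minimal sketch of the JSON document yielded above, with hypothetical
addresses and illustrative numbers (field names mirror JSON_OUT):

    {
      "maxLinks": 1,
      "maxShared": 5,
      "widgetType": {"chartType": "link"},
      "nodes": [
        {"id": "jane@example.org",
         "gravatar": "md5-of-lowercased-address",
         "name": "Jane Doe",
         "replies": 120, "authors": 14, "links": 1, "size": 45,
         "tooltip": "1 connections, 14 fellows, 120 replies to"}
      ],
      "links": [
        {"source": "jane@example.org", "target": "joe@example.org",
         "value": 5, "name": "jane@example.org ↔ joe@example.org",
         "tooltip": "5 topics exchanged"}
      ],
      "okay": true,
      "responseTime": 0.3
    }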