in api/pages/issue/relationships.py [0:0]
import copy
import json
import math
import re
import time


def run(API, environ, indata, session):
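    """Issue tracker relationships: emits a node/link graph of issue sources
    that share contributors (people who created or closed issues in both)."""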
# We need to be logged in for this!
if not session.user:
        raise API.exception(403, "You must be logged in to use this API endpoint!")
now = time.time()
# First, fetch the view if we have such a thing enabled
viewList = []
if indata.get('view'):
viewList = session.getView(indata.get('view'))
if indata.get('subfilter'):
viewList = session.subFilter(indata.get('subfilter'), view = viewList)
dateTo = indata.get('to', int(time.time()))
dateFrom = indata.get('from', dateTo - (86400*30*6)) # Default to a 6 month span
which = 'committer_email'
role = 'committer'
if indata.get('author', False):
which = 'author_email'
role = 'author'
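    # NOTE: 'which' and 'role' are currently unused by this endpoint; they
    # appear to be carried over from the commit-based relationship views.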
interval = indata.get('interval', 'day')
####################################################################
####################################################################
dOrg = session.user['defaultOrganisation'] or "apache"
query = {
'query': {
'bool': {
'must': [
{'range':
{
'closed': {
'from': dateFrom,
'to': dateTo
}
}
},
{
'term': {
'organisation': dOrg
}
}
]
}
}
}
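    # The base query now matches every issue closed within [dateFrom, dateTo]
    # for the user's (default) organisation; the optional filters below
    # narrow it down further.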
# Source-specific or view-specific??
if indata.get('source'):
query['query']['bool']['must'].append({'term': {'sourceID': indata.get('source')}})
elif viewList:
query['query']['bool']['must'].append({'terms': {'sourceID': viewList}})
if indata.get('email'):
        query['query']['bool']['should'] = [
            {'term': {'issueCreator': indata.get('email')}},
            {'term': {'issueCloser': indata.get('email')}}
        ]
query['query']['bool']['minimum_should_match'] = 1
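        # With minimum_should_match = 1, an issue matches if the given email
        # is its creator, its closer, or both.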
    # Get the number of issues in this period, per source
query['aggs'] = {
'per_repo': {
'terms': {
'field': 'sourceID',
'size': 10000
}
}
}
    res = session.DB.ES.search(
        index=session.DB.dbname,
        doc_type="issue",
        size=0,
        body=query
    )
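    # Each bucket in res['aggregations']['per_repo']['buckets'] has the form
    # {'key': <sourceID>, 'doc_count': <number of matching issues>}.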
repos = {}
repo_commits = {}
authorlinks = {}
max_commits = 0
max_links = 0
max_shared = 0
max_authors = 0
    # For each source, count issues and gather the people who created or closed them
for doc in res['aggregations']['per_repo']['buckets']:
sourceID = doc['key']
commits = doc['doc_count']
        # Gather the unique issue creators and closers
query['aggs'] = {
'per_closer': {
'terms': {
'field': 'issueCloser',
'size': 10000
}
},
'per_creator': {
'terms': {
'field': 'issueCreator',
'size': 10000
}
}
}
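        # Deep-copy the base query so the per-source term filter added below
        # does not accumulate across loop iterations.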
xquery = copy.deepcopy(query)
xquery['query']['bool']['must'].append({'term': {'sourceID': sourceID}})
        xres = session.DB.ES.search(
            index=session.DB.dbname,
            doc_type="issue",
            size=0,
            body=xquery
        )
        # Use a set, so someone who both created and closed issues is only counted once
        authors = set()
        for person in xres['aggregations']['per_closer']['buckets']:
            authors.add(person['key'])
        for person in xres['aggregations']['per_creator']['buckets']:
            authors.add(person['key'])
if commits > max_commits:
max_commits = commits
repos[sourceID] = authors
repo_commits[sourceID] = commits
# Now, figure out which repos share the same contributors
repo_links = {}
repo_notoriety = {}
repodatas = {}
repo_authors = {}
minLinks = indata.get('links', 1)
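    # minLinks is the minimum number of shared contributors two sources must
    # have before a link is drawn between them (defaults to 1).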
# Grab data of all sources
for ID, repo in repos.items():
        if not session.DB.ES.exists(index=session.DB.dbname, doc_type="source", id=ID):
            continue
        repodatas[ID] = session.DB.ES.get(index=session.DB.dbname, doc_type="source", id=ID)
for ID, repo in repos.items():
mylinks = {}
        if ID not in repodatas:
continue
repodata = repodatas[ID]
oID = ID
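        # 'collapse' is an optional regex whose first capture group merges
        # several sources (for instance all trackers of one project) into a
        # single node; otherwise the last path segment of the source URL is used.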
if indata.get('collapse'):
m = re.search(indata.get('collapse'), repodata['_source']['sourceURL'])
if m:
ID = m.group(1)
else:
ID = re.sub(r"^.+/", "", repodata['_source']['sourceURL'])
for xID, xrepo in repos.items():
if xID in repodatas:
xrepodata = repodatas[xID]
if indata.get('collapse'):
m = re.search(indata.get('collapse'), xrepodata['_source']['sourceURL'])
if m:
xID = m.group(1)
else:
xID = re.sub(r"^.+/", "", xrepodata['_source']['sourceURL'])
if xID != ID:
xlinks = []
for author in xrepo:
if author in repo:
xlinks.append(author)
lname = "%s@%s" % (ID, xID) # Link name
rname = "%s@%s" % (xID, ID) # Reverse link name
                    if len(xlinks) >= minLinks and rname not in repo_links:
mylinks[xID] = len(xlinks)
repo_links[lname] = repo_links.get(lname, 0) + len(xlinks) # How many contributors in common between project A and B?
if repo_links[lname] > max_shared:
max_shared = repo_links[lname]
if ID not in repo_notoriety:
repo_notoriety[ID] = set()
repo_notoriety[ID].update(mylinks.keys()) # How many projects is this repo connected to?
if ID not in repo_authors:
repo_authors[ID] = set()
        repo_authors[ID].update(repo) # Track the unique contributors for this source
if ID != oID:
repo_commits[ID] = repo_commits.get(ID, 0) + repo_commits[oID]
if repo_commits[ID] > max_commits:
max_commits = repo_commits[ID] # Used for calculating max link thickness
if len(repo_notoriety[ID]) > max_links:
max_links = len(repo_notoriety[ID])
if len(repo_authors[ID]) > max_authors:
max_authors = len(repo_authors[ID]) # Used for calculating max sphere size in charts
# Now, pull it all together!
nodes = []
links = []
existing_repos = []
for sourceID in repo_notoriety.keys():
lsize = 0
for k in repo_links.keys():
fr, to = k.split('@')
if fr == sourceID or to == sourceID:
lsize += 1
asize = len(repo_authors[sourceID])
doc = {
'id': sourceID,
'name': sourceID,
'issues': repo_commits[sourceID],
'authors': asize,
'links': lsize,
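            # Sphere size: 45 for the source with the most contributors,
            # shrinking logarithmically down to a floor of 5.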
            'size': max(5, (1 - abs(math.log10(max(1, asize) / max(1, max_authors)))) * 45),  # max(1, ...) guards against log10(0) and division by zero
'tooltip': "%u connections, %u contributors, %u issues" % (lsize, asize, repo_commits[sourceID])
}
nodes.append(doc)
existing_repos.append(sourceID)
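    # Build the link list; 'value' (line thickness) scales linearly with the
    # number of shared contributors, from 1 up to 8 for the most-shared pair.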
    for k, size in repo_links.items():
        fr, to = k.split('@')
if fr in existing_repos and to in existing_repos:
doc = {
'source': fr,
'target': to,
                'value': max(1, (size / max(1, max_shared)) * 8),  # max(1, ...) guards against max_shared == 0
'name': "%s ↔ %s" % (fr, to),
'tooltip': "%u contributors in common" % size
}
links.append(doc)
JSON_OUT = {
'maxLinks': max_links,
'maxShared': max_shared,
'widgetType': {
'chartType': 'link' # Recommendation for the UI
},
'links': links,
'nodes': nodes,
'interval': interval,
'okay': True,
'responseTime': time.time() - now
}
yield json.dumps(JSON_OUT)
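
# Illustrative example (not produced by this file): the JSON emitted above has
# this shape; the source IDs and numbers below are made up.
#
#   {
#     "maxLinks": 3,
#     "maxShared": 12,
#     "widgetType": {"chartType": "link"},
#     "links": [{"source": "abc123", "target": "def456", "value": 8.0,
#                "name": "abc123 ↔ def456",
#                "tooltip": "12 contributors in common"}],
#     "nodes": [{"id": "abc123", "name": "abc123", "issues": 128,
#                "authors": 9, "links": 1, "size": 38.2,
#                "tooltip": "1 connections, 9 contributors, 128 issues"}],
#     "interval": "day",
#     "okay": true,
#     "responseTime": 0.0421
#   }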