api/pages/code/relationships.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

########################################################################
# OPENAPI-URI: /api/code/relationships
########################################################################
# get:
#   responses:
#     '200':
#       content:
#         application/json:
#           schema:
#             $ref: '#/components/schemas/Sloc'
#       description: 200 Response
#     default:
#       content:
#         application/json:
#           schema:
#             $ref: '#/components/schemas/Error'
#       description: unexpected error
#   security:
#   - cookieAuth: []
#   summary: Shows a breakdown of contributor relationships between repositories
# post:
#   requestBody:
#     content:
#       application/json:
#         schema:
#           $ref: '#/components/schemas/defaultWidgetArgs'
#   responses:
#     '200':
#       content:
#         application/json:
#           schema:
#             $ref: '#/components/schemas/Sloc'
#       description: 200 Response
#     default:
#       content:
#         application/json:
#           schema:
#             $ref: '#/components/schemas/Error'
#       description: unexpected error
#   security:
#   - cookieAuth: []
#   summary: Shows a breakdown of contributor relationships between repositories
#
########################################################################

"""
This is the committer relationship list renderer for Kibble
"""

import json
import time
import hashlib
import copy
import re
import math


def run(API, environ, indata, session):

    # We need to be logged in for this!
    if not session.user:
        raise API.exception(403, "You must be logged in to use this API endpoint!")

    now = time.time()

    # First, fetch the view if we have such a thing enabled
    viewList = []
    if indata.get('view'):
        viewList = session.getView(indata.get('view'))
    if indata.get('subfilter'):
        viewList = session.subFilter(indata.get('subfilter'), view=viewList)

    dateTo = indata.get('to', int(time.time()))
    dateFrom = indata.get('from', dateTo - (86400 * 30 * 6))  # Default to a 6 month span

    which = 'committer_email'
    role = 'committer'
    if indata.get('author', False):
        which = 'author_email'
        role = 'author'

    interval = indata.get('interval', 'day')

    ####################################################################
    ####################################################################
    dOrg = session.user['defaultOrganisation'] or "apache"
    query = {
        'query': {
            'bool': {
                'must': [
                    {'range': {
                        'tsday': {
                            'from': dateFrom,
                            'to': dateTo
                        }
                    }},
                    {'term': {'organisation': dOrg}}
                ]
            }
        }
    }
    # Source-specific or view-specific??
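    # The base query above limits commits to the requested time window and
    # the user's organisation; the clauses appended below optionally narrow
    # it further to one source, to the sources in a saved view, or to a single
    # contributor's email. As a rough illustration (values made up), a
    # source-filtered query ends up shaped like:
    #   {'bool': {'must': [
    #       {'range': {'tsday': {'from': 1559347200, 'to': 1575158400}}},
    #       {'term': {'organisation': 'apache'}},
    #       {'term': {'sourceID': 'abc123'}}]}}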
    if indata.get('source'):
        query['query']['bool']['must'].append({'term': {'sourceID': indata.get('source')}})
    elif viewList:
        query['query']['bool']['must'].append({'terms': {'sourceID': viewList}})
    if indata.get('email'):
        query['query']['bool']['must'].append({'term': {'committer_email' if not indata.get('author') else 'author_email': indata.get('email')}})

    # Get number of commits, this period, per repo
    query['aggs'] = {
        'per_repo': {
            'terms': {
                'field': 'sourceID',
                'size': 10000
            }
        }
    }
    res = session.DB.ES.search(
        index=session.DB.dbname,
        doc_type="code_commit",
        size=0,
        body=query
    )

    repos = {}
    repo_commits = {}
    authorlinks = {}
    max_commits = 0
    max_links = 0
    max_shared = 0
    max_authors = 0
    minLinks = indata.get('links', 1)

    # For each repo, count commits and gather data on authors
    for doc in res['aggregations']['per_repo']['buckets']:
        sourceID = doc['key']
        commits = doc['doc_count']

        # Gather the unique authors/committers
        query['aggs'] = {
            'per_contributor': {
                'terms': {
                    'field': 'committer_email' if not indata.get('author') else 'author_email',
                    'size': 10000
                }
            }
        }
        xquery = copy.deepcopy(query)
        xquery['query']['bool']['must'].append({'term': {'sourceID': sourceID}})
        xres = session.DB.ES.search(
            index=session.DB.dbname,
            doc_type="code_commit",
            size=0,
            body=xquery
        )
        authors = []
        for person in xres['aggregations']['per_contributor']['buckets']:
            authors.append(person['key'])
        if commits > max_commits:
            max_commits = commits
        repos[sourceID] = authors
        repo_commits[sourceID] = commits

    # Now, figure out which repos share the same contributors
    repo_links = {}
    repo_notoriety = {}
    repodatas = {}
    repo_authors = {}

    # Grab data of all sources
    for ID, repo in repos.items():
        mylinks = {}
        if not session.DB.ES.exists(index=session.DB.dbname, doc_type="source", id=ID):
            continue
        repodatas[ID] = session.DB.ES.get(index=session.DB.dbname, doc_type="source", id=ID)

    for ID, repo in repos.items():
        mylinks = {}
        if ID not in repodatas:
            continue
        repodata = repodatas[ID]
        oID = ID
        if indata.get('collapse'):
            m = re.search(indata.get('collapse'), repodata['_source']['sourceURL'])
            if m:
                ID = m.group(1)
        else:
            ID = re.sub(r"^.+/", "", repodata['_source']['sourceURL'])
        for xID, xrepo in repos.items():
            if xID in repodatas:
                xrepodata = repodatas[xID]
                if indata.get('collapse'):
                    m = re.search(indata.get('collapse'), xrepodata['_source']['sourceURL'])
                    if m:
                        xID = m.group(1)
                else:
                    xID = re.sub(r"^.+/", "", xrepodata['_source']['sourceURL'])
                if xID != ID:
                    xlinks = []
                    for author in xrepo:
                        if author in repo:
                            xlinks.append(author)
                    lname = "%s@%s" % (ID, xID)  # Link name
                    rname = "%s@%s" % (xID, ID)  # Reverse link name
                    if len(xlinks) >= minLinks and rname not in repo_links:
                        mylinks[xID] = len(xlinks)
                        # How many contributors in common between project A and B?
                        repo_links[lname] = repo_links.get(lname, 0) + len(xlinks)
                        if repo_links[lname] > max_shared:
                            max_shared = repo_links[lname]
        if ID not in repo_notoriety:
            repo_notoriety[ID] = set()
        repo_notoriety[ID].update(mylinks.keys())  # How many projects is this repo connected to?

        if ID not in repo_authors:
            repo_authors[ID] = set()
        repo_authors[ID].update(repo)  # How many contributors this repo has
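        # When a 'collapse' regex folds several sources into the same display
        # ID, the commit counts are folded together below as well, so the
        # rendered node represents the whole group rather than one repository.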
        if ID != oID:
            repo_commits[ID] = repo_commits.get(ID, 0) + repo_commits[oID]
        if repo_commits[ID] > max_commits:
            max_commits = repo_commits[ID]  # Used for calculating max link thickness
        if len(repo_notoriety[ID]) > max_links:
            max_links = len(repo_notoriety[ID])
        if len(repo_authors[ID]) > max_authors:
            max_authors = len(repo_authors[ID])  # Used for calculating max sphere size in charts

    # Now, pull it all together!
    nodes = []
    links = []
    existing_repos = []
    for sourceID in repo_notoriety.keys():
        lsize = 0
        for k in repo_links.keys():
            fr, to = k.split('@')
            if fr == sourceID or to == sourceID:
                lsize += 1
        asize = len(repo_authors[sourceID])
        doc = {
            'id': sourceID,
            'name': sourceID,
            'commits': repo_commits[sourceID],
            'authors': asize,
            'links': lsize,
            'size': max(5, (1 - abs(math.log10(asize / max_authors))) * 45),
            'tooltip': "%u connections, %u contributors, %u commits" % (lsize, asize, repo_commits[sourceID])
        }
        nodes.append(doc)
        existing_repos.append(sourceID)

    for k, s in repo_links.items():
        size = s
        fr, to = k.split('@')
        if fr in existing_repos and to in existing_repos:
            doc = {
                'source': fr,
                'target': to,
                'value': max(1, (size / max_shared) * 8),
                'name': "%s ↔ %s" % (fr, to),
                'tooltip': "%u committers in common" % size
            }
            links.append(doc)

    JSON_OUT = {
        'maxLinks': max_links,
        'maxShared': max_shared,
        'widgetType': {
            'chartType': 'link'  # Recommendation for the UI
        },
        'links': links,
        'nodes': nodes,
        'interval': interval,
        'okay': True,
        'responseTime': time.time() - now
    }
    yield json.dumps(JSON_OUT)
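
# ----------------------------------------------------------------------
# Minimal sketch (not part of the handler above): the node and link scaling
# used when building the chart data, shown standalone so the formulas are
# easy to check. All values below are made-up examples, not Kibble data.
if __name__ == '__main__':
    example_max_authors = 120  # pretend the busiest repo has 120 contributors
    example_max_shared = 40    # pretend the strongest link shares 40 committers
    for asize in (1, 12, 120):
        # Same sphere-size formula as in run(): repos near the maximum
        # contributor count approach size 45, small repos bottom out at 5.
        size = max(5, (1 - abs(math.log10(asize / example_max_authors))) * 45)
        print("repo with %3u contributors -> sphere size %.1f" % (asize, size))
    for shared in (1, 10, 40):
        # Same link-thickness formula as in run(): scaled into a 1..8 range.
        value = max(1, (shared / example_max_shared) * 8)
        print("%2u shared committers -> link thickness %.1f" % (shared, value))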