scripts/year_in_review.py (338 lines of code) (raw):

#!/usr/bin/env python import json import logging import subprocess import sys import textwrap import xmlrpc.client USAGE = 'Usage: year_in_review.py [--json] <YEAR>' # Note: Most of the bugzila api code comes from Scrumbugz. cache = {} log = logging.getLogger(__name__) BZ_URL = 'http://bugzilla.mozilla.org/xmlrpc.cgi' SESSION_COOKIES_CACHE_KEY = 'bugzilla-session-cookies' PRODUCTS = [ 'support.mozilla.org' ] BZ_RESOLUTIONS = [ '', 'FIXED', 'INVALID', 'WONTFIX', 'DUPLICATE', 'WORKSFORME', 'INCOMPLETE', 'SUPPORT', 'EXPIRED', 'MOVED' ] BZ_FIELDS = [ 'id', 'status', 'resolution', 'summary', 'whiteboard', 'assigned_to', 'priority', 'severity', 'product', 'component', 'blocks', 'depends_on', 'creator', 'creation_time', 'last_change_time', 'target_milestone', ] UNWANTED_COMPONENT_FIELDS = [ 'sort_key', 'is_active', 'default_qa_contact', 'default_assigned_to', 'description' ] class SessionTransport(xmlrpc.client.SafeTransport): """ XML-RPC HTTPS transport that stores auth cookies in the cache. """ _session_cookies = None @property def session_cookies(self): if self._session_cookies is None: cookie = cache.get(SESSION_COOKIES_CACHE_KEY) if cookie: self._session_cookies = cookie return self._session_cookies def parse_response(self, response): cookies = self.get_cookies(response) if cookies: self._session_cookies = cookies cache.set(SESSION_COOKIES_CACHE_KEY, self._session_cookies, 0) log.debug('Got cookie: %s', self._session_cookies) return xmlrpc.client.Transport.parse_response(self, response) def send_host(self, connection, host): cookies = self.session_cookies if cookies: for cookie in cookies: connection.putheader('Cookie', cookie) log.debug('Sent cookie: %s', cookie) return xmlrpc.client.Transport.send_host(self, connection, host) def get_cookies(self, response): cookie_headers = None if hasattr(response, 'msg'): cookies = response.msg.getheaders('set-cookie') if cookies: log.debug('Full cookies: %s', cookies) cookie_headers = [c.split(';', 1)[0] for c in cookies] return cookie_headers class BugzillaAPI(xmlrpc.client.ServerProxy): def get_bug_ids(self, **kwargs): """Return list of ids of bugs from a search.""" kwargs.update({ 'include_fields': ['id'], }) log.debug('Searching bugs with kwargs: %s', kwargs) bugs = self.Bug.search(kwargs) return [bug['id'] for bug in bugs.get('bugs', [])] def get_bugs(self, **kwargs): defaults = { 'include_fields': BZ_FIELDS, } get_history = kwargs.pop('history', True) get_comments = kwargs.pop('comments', True) defaults.update(kwargs) if 'ids' in defaults: defaults['permissive'] = True log.debug('Getting bugs with kwargs: %s', defaults) bugs = self.Bug.get(defaults) else: log.debug('Searching bugs with kwargs: %s', defaults) bugs = self.Bug.search(defaults) bug_ids = [bug['id'] for bug in bugs.get('bugs', [])] if not bug_ids: return bugs # mix in history and comments history = comments = {} if get_history: history = self.get_history(bug_ids) if get_comments: comments = self.get_comments(bug_ids) for bug in bugs['bugs']: bug['history'] = history.get(bug['id'], []) bug['comments'] = comments.get(bug['id'], {}).get('comments', []) bug['comments_count'] = len(comments.get(bug['id'], {}) .get('comments', [])) return bugs def get_history(self, bug_ids): log.debug('Getting history for bugs: %s', bug_ids) try: history = self.Bug.history({'ids': bug_ids}).get('bugs') except xmlrpc.client.Fault: log.exception('Problem getting history for bug ids: %s', bug_ids) return {} return dict((h['id'], h['history']) for h in history) def get_comments(self, bug_ids): log.debug('Getting comments for bugs: %s', bug_ids) try: comments = self.Bug.comments({ 'ids': bug_ids, 'include_fields': ['id', 'creator', 'time', 'text'], }).get('bugs') except xmlrpc.client.Fault: log.exception('Problem getting comments for bug ids: %s', bug_ids) return {} return dict((int(bid), cids) for bid, cids in comments.items()) def wrap(text, indent=' '): text = text.split('\n\n') text = [textwrap.fill(part, expand_tabs=True, initial_indent=indent, subsequent_indent=indent) for part in text] return '\n\n'.join(text) def parse_whiteboard(whiteboard): bits = { 'u': '', 'c': '', 'p': '', 's': '' } for part in whiteboard.split(' '): part = part.split('=') if len(part) != 2: continue if part[0] in bits: bits[part[0]] = part[1] return bits def bugzilla_stats(year): stats = [] bugzilla = BugzillaAPI( BZ_URL, transport=SessionTransport(use_datetime=True), allow_none=True) # ------------------------------------------- # Bugs created this year # ------------------------------------------- bugs = bugzilla.get_bugs( product=PRODUCTS, creation_time='%s-01-01' % year, include_fields=['id', 'creator', 'creation_time'], history=False, comments=False) bugs = bugs['bugs'] total = 0 creators = {} for bug in bugs: # We can only get creation_time >= somedate, so we need to nix # the bugs that are after the year we're looking for. if bug['creation_time'].year != int(year): continue total += 1 creators[bug['creator']] = creators.get(bug['creator'], 0) + 1 creators = sorted(list(creators.items()), key=lambda item: item[1], reverse=True) stats.append(('Bugs created', { 'total': total, 'breakdown': [ {'name': mem[0].split('@')[0], 'count': mem[1]} for mem in creators[:10]] })) # ------------------------------------------- # Bugs resolved this year # ------------------------------------------- bugs = bugzilla.get_bugs( product=PRODUCTS, last_change_time='%s-01-01' % year, include_fields=['id', 'summary', 'assigned_to', 'last_change_time', 'resolution'], status=['RESOLVED', 'VERIFIED', 'CLOSED'], history=True, comments=False) bugs = bugs['bugs'] total = 0 peeps = {} resolutions = {} traceback_bugs = [] research_bugs = [] tracker_bugs = [] for bug in bugs: # We can only get last_change_time >= somedate, so we need to # nix the bugs that are after the year we're looking for. if bug['last_change_time'].year != int(year): continue if bug['summary'].lower().startswith('[traceback]'): traceback_bugs.append(bug) if bug['summary'].lower().startswith('[research]'): research_bugs.append(bug) if bug['summary'].lower().startswith('[tracker]'): tracker_bugs.append(bug) for hist in bug['history']: for change in hist['changes']: if not change['field_name'] == 'resolution': continue # I think this history item comes from clearing the # resolution. i.e. reopening. if change['added'] == '': continue total += 1 # If the bug is marked FIXED, we assume that whoever # it was assigned to should get the "credit". If it # wasn't marked FIXED, then it's probably someone # doing triage and so whoever changed the resolution # should get "credit". if (change['added'] == 'FIXED' and not 'nobody' in bug['assigned_to']): person = bug['assigned_to'] else: person = hist['who'] peeps_dict = peeps.setdefault(person, {}) key = change['added'] peeps_dict[key] = peeps_dict.get(key, 0) + 1 resolutions[change['added']] = resolutions.get( change['added'], 0) + 1 peeps = sorted(list(peeps.items()), key=lambda item: sum(item[1].values()), reverse=True) stats.append(('Bugs resolved', { 'total': total, 'breakdown': [ {'name': mem[0].split('@')[0], 'total': sum(mem[1].values()), 'breakdown': list(mem[1].items())} for mem in peeps[:10] ] })) # ------------------------------------------- # Resolution stats # ------------------------------------------- resolutions = sorted(list(resolutions.items()), key=lambda item: item[1]) stats.append(('Bugs resolved breakdown', resolutions)) # ------------------------------------------- # Research bugs # ------------------------------------------- stats.append(('Research bugs', [ {'id': bug['id'], 'summary': bug['summary']} for bug in research_bugs ])) # ------------------------------------------- # Trackers # ------------------------------------------- stats.append(('Tracker bugs', [ {'id': bug['id'], 'summary': bug['summary']} for bug in tracker_bugs ])) return stats def git(*args): return subprocess.check_output(args) def git_stats(year): stats = [] # Get the shas for all the commits we're going to look at. all_commits = subprocess.check_output([ 'git', 'log', '--after=%s-01-01' % year, '--before=%s-01-01' % (int(year) + 1), '--format=%H' ]) all_commits = all_commits.splitlines() # Person -> # commits committers = {} # Person -> (# files changed, # inserted, # deleted) changes = {} for commit in all_commits: author = git('git', 'log', '--format=%an', '{0}~..{1}'.format(commit, commit)) author = author.strip() # FIXME - this is lame. what's going on is that there are # merge commits which have multiple authors, so we just grab # the second one. if '\n' in author: author = author.splitlines()[1] committers[author] = committers.get(author, 0) + 1 diff_data = git('git', 'diff', '--numstat', '--find-copies-harder', '{0}~..{1}'.format(commit, commit)) total_added = 0 total_deleted = 0 total_files = 0 for line in diff_data.splitlines(): added, deleted, fn = line.split('\t') if fn.startswith('vendor/'): continue if added != '-': total_added += int(added) if deleted != '-': total_deleted += int(deleted) total_files += 1 old_changes = changes.get(author, (0, 0, 0)) changes[author] = ( old_changes[0] + total_added, old_changes[1] + total_deleted, old_changes[2] + total_files ) print('Total commits:', len(all_commits)) print('') committers = sorted( list(committers.items()), key=lambda item: item[1], reverse=True) committers_data = [] for person, count in committers: committers_data.append({ 'name': person, 'data': { 'commits': count, 'added': changes[person][0], 'deleted': changes[person][1], 'files': changes[person][2] } }) stats.append(('Git commit data', { 'total commits': len(all_commits), 'total lines added': sum([item[0] for item in list(changes.values())]), 'total lines deleted': sum([item[1] for item in list(changes.values())]), 'total files changed': sum([item[2] for item in list(changes.values())]) })) stats.append(('Git committer data', committers_data)) return stats def main(argv): # XXX: This helps debug bugzilla xmlrpc bits. # logging.basicConfig(level=logging.DEBUG) do_json = False if not argv: print(USAGE) print('Error: Must specify the year. e.g. 2012') return 1 if '--json' in argv: print('>>> OMGWTFBBQ! You want it in JSON!') do_json = True argv.remove('--json') year = argv[0] output = [] output.append(('Year', year)) print('>>> Generating bugzilla stats....') output.extend(bugzilla_stats(year)) print('>>> Generating git stats....') output.extend(git_stats(year)) print('') if do_json: print(json.dumps(output, indent=2)) else: for mem in output: print('') print(mem[0]) print('=' * len(mem[0])) print('') # FIXME - this is gross print(json.dumps(mem[1], indent=2)) if __name__ == '__main__': sys.exit(main(sys.argv[1:]))