hgext/serverlog/scripts/repo-totals-by-hour.py (33 lines of code) (raw):

#!/usr/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import datetime
import sys


def totals_by_hour(fh):
    """Aggregate per-repository request totals into hourly buckets.

    Each input line must consist of exactly seven whitespace-separated
    fields::

        when repo ip command size t_wall t_cpu

    ``when`` is a timestamp of the form ``YYYY-MM-DDTHH:MM:SS`` with an
    optional ``.ffffff`` fractional part, ``size`` is an integer and
    ``t_wall`` / ``t_cpu`` are floats. Lines that do not match this shape
    (wrong field count, unparseable timestamp or non-numeric values) are
    skipped silently.

    For every (hour, repo) pair, in sorted order, one tab-separated line is
    printed to stdout: the ISO-formatted hour, the repo, the request count,
    the summed size, the summed wall time and the summed CPU time. The two
    time sums are rendered with ``%d`` and are therefore truncated to whole
    numbers.

    :param fh: iterable of text lines (for example an open file or
        ``sys.stdin``).
    """
    hours = {}

    for line in fh:
        parts = line.split()

        try:
            when, repo, _ip, _command, size, t_wall, t_cpu = parts
            try:
                when = datetime.datetime.strptime(when, "%Y-%m-%dT%H:%M:%S.%f")
            except ValueError:
                # Timestamps may omit the fractional seconds.
                when = datetime.datetime.strptime(when, "%Y-%m-%dT%H:%M:%S")
            # Convert inside the guarded region so that a record with a
            # non-numeric field is skipped like any other malformed line
            # instead of aborting the whole run.
            size = int(size)
            t_wall = float(t_wall)
            t_cpu = float(t_cpu)
        except (TypeError, ValueError):
            continue

        # Truncate the timestamp down to the start of its hour.
        bucket = when.replace(minute=0, second=0, microsecond=0)

        repos = hours.setdefault(bucket, {})
        # totals layout: [request count, size sum, wall time sum, cpu time sum]
        totals = repos.setdefault(repo, [0, 0, 0.0, 0.0])
        totals[0] += 1
        totals[1] += size
        totals[2] += t_wall
        totals[3] += t_cpu

    for date, repos in sorted(hours.items()):
        for repo, totals in sorted(repos.items()):
            print(
                "%s\t%s\t%d\t%d\t%d\t%d"
                % (date.isoformat(), repo, totals[0], totals[1], totals[2], totals[3])
            )


if __name__ == "__main__":
    totals_by_hour(sys.stdin)