hghooks/convert-pushlog-db.py (122 lines of code) (raw):

# Copyright (C) 2010 Mozilla Foundation
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# This script imports data from the older flat-file pushlog format,
# and the newer sqlite pushlog format into a newer sqlite schema.
#
# Unfortunately, when changing between the older format and the db,
# the semantics changed somewhat. The older format used to record the
# *HEAD* revision as of a push, but the db records the *first* changeset
# in a group of pushed changes. To make life simpler, the new schema
# will record all changesets for a push, but we need to migrate the old data
# over.
# To do so, we grab all logged pushes from the old log and the db,
# and then for each logged push, if it is in the old log, then it's a head,
# so store all changes since the previous push with this push. Otherwise,
# it's a 'first changeset', so store all changes up until the next push
# with this push. At the end we'll have one entry in the new pushlog
# table for every push, and one entry per-changeset in the changesets
# table, mapped back to the pushlog table.
try:
    import sqlite3 as sqlite
except ImportError:
    from pysqlite2 import dbapi2 as sqlite
import sys
import os.path
import re
from datetime import datetime
import time
from calendar import timegm
from rfc822 import parsedate_tz, mktime_tz
from mercurial import ui, hg
from mercurial.node import hex

# One flat-file pushlog line: "<40-hex node>"<TAB>"<user>"<TAB>"<date>"
reader = re.compile(r'^"([a-f0-9]{40})"\t"([^\t]*)"\t"([^\t]*)"$')

USAGE = "Must specify a repository as the only parameter (/path/to/repo/)"
INSERT_CHANGESET = "INSERT INTO changesets (pushid,rev,node) VALUES(?,?,?)"


def readlog(logfile):
    """Read a flat-file pushlog and return a list of (node, user, date).

    Entries are returned in file order. |date| is a timestamp computed
    from the RFC 822 style date string stored in the log.

    Returns an empty list if the file cannot be opened. Blank lines are
    skipped. Raises ValueError (naming the offending line number) for a
    line that does not match the expected format or whose date cannot be
    parsed, rather than failing with an opaque AttributeError/TypeError.
    """
    try:
        fd = open(logfile)
    except IOError:
        return []
    entries = []
    try:
        lineno = 0
        for line in fd:
            lineno += 1
            if not line.strip():
                continue
            match = reader.match(line)
            if match is None:
                raise ValueError(
                    "%s:%d: malformed pushlog line" % (logfile, lineno)
                )
            (node, user, date) = match.group(1, 2, 3)
            parsed = parsedate_tz(date)
            if parsed is None:
                raise ValueError(
                    "%s:%d: unparseable date %r" % (logfile, lineno, date)
                )
            entries.append((node, user, mktime_tz(parsed)))
    finally:
        # close the file even when a bad line aborts the read
        fd.close()
    return entries


def readpushdb(pushdb):
    """Read an old-format pushlog db and return a list of (node, user, date).

    Entries are ordered by the stored date, ascending. |date| is a
    timestamp computed from the "%Y-%m-%dT%H:%M:%SZ" string in the db.

    Returns an empty list if the db file does not exist or cannot be
    queried (any sqlite error, e.g. no pushlog table). A row whose date
    does not match the expected format raises ValueError instead of
    silently discarding the whole db.
    """
    if not os.path.exists(pushdb):
        # connect() would create an empty db file as a side effect
        return []
    try:
        conn = sqlite.connect(pushdb)
    except sqlite.Error:
        return []
    try:
        try:
            rows = conn.execute(
                "SELECT node, user, date FROM pushlog ORDER BY date ASC"
            ).fetchall()
        except sqlite.Error:
            return []
    finally:
        conn.close()
    return [
        (node, user, timegm(time.strptime(date, "%Y-%m-%dT%H:%M:%SZ")))
        for (node, user, date) in rows
    ]


def nodeindb(pushdb, node):
    """Return True if |node| already has a row in the changesets table.

    |pushdb| is an open connection to the new-schema database.
    """
    # execute() returns a cursor; the count has to be fetched from it.
    row = pushdb.execute(
        "SELECT COUNT(*) from changesets WHERE node = ?", (node,)
    ).fetchone()
    return row[0] > 0


def _insert_revs(conn, repo, pushid, start, stop):
    """Record revisions start..stop-1 of |repo| as changesets of |pushid|.

    Returns the last revision number inserted, or None if the range was
    empty.
    """
    last = None
    for i in range(start, stop):
        c = repo.changectx(i)
        conn.execute(INSERT_CHANGESET, (pushid, c.rev(), hex(c.node())))
        last = c.rev()
    return last


def _usage_error():
    """Print the usage message to stderr and return the failure exit code."""
    sys.stderr.write(USAGE + "\n")
    return 1


def main(argv):
    """Migrate the pushlog data of the repository named in argv[1].

    Returns 1 after printing a usage message when the argument is missing
    or does not name a usable repository; returns None on success.
    """
    if len(argv) != 2:
        return _usage_error()

    ### Main entrypoint
    repo_path = os.path.abspath(argv[1])
    if not os.path.exists(repo_path):
        return _usage_error()
    try:
        repo = hg.repository(ui.ui(), repo_path)
    except Exception:
        return _usage_error()

    # we need to read both the old text pushlog
    pushlog = os.path.join(repo_path, ".hg", "pushlog")
    # ... and the newer pushlog db
    oldpushdb = pushlog + ".db"
    # and we're going to migrate them both to a new schema
    pushdb = pushlog + "2.db"

    # Open or create our new db
    conn = sqlite.connect(pushdb)
    try:
        conn.execute(
            "CREATE TABLE IF NOT EXISTS changesets (pushid INTEGER, rev INTEGER, node text)"
        )
        conn.execute(
            "CREATE TABLE IF NOT EXISTS pushlog (id INTEGER PRIMARY KEY AUTOINCREMENT, user TEXT, date INTEGER)"
        )
        conn.execute(
            "CREATE UNIQUE INDEX IF NOT EXISTS changeset_node ON changesets (node)"
        )
        conn.execute(
            "CREATE UNIQUE INDEX IF NOT EXISTS changeset_rev ON changesets (rev)"
        )
        conn.execute("CREATE INDEX IF NOT EXISTS pushlog_date ON pushlog (date)")
        conn.execute("CREATE INDEX IF NOT EXISTS pushlog_user ON pushlog (user)")

        # Read all entries from both pushlogs
        flatlogentries = readlog(pushlog)
        # set for easy lookup of nodes recorded by the flat-file log
        flatnodes = set([node for (node, user, date) in flatlogentries])
        logentries = readpushdb(oldpushdb)
        if not logentries:
            # just in case someone is importing from an old flatfile log
            logentries = flatlogentries

        # sort by revision #, just in case we have two pushes with the
        # same date
        logentries = [
            (node, repo.changectx(node), user, date)
            for (node, user, date) in logentries
        ]
        logentries.sort(key=lambda entry: entry[1].rev())

        # start at the beginning
        lastrev = -1
        for index, (node, ctx, user, date) in enumerate(logentries):
            rev = ctx.rev()
            if nodeindb(conn, node):
                # already in the database, move along
                lastrev = rev
                continue
            res = conn.execute(
                "INSERT INTO pushlog (user, date) VALUES(?,?)", (user, date)
            )
            pushid = res.lastrowid
            # insert this change first
            conn.execute(INSERT_CHANGESET, (pushid, rev, node))
            if node in flatnodes:
                # this was a HEAD revision: everything after the previous
                # push and before this revision was pushed along with it
                _insert_revs(conn, repo, pushid, lastrev + 1, rev)
                lastrev = rev
                continue

            # this was the first change in a set of changes pushed, see
            # if any other changes were pushed along with it
            if index + 1 < len(logentries):
                # everything up to (not including) the next logged push
                stop = logentries[index + 1][1].rev()
            elif "tip" not in ctx.tags():
                # end of the list: we want everything up to and
                # including tip
                stop = repo.changectx("tip").rev() + 1
            else:
                stop = rev + 1
            last = _insert_revs(conn, repo, pushid, rev + 1, stop)
            if last is not None:
                # only advance when extra changesets were actually stored
                lastrev = last

        conn.commit()
    finally:
        conn.close()


if __name__ == "__main__":
    sys.exit(main(sys.argv))