hghooks/convert-pushlog-db.py (122 lines of code) (raw):
# Copyright (C) 2010 Mozilla Foundation
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# This script imports data from the older flat-file pushlog format,
# and the newer sqlite pushlog format into a newer sqlite schema.
#
# Unfortunately, when changing between the older format and the db,
# the semantics changed somewhat. The older format used to record the
# *HEAD* revision as of a push, but the db records the *first* changeset
# in a group of pushed changes. To make life simpler, the new schema
# will record all changesets for a push, but we need to migrate the old data
# over.
# To do so, we grab all logged pushes from the old log and the db,
# and then for each logged push, if it is in the old log, then it's a head,
# so store all changes since the previous push with this push. Otherwise,
# it's a 'first changeset', so store all changes up until the next push
# with this push. At the end we'll have one entry in the new pushlog
# table for every push, and one entry per-changeset in the changesets
# table, mapped back to the pushlog table.
try:
import sqlite3 as sqlite
except ImportError:
from pysqlite2 import dbapi2 as sqlite
import sys
import os.path
import re
from datetime import datetime
import time
from calendar import timegm
from rfc822 import parsedate_tz, mktime_tz
from mercurial import ui, hg
from mercurial.node import hex
reader = re.compile(r'^"([a-f0-9]{40})"\t"([^\t]*)"\t"([^\t]*)"$')
def readlog(logfile):
"""Read a pushlog and yield (node, user, date) for each line. Returns
all the entries in chronological order. |date| is a timestamp."""
try:
fd = open(logfile)
except IOError:
return []
entries = []
for line in fd:
(node, user, date) = reader.match(line).group(1, 2, 3)
entries.append((node, user, mktime_tz(parsedate_tz(date))))
fd.close()
return entries
def readpushdb(pushdb):
"""Read a pushlog db and yield (node, user, date) for each line. Returns
all the entries in chronological order. |date| is a timestamp."""
try:
conn = sqlite.connect(pushdb)
entries = []
res = conn.execute("SELECT node, user, date FROM pushlog ORDER BY date ASC")
for (node, user, date) in res:
entries.append(
(node, user, timegm(time.strptime(date, "%Y-%m-%dT%H:%M:%SZ")))
)
return entries
except:
return []
def nodeindb(pushdb, node):
return (
pushdb.execute("SELECT COUNT(*) from changesets WHERE node = ?", (node,)) == 1
)
if len(sys.argv) != 2:
print >> sys.stderr, "Must specify a repository as the only parameter (/path/to/repo/)"
sys.exit(1)
### Main entrypoint
repo_path = os.path.abspath(sys.argv[1])
if not os.path.exists(repo_path):
print >> sys.stderr, "Must specify a repository as the only parameter (/path/to/repo/)"
sys.exit(1)
try:
repo = hg.repository(ui.ui(), repo_path)
except:
print >> sys.stderr, "Must specify a repository as the only parameter (/path/to/repo/)"
sys.exit(1)
# we need to read both the old text pushlog
pushlog = os.path.join(repo_path, ".hg", "pushlog")
# ... and the newer pushlog db
oldpushdb = pushlog + ".db"
# and we're going to migrate them both to a new schema
pushdb = pushlog + "2.db"
# Open or create our new db
conn = sqlite.connect(pushdb)
conn.execute(
"CREATE TABLE IF NOT EXISTS changesets (pushid INTEGER, rev INTEGER, node text)"
)
conn.execute(
"CREATE TABLE IF NOT EXISTS pushlog (id INTEGER PRIMARY KEY AUTOINCREMENT, user TEXT, date INTEGER)"
)
conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS changeset_node ON changesets (node)")
conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS changeset_rev ON changesets (rev)")
conn.execute("CREATE INDEX IF NOT EXISTS pushlog_date ON pushlog (date)")
conn.execute("CREATE INDEX IF NOT EXISTS pushlog_user ON pushlog (user)")
# Read all entries from both pushlogs
flatlogentries = readlog(pushlog)
flatnodes = dict()
# dict for easy lookup of nodes
for (node, user, date) in flatlogentries:
flatnodes[node] = 1
logentries = readpushdb(oldpushdb)
if len(logentries) == 0:
# just in case someone is importing from an old flatfile log
logentries = flatlogentries
# sort by revision #, just in case we have two pushes with the same date
logentries = [
(node, repo.changectx(node), user, date) for (node, user, date) in logentries
]
logentries.sort(lambda a, b: cmp(a[1].rev(), b[1].rev()))
# start at the beginning
lastrev = -1
next = 0
for (node, ctx, user, date) in logentries:
next += 1
if nodeindb(conn, node):
# already in the database, move along
lastrev = ctx.rev()
continue
res = conn.execute("INSERT INTO pushlog (user, date) VALUES(?,?)", (user, date))
pushid = res.lastrowid
# insert this change first
conn.execute(
"INSERT INTO changesets (pushid,rev,node) VALUES(?,?,?)",
(pushid, ctx.rev(), node),
)
if node in flatnodes:
# this was a HEAD revision, see if any other changes were pushed
# along with it
if lastrev != ctx.rev() - 1:
for i in range(lastrev + 1, ctx.rev()):
c = repo.changectx(i)
conn.execute(
"INSERT INTO changesets (pushid,rev,node) VALUES(?,?,?)",
(pushid, c.rev(), hex(c.node())),
)
lastrev = ctx.rev()
else:
# this was the first change in a set of changes pushed, see
# if any other changes were pushed along with it
if next < len(logentries):
nextctx = repo.changectx(logentries[next][0])
if ctx.rev() + 1 != nextctx.rev():
for i in range(ctx.rev() + 1, nextctx.rev()):
c = repo.changectx(i)
conn.execute(
"INSERT INTO changesets (pushid,rev,node) VALUES(?,?,?)",
(pushid, c.rev(), hex(c.node())),
)
lastrev = c.rev()
else: # end of the list, see if we're missing any changes to tip
if not "tip" in ctx.tags():
tip = repo.changectx("tip")
# we want everything up to and including tip
for i in range(ctx.rev() + 1, tip.rev() + 1):
c = repo.changectx(i)
conn.execute(
"INSERT INTO changesets (pushid,rev,node) VALUES(?,?,?)",
(pushid, c.rev(), hex(c.node())),
)
lastrev = c.rev()
conn.commit()