in eden/scm/contrib/synthrepo.py [0:0]
def synthesize(ui, repo, descpath, **opts):
    """synthesize commits based on a model of an existing repository

    The model must have been generated by :hg:`analyze`. Commits will
    be generated randomly according to the probabilities described in
    the model. If --initfiles is set, the repository will be seeded with
    the given number files following the modeled repository's directory
    structure.

    When synthesizing new content, commit descriptions, and user
    names, words will be chosen randomly from a dictionary that is
    presumed to contain one word per line. Use --dict to specify the
    path to an alternate dictionary to use.
    """
    # Load the JSON model produced by `hg analyze`.
    try:
        fp = hg.openpath(ui, descpath)
    except Exception as err:
        # NOTE(fix): the original formatted err[0].strerror, which itself
        # raises (exceptions are not indexable on py3; on py2 args[0] is the
        # errno integer, which has no .strerror). Report the exception.
        raise error.Abort("%s: %s" % (descpath, err))
    desc = json.load(fp)
    fp.close()

    def cdf(l):
        # Turn a list of (value, weight) pairs into a cumulative
        # distribution: parallel tuples (values, cumulative-probabilities)
        # consumable by pick() below. Empty input yields empty tuples.
        if not l:
            return [], []
        vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
        t = float(sum(probs, 0))
        s, cdfs = 0, []
        for v in probs:
            s += v
            cdfs.append(s / t)
        return vals, cdfs

    # One CDF per modeled quantity.
    lineschanged = cdf(desc["lineschanged"])
    fileschanged = cdf(desc["fileschanged"])
    filesadded = cdf(desc["filesadded"])
    dirsadded = cdf(desc["dirsadded"])
    filesremoved = cdf(desc["filesremoved"])
    linelengths = cdf(desc["linelengths"])
    parents = cdf(desc["parents"])
    p1distance = cdf(desc["p1distance"])
    p2distance = cdf(desc["p2distance"])
    interarrival = cdf(desc["interarrival"])
    linesinfilesadded = cdf(desc["linesinfilesadded"])
    tzoffset = cdf(desc["tzoffset"])

    # Word list used to synthesize file content, paths, users and messages.
    dictfile = opts.get("dict") or "/usr/share/dict/words"
    try:
        # "r" instead of the deprecated/removed "rU" mode: splitlines()
        # below already handles every newline convention.
        fp = open(dictfile, "r")
    except IOError as err:
        raise error.Abort("%s: %s" % (dictfile, err.strerror))
    words = fp.read().splitlines()
    fp.close()

    initdirs = {}
    if desc["initdirs"]:
        for k, v in desc["initdirs"]:
            # ".hg" is reserved by mercurial; remap modeled dirs using it.
            initdirs[k.encode("utf-8").replace(".hg", "_hg")] = v
        initdirs = renamedirs(initdirs, words)
    initdirscdf = cdf(initdirs)

    def pick(cdf):
        # Draw a random value from a (values, cumulative-probs) pair.
        return cdf[0][bisect.bisect_left(cdf[1], random.random())]

    def pickpath():
        # Random file path following the modeled directory structure.
        return os.path.join(pick(initdirscdf), random.choice(words))

    def makeline(minimum=0):
        # A line of random words at least `minimum` characters long,
        # with length drawn from the modeled line-length distribution.
        total = max(minimum, pick(linelengths))
        c, l = 0, []
        while c < total:
            w = random.choice(words)
            c += len(w) + 1
            l.append(w)
        return " ".join(l)

    wlock = repo.wlock()
    lock = repo.lock()
    # NOTE(fix): the original released the locks only on the success path;
    # any exception below leaked both. Release them in a finally block,
    # in reverse acquisition order.
    try:
        nevertouch = {".hgsub", ".hgignore", ".hgtags"}

        progress = ui.progress
        _synthesizing = _("synthesizing")
        _files = _("initial files")
        _changesets = _("changesets")

        # Synthesize a single initial revision adding files to the repo according
        # to the modeled directory structure.
        initcount = int(opts["initfiles"])
        if initcount and initdirs:
            pctx = repo[None].parents()[0]
            dirs = set(pctx.dirs())
            files = {}

            def validpath(path):
                # Don't pick filenames which are already directory names.
                if path in dirs:
                    return False
                # Don't pick directories which were used as file names.
                while path:
                    if path in files:
                        return False
                    path = os.path.dirname(path)
                return True

            for i in xrange(0, initcount):
                ui.progress(_synthesizing, i, unit=_files, total=initcount)
                path = pickpath()
                while not validpath(path):
                    path = pickpath()
                data = "%s contents\n" % path
                files[path] = data
                # Record every ancestor directory so validpath() can
                # reject future file/directory name collisions.
                dir = os.path.dirname(path)
                while dir and dir not in dirs:
                    dirs.add(dir)
                    dir = os.path.dirname(dir)

            def filectxfn(repo, memctx, path):
                return context.memfilectx(repo, memctx, path, files[path])

            ui.progress(_synthesizing, None)
            message = "synthesized wide repo with %d files" % (len(files),)
            mc = context.memctx(
                repo,
                [pctx.node(), nullid],
                message,
                pycompat.iterkeys(files),
                filectxfn,
                ui.username(),
                "%d %d" % util.makedate(),
            )
            initnode = mc.commit()
            if ui.debugflag:
                hexfn = hex
            else:
                hexfn = short
            ui.status(
                _("added commit %s with %d files\n") % (hexfn(initnode), len(files))
            )

        # Synthesize incremental revisions to the repository, adding repo depth.
        count = int(opts["count"])
        heads = set(map(repo.changelog.rev, repo.heads()))
        for i in xrange(count):
            progress(_synthesizing, i, unit=_changesets, total=count)

            node = repo.changelog.node
            revs = len(repo)

            def pickhead(heads, distance):
                # Choose an existing head roughly the modeled distance back
                # from tip; the null revision when no head is available.
                if heads:
                    lheads = sorted(heads)
                    rev = revs - min(pick(distance), revs)
                    if rev < lheads[-1]:
                        rev = lheads[bisect.bisect_left(lheads, rev)]
                    else:
                        rev = lheads[-1]
                    return rev, node(rev)
                return nullrev, nullid

            r1 = revs - min(pick(p1distance), revs)
            p1 = node(r1)

            # the number of heads will grow without bound if we use a pure
            # model, so artificially constrain their proliferation
            toomanyheads = len(heads) > random.randint(1, 20)
            if p2distance[0] and (pick(parents) == 2 or toomanyheads):
                r2, p2 = pickhead(heads.difference([r1]), p2distance)
            else:
                r2, p2 = nullrev, nullid

            pl = [p1, p2]
            pctx = repo[r1]
            mf = pctx.manifest()
            mfk = mf.keys()
            changes = {}
            if mfk:
                # Mutate a modeled number of existing files, skipping
                # (up to 10 retries) special, binary and symlink files.
                for __ in xrange(pick(fileschanged)):
                    for __ in xrange(10):
                        fctx = pctx.filectx(random.choice(mfk))
                        path = fctx.path()
                        if not (
                            path in nevertouch
                            or fctx.isbinary()
                            or "l" in fctx.flags()
                        ):
                            break
                    lines = fctx.data().splitlines()
                    add, remove = pick(lineschanged)
                    for __ in xrange(remove):
                        if not lines:
                            break
                        del lines[random.randrange(0, len(lines))]
                    for __ in xrange(add):
                        lines.insert(random.randint(0, len(lines)), makeline())
                    path = fctx.path()
                    changes[path] = "\n".join(lines) + "\n"
                # NOTE(review): this loop draws paths from the filesremoved
                # model but never records them in `changes`, so no file is
                # ever actually removed. Preserved as-is (same as upstream)
                # to avoid a behavior change — TODO confirm intent.
                for __ in xrange(pick(filesremoved)):
                    path = random.choice(mfk)
                    for __ in xrange(10):
                        path = random.choice(mfk)
                        if path not in changes:
                            break
            if filesadded:
                dirs = list(pctx.dirs())
                dirs.insert(0, "")
                # Guarded by `if filesadded:` — pick() on an empty CDF
                # would raise IndexError and `dirs` would be unbound.
                for __ in xrange(pick(filesadded)):
                    pathstr = ""
                    # Keep drawing until the path is not an existing
                    # directory name.
                    while pathstr in dirs:
                        path = [random.choice(dirs)]
                        if pick(dirsadded):
                            path.append(random.choice(words))
                        path.append(random.choice(words))
                        pathstr = "/".join(filter(None, path))
                    data = (
                        "\n".join(
                            makeline() for __ in xrange(pick(linesinfilesadded))
                        )
                        + "\n"
                    )
                    changes[pathstr] = data

            def filectxfn(repo, memctx, path):
                if path not in changes:
                    return None
                return context.memfilectx(repo, memctx, path, changes[path])

            if not changes:
                continue
            if revs:
                date = repo["tip"].date()[0] + pick(interarrival)
            else:
                date = time.time() - (86400 * count)
            # dates in mercurial must be positive, fit in 32-bit signed integers.
            date = min(0x7FFFFFFF, max(0, date))
            user = random.choice(words) + "@" + random.choice(words)
            mc = context.memctx(
                repo,
                pl,
                makeline(minimum=2),
                sorted(changes),
                filectxfn,
                user,
                "%d %d" % (date, pick(tzoffset)),
            )
            newnode = mc.commit()
            heads.add(repo.changelog.rev(newnode))
            heads.discard(r1)
            heads.discard(r2)
    finally:
        lock.release()
        wlock.release()