in sync/command.py [0:0]
def do_migrate(git_gecko, git_wpt, **kwargs):
    """One-off migration of sync refs to the current storage layout.

    This command is deliberately disabled: it raises ``AssertionError``
    before doing any work. The remainder of the body is kept as a record of
    the migration and runs in two phases:

    1. Refs matching
       ``refs/<reftype>/<obj_type>/<subtype>/<status>/<obj_id>[/<seq_id>]``
       (``reftype`` in heads/syncs, ``obj_type`` in sync/try) are recreated as
       ``refs/<reftype>/<obj_type>/<subtype>/<obj_id>/<seq_id>``. For
       ``/syncs/`` refs the status that used to be in the ref name is written
       into the ref's JSON ``data`` blob when it differs. Old refs are then
       deleted.
    2. Each ``refs/syncs/<obj_type>/<subtype>/<obj_id>/<seq_id>`` ref in the
       gecko repository has its ``data`` blob committed under the matching
       path on the single ``refs/syncs/data`` ref, and the per-sync ref is
       deleted.

    Args:
        git_gecko: Repository object for gecko; the code reads its
            ``working_dir`` and ``refs`` attributes (presumably a GitPython
            ``Repo`` -- confirm against callers).
        git_wpt: Repository object for web-platform-tests, used the same way.
        **kwargs: Accepted for interface compatibility; not used.

    Raises:
        AssertionError: Always, as a guard against running the migration.
    """
    # Explicit raise rather than ``assert False`` so the guard is not
    # stripped when Python runs with -O.
    raise AssertionError("Running this is probably a bad idea")

    # Migrate refs from the
    # refs/<reftype>/<obj_type>/<subtype>/<status>/<obj_id>[/<seq_id>] format
    # to refs/<reftype>/<obj_type>/<subtype>/<obj_id>/<seq_id>
    from collections import defaultdict
    from . import base
    import pygit2

    git2_gecko = pygit2.Repository(git_gecko.working_dir)
    git2_wpt = pygit2.Repository(git_wpt.working_dir)
    repo_map = {git_gecko: git2_gecko,
                git_wpt: git2_wpt}
    # pygit2 repository -> original repository object
    rev_repo_map = {value: key for key, value in repo_map.items()}
    special = {}
    sync_ref = re.compile("^refs/"
                          "(?P<reftype>[^/]+)/"
                          "(?P<obj_type>[^/]+)/"
                          "(?P<subtype>[^/]+)/"
                          "(?P<status>[^0-9/]+)/"
                          "(?P<obj_id>[0-9]+)"
                          "(?:/(?P<seq_id>[0-9]*))?$")

    print("Updating refs")
    # (pygit2 repo, new ref name) -> [(old pygit2 reference, old status), ...]
    seen = defaultdict(list)
    total_refs = 0
    processing_refs = 0
    for ref in itertools.chain(git_gecko.refs, git_wpt.refs):
        git2_repo = repo_map[ref.repo]
        ref = git2_repo.lookup_reference(ref.path)
        total_refs += 1
        if ref.name in special:
            continue
        m = sync_ref.match(ref.name)
        if not m:
            continue
        if m.group("reftype") not in ("heads", "syncs"):
            continue
        if m.group("obj_type") not in ("sync", "try"):
            continue
        processing_refs += 1
        assert m.group("subtype") in ("upstream", "downstream", "landing")
        assert int(m.group("obj_id")) > 0
        new_ref = "refs/{}/{}/{}/{}/{}".format(m.group("reftype"),
                                               m.group("obj_type"),
                                               m.group("subtype"),
                                               m.group("obj_id"),
                                               m.group("seq_id") or "0")
        seen[(git2_repo, new_ref)].append((ref, m.group("status")))

    # Several old refs can map onto the same new name (same object, different
    # status); try to pick a single source for each.
    duplicate = {}
    delete = set()
    for (repo, new_ref), refs in seen.items():
        if len(refs) > 1 and new_ref.startswith("refs/syncs/"):
            # If we have multiple /syncs/ refs, but only one /heads/ ref,
            # use the corresponding one
            has_head = []
            no_head = []
            for ref, status in refs:
                head_name = "refs/heads/%s" % ref.name[len("refs/syncs/"):]
                if head_name in repo.references:
                    has_head.append((ref, status))
                else:
                    no_head.append((ref, status))
            if len(has_head) == 1:
                print(" Using {} from {}".format(has_head[0][0].name,
                                                 " ".join(ref.name for ref, _ in refs)))
                delete |= {(repo, ref.name) for ref, _ in no_head}
                # Keep (reference, status) pairs so later code can still use
                # ref.name / ref.peel() on the survivor.
                refs[:] = has_head
        if len(refs) > 1:
            # If we have a later status, prefer that over an earlier one.
            # The status stored alongside each ref is the regex "status"
            # group captured when the ref was first matched.
            by_status = {status: (ref, status) for ref, status in refs}
            for target_status in ["complete", "wpt-merged", "incomplete", "infra-fail"]:
                if target_status in by_status:
                    chosen = by_status[target_status]
                    print(" Using {} from {}".format(chosen[0].name,
                                                     " ".join(ref.name for ref, _ in refs)))
                    delete |= {(repo, ref.name) for ref, _ in refs
                               if ref.name != chosen[0].name}
                    refs[:] = [chosen]
                    # Stop at the highest-priority status present; otherwise
                    # a lower-priority one would override the choice.
                    break
        if len(refs) > 1:
            duplicate[(repo, new_ref)] = refs

    if duplicate:
        print(" ERROR! Got duplicate %s source refs" % len(duplicate))
        for (repo, new_ref), refs in duplicate.items():
            # ``repo`` is the pygit2 repository; report the working directory
            # of the repository object it was created from.
            print(" {} {}: {}".format(rev_repo_map[repo].working_dir,
                                      new_ref,
                                      " ".join(ref.name for ref, _ in refs)))
        return

    # Report sync refs that have no matching head in either layout.
    for (repo, new_ref), refs in seen.items():
        ref, _ = refs[0]
        if ref.name.startswith("refs/syncs/sync/"):
            if "refs/heads/%s" % ref.name[len("refs/syncs/"):] not in repo.references:
                # Try with the post-migration head
                m = sync_ref.match(ref.name)
                ref_path = "refs/heads/{}/{}/{}/{}".format(m.group("obj_type"),
                                                           m.group("subtype"),
                                                           m.group("obj_id"),
                                                           # same default as new_ref
                                                           m.group("seq_id") or "0")
                if ref_path not in repo.references:
                    print(" Missing head %s" % (ref.name))

    created = 0
    for i, ((repo, new_ref), refs) in enumerate(seen.items()):
        assert len(refs) == 1
        ref, status = refs[0]
        print("Updating %s" % ref.name)
        print(" Moving %s to %s %d/%d" % (ref.name, new_ref, i + 1, len(seen)))
        # Default target is whatever the old ref points at; only replaced
        # below when a new commit recording the status has to be made.
        commit = ref.peel().id
        if "/syncs/" in ref.name:
            ref_obj = commit
            data = json.loads(ref.peel().tree["data"].data)
            if data.get("status") != status:
                with base.CommitBuilder(rev_repo_map[repo], "Add status",
                                        ref=ref.name) as builder:
                    # Re-read the data if the ref moved since it was loaded.
                    now_ref_obj = ref.peel().id
                    if ref_obj != now_ref_obj:
                        data = json.loads(ref.peel().tree["data"].data)
                    data["status"] = status
                    builder.add_tree({"data": json.dumps(data)})
                    print("Making commit")
                # NOTE(review): presumably the commit only exists once the
                # builder context has exited -- confirm against
                # base.CommitBuilder.
                commit = builder.get().sha1
        print(" Got commit %s" % commit)
        if new_ref not in repo.references:
            print(f" Rename {ref.name} {new_ref}")
            repo.references.create(new_ref, commit)
            created += 1
        else:
            print(" %s already exists" % new_ref)
        # The old-format ref is removed whether or not the new one already
        # existed.
        delete.add((repo, ref.name))

    for repo, ref_name in delete:
        print(" Deleting %s" % ref_name)
        repo.references.delete(ref_name)

    print("%s total refs" % total_refs)
    print("%s refs to process" % processing_refs)
    print("%s refs to create" % created)
    print("%s refs to delete" % len(delete))

    print("Moving to single history")
    # Migrate from refs/syncs/ to paths
    sync_ref = re.compile("^refs/"
                          "syncs/"
                          "(?P<obj_type>[^/]*)/"
                          "(?P<subtype>[^/]*)/"
                          "(?P<obj_id>[^/]*)/"
                          "(?P<seq_id>[0-9]*)$")
    delete = set()
    initial_ref = git.Reference(git_gecko, "refs/syncs/data")
    if initial_ref.is_valid():
        existing_paths = {item.path for item in initial_ref.commit.tree.traverse()}
    else:
        existing_paths = set()
    for ref in git_gecko.refs:
        m = sync_ref.match(ref.path)
        if not m:
            continue
        path = "{}/{}/{}/{}".format(m.group("obj_type"),
                                    m.group("subtype"),
                                    m.group("obj_id"),
                                    m.group("seq_id"))
        if path not in existing_paths:
            with base.CommitBuilder(git_gecko,
                                    "Migrate %s to single ref for data" % ref.path,
                                    ref="refs/syncs/data") as commit:
                data = json.load(ref.commit.tree["data"].data_stream)
                print(f" Moving path {path}")
                tree = {path: json.dumps(data)}
                commit.add_tree(tree)
        # The per-sync ref is dropped once its data is on refs/syncs/data,
        # including when the path was already there before this run.
        delete.add(ref.path)

    git2_repo = repo_map[git_gecko]
    for ref_name in delete:
        git2_repo.references.delete(ref_name)