in eden/scm/edenscm/mercurial/copies.py [0:0]
def _fullcopytracing(repo, c1, c2, base):
"""The full copytracing algorithm which finds all the new files that were
added from merge base up to the top commit and for each file it checks if
this file was copied from another file.
This is pretty slow when a lot of changesets are involved but will track all
the copies.
"""
# In certain scenarios (e.g. graft, update or rebase), base can be
# overridden We still need to know a real common ancestor in this case We
# can't just compute _c1.ancestor(_c2) and compare it to ca, because there
# can be multiple common ancestors, e.g. in case of bidmerge. Because our
# caller may not know if the revision passed in lieu of the CA is a genuine
# common ancestor or not without explicitly checking it, it's better to
# determine that here.
#
# base.descendant(wc) and base.descendant(base) are False, work around that
_c1 = c1.p1() if c1.rev() is None else c1
_c2 = c2.p1() if c2.rev() is None else c2
# an endpoint is "dirty" if it isn't a descendant of the merge base
# if we have a dirty endpoint, we need to trigger graft logic, and also
# keep track of which endpoint is dirty
dirtyc1 = not (base == _c1 or base.descendant(_c1))
dirtyc2 = not (base == _c2 or base.descendant(_c2))
graft = dirtyc1 or dirtyc2
tca = base
if graft:
tca = _c1.ancestor(_c2)
limit = _findlimit(repo, c1.rev(), c2.rev())
if limit is None:
# no common ancestor, no copies
return {}, {}, {}, {}, {}
if limit in repo:
repo.ui.debug(" searching for copies back to %s\n" % repo[limit])
m1 = c1.manifest()
m2 = c2.manifest()
mb = base.manifest()
# gather data from _checkcopies:
# - diverge = record all diverges in this dict
# - copy = record all non-divergent copies in this dict
# - fullcopy = record all copies in this dict
# - incomplete = record non-divergent partial copies here
# - incompletediverge = record divergent partial copies here
diverge = {} # divergence data is shared
incompletediverge = {}
data1 = {
"copy": {},
"fullcopy": {},
"incomplete": {},
"diverge": diverge,
"incompletediverge": incompletediverge,
}
data2 = {
"copy": {},
"fullcopy": {},
"incomplete": {},
"diverge": diverge,
"incompletediverge": incompletediverge,
}
# find interesting file sets from manifests
addedinm1 = m1.filesnotin(mb)
addedinm2 = m2.filesnotin(mb)
bothnew = sorted(addedinm1 & addedinm2)
if tca == base:
# unmatched file from base
u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
u1u, u2u = u1r, u2r
else:
# unmatched file from base (DAG rotation in the graft case)
u1r, u2r = _computenonoverlap(
repo, c1, c2, addedinm1, addedinm2, baselabel="base"
)
# unmatched file from topological common ancestors (no DAG rotation)
# need to recompute this for directory move handling when grafting
mta = tca.manifest()
u1u, u2u = _computenonoverlap(
repo,
c1,
c2,
m1.filesnotin(mta),
m2.filesnotin(mta),
baselabel="topological common ancestor",
)
for f in u1u:
_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
for f in u2u:
_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
copy = dict(data1["copy"])
copy.update(data2["copy"])
fullcopy = dict(data1["fullcopy"])
fullcopy.update(data2["fullcopy"])
if dirtyc1:
_combinecopies(
data2["incomplete"], data1["incomplete"], copy, diverge, incompletediverge
)
else:
_combinecopies(
data1["incomplete"], data2["incomplete"], copy, diverge, incompletediverge
)
renamedelete = {}
renamedeleteset = set()
divergeset = set()
for of, fl in list(diverge.items()):
if len(fl) == 1 or of in c1 or of in c2:
del diverge[of] # not actually divergent, or not a rename
if of not in c1 and of not in c2:
# renamed on one side, deleted on the other side, but filter
# out files that have been renamed and then deleted
renamedelete[of] = [f for f in fl if f in c1 or f in c2]
renamedeleteset.update(fl) # reverse map for below
else:
divergeset.update(fl) # reverse map for below
if bothnew:
repo.ui.debug(" unmatched files new in both:\n %s\n" % "\n ".join(bothnew))
bothdiverge = {}
bothincompletediverge = {}
remainder = {}
both1 = {
"copy": {},
"fullcopy": {},
"incomplete": {},
"diverge": bothdiverge,
"incompletediverge": bothincompletediverge,
}
both2 = {
"copy": {},
"fullcopy": {},
"incomplete": {},
"diverge": bothdiverge,
"incompletediverge": bothincompletediverge,
}
for f in bothnew:
_checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
_checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
if dirtyc1:
# incomplete copies may only be found on the "dirty" side for bothnew
assert not both2["incomplete"]
remainder = _combinecopies(
{}, both1["incomplete"], copy, bothdiverge, bothincompletediverge
)
elif dirtyc2:
assert not both1["incomplete"]
remainder = _combinecopies(
{}, both2["incomplete"], copy, bothdiverge, bothincompletediverge
)
else:
# incomplete copies and divergences can't happen outside grafts
assert not both1["incomplete"]
assert not both2["incomplete"]
assert not bothincompletediverge
for f in remainder:
assert f not in bothdiverge
ic = remainder[f]
if ic[0] in (m1 if dirtyc1 else m2):
# backed-out rename on one side, but watch out for deleted files
bothdiverge[f] = ic
for of, fl in bothdiverge.items():
if len(fl) == 2 and fl[0] == fl[1]:
copy[fl[0]] = of # not actually divergent, just matching renames
if fullcopy and repo.ui.debugflag:
repo.ui.debug(
" all copies found (* = to merge, ! = divergent, "
"% = renamed and deleted):\n"
)
for f in sorted(fullcopy):
note = ""
if f in copy:
note += "*"
if f in divergeset:
note += "!"
if f in renamedeleteset:
note += "%"
repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note))
del divergeset
if not fullcopy:
return copy, {}, diverge, renamedelete, {}
repo.ui.debug(" checking for directory renames\n")
# generate a directory move map
d1, d2 = c1.dirs(), c2.dirs()
invalid = set()
dirmove = {}
# examine each file copy for a potential directory move, which is
# when all the files in a directory are moved to a new directory
for dst, src in pycompat.iteritems(fullcopy):
dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
if dsrc in invalid:
# already seen to be uninteresting
continue
elif dsrc in d1 and ddst in d1:
# directory wasn't entirely moved locally
invalid.add(dsrc + "/")
elif dsrc in d2 and ddst in d2:
# directory wasn't entirely moved remotely
invalid.add(dsrc + "/")
elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
# files from the same directory moved to two different places
invalid.add(dsrc + "/")
else:
# looks good so far
dirmove[dsrc + "/"] = ddst + "/"
for i in invalid:
if i in dirmove:
del dirmove[i]
del d1, d2, invalid
if not dirmove:
return copy, {}, diverge, renamedelete, {}
for d in dirmove:
repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d]))
movewithdir = {}
# check unaccounted nonoverlapping files against directory moves
for f in u1r + u2r:
if f not in fullcopy:
for d in dirmove:
if f.startswith(d):
# new file added in a directory that was moved, move it
df = dirmove[d] + f[len(d) :]
if df not in copy:
movewithdir[f] = df
repo.ui.debug(
(" pending file src: '%s' -> " "dst: '%s'\n") % (f, df)
)
break
return copy, movewithdir, diverge, renamedelete, dirmove