in tools/client-side/svn-vendor.py [0:0]
def detect(self, thresholds):
    '''
    Helper for finding copy/move destinations.

    Entries of self.items whose S_WC state is set but whose S_IM state is
    "-" are treated as possible sources; entries whose S_WC state is "-"
    but whose S_IM state is set are treated as possible destinations.
    Each destination is paired with the most similar source (as reported
    by self.similar()), the pairs are recorded as "mv" (one destination)
    or "cp" (several destinations) operations, and finally the operations
    are applied through self.remove() and self.handle_op().

    thresholds is indexed by the destination's node kind (its S_IM state
    value); a source is accepted only if its similarity is strictly
    greater than the threshold for that kind.

    Returns nothing.
    '''
    class SkipDestFile(Exception):
        'Destination was already copied/moved as part of a parent dir'

    ilst = []
    wlst = {}
    ilst_map = {}
    for p in self.items:
        e = self.items[p]
        if e.state[S_WC] != "-" and e.state[S_IM] == "-":
            wlst[p] = []  # wlst hash stores copy destinations
        elif e.state[S_WC] == "-" and e.state[S_IM] != "-":
            # ilst just lists destination paths as tuples with node kind
            ilst.append((e.state[S_IM], p))
    iteration = 0
    # Do not apply operations immediately - we'll need to post-process
    # them to account for files/dirs moved inside a moved parent dir.
    ops = []
    to_be_removed = []

    def get_renamed_name(path, rename_ops):
        '''
        Check if path was renamed/removed in the recorded operations,
        return new name.
        '''
        for op_tuple in rename_ops:
            # Since copies do not remove the source file, ignore them.
            # We push no 'rm' ops in this function
            if op_tuple[0] == "mv":
                src = op_tuple[1]
                dst = op_tuple[2]
                if descendant_or_self(path, src):
                    path = path_rebase(path, src, dst)
        return path

    while wlst:
        iteration += 1
        self.info(2, ("Iteration %d: Possible sources: %d, " +
                "possible destinations: %d") %
                (iteration, len(wlst), len(ilst)))
        ndst = len(ilst)
        for idx, (nk, dst) in enumerate(sorted(ilst,
                key=lambda s: filename_sort_key(s[1]))):
            # Check if moved as a part of a parent directory.
            def check_moved_parent(xdst):
                if xdst in ilst_map:
                    src = path_rebase(dst, xdst, ilst_map[xdst])
                    # Did it exist in copied directory?
                    if src in self.items and \
                            self.items[src].state[S_WC] == nk:
                        sim = self.similar(src, dst, thresholds[nk],
                                to_be_removed)
                        if sim > thresholds[nk]:
                            self.info(2, (" [%04d/%04d] Skipping `%s' " +
                                    "(copied as part of `%s')") %
                                    (idx, ndst, dst, xdst))
                            raise SkipDestFile
                    # Copied, not similar - search for other sources.
                    # StopIteration is used purely as a signal to abort
                    # the walk over the remaining parents.
                    raise StopIteration
            try:
                for_all_parents(dst, check_moved_parent)
            except SkipDestFile:
                continue
            except StopIteration:
                pass
            self.info(2, (" [%04d/%04d] Looking for possible source " +
                    "for `%s'") % (idx, ndst, dst))
            bestsrc = None
            # Won't even consider those lower than threshold
            bestsim = thresholds[nk]
            for src in sorted(wlst,
                    key=lambda x: name_similarity(x, dst)):
                sim = self.similar(src, dst, bestsim, to_be_removed)
                if sim > bestsim:
                    self.info(3, " [similarity %4d] %s" % (sim, src))
                    bestsim = sim
                    bestsrc = src
                if bestsim == 1000:
                    # No chance we're finding anything better
                    break
            if bestsrc is not None:
                wlst[bestsrc].append(dst)
                ilst_map[dst] = bestsrc

        # Discovered all copies/moves, now record them.
        new_wlst = {}
        for src in sorted(wlst, key=filename_sort_key):
            dlist = wlst[src]
            if not dlist:
                continue
            # The source is scheduled for removal in both cases. For
            # copies we don't remove it here, it will be done when the
            # changes are applied (it will remove all the WC files not
            # found in imported sources). Avoiding removal here
            # simplifies operation sorting below, since we would not be
            # concerned with source file/dir disappearing before it is
            # copied to its destination.
            to_be_removed.append(src)
            if len(dlist) == 1:
                ops.append(("mv", src, dlist[0]))
            else:
                for d in dlist:
                    ops.append(("cp", src, d))

            # If we copied something - recheck parent source directories.
            # Since some source file/dir was scheduled to be removed,
            # this may have increased the similarity to some destination.
            def recheck_parent(x):
                # Only parents that are candidate sources and have not
                # been matched to any destination yet are worth another
                # pass. (The previous test, len(wlst) == 0, could never
                # hold together with `x in wlst', so no parent was ever
                # rechecked.)
                if x in wlst and not wlst[x]:
                    new_wlst[x] = []
            for_all_parents(src, recheck_parent)

        # At this point, if we're going to have the next iteration, we
        # are only concerned about directories (by the way new_wlst is
        # created above). So, filter out all files from ilst as well.
        # Destinations that already have a source assigned are dropped
        # too, so that a later pass cannot record a second operation
        # for the same destination.
        wlst = new_wlst
        ilst = [t for t in ilst
                if t[0] == 'D' and t[1] not in ilst_map]

    # Finished collecting the operations - now can post-process and
    # apply them. First, sort copies/moves by destination (so that
    # parent directories are created before files/subdirs are
    # copied/renamed inside)
    ops = sorted(ops, key=lambda op: filename_sort_key(op[2]))
    for i, op_tuple in enumerate(ops):
        # For each operation, go over its precedents to see if the source
        # has been renamed. If it is, find out new name.
        op = op_tuple[0]
        src = get_renamed_name(op_tuple[1], reversed(ops[:i]))
        if src != op_tuple[2]:
            # Unless it became the same file after renames
            try:
                # Try to remove the destination, if it existed
                self.remove(op_tuple[2])
            except InvalidUsageException:
                # Okay, it didn't exist
                pass
            self.handle_op((op, src, op_tuple[2]))