def _fullcopytracing()

in eden/scm/edenscm/mercurial/copies.py [0:0]


def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge.  Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.descendant(wc) and base.descendant(base) are False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not (base == _c1 or base.descendant(_c1))
    dirtyc2 = not (base == _c2 or base.descendant(_c2))
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    if limit in repo:
        repo.ui.debug("  searching for copies back to %s\n" % repo[limit])

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {}  # divergence data is shared
    incompletediverge = {}
    data1 = {
        "copy": {},
        "fullcopy": {},
        "incomplete": {},
        "diverge": diverge,
        "incompletediverge": incompletediverge,
    }
    data2 = {
        "copy": {},
        "fullcopy": {},
        "incomplete": {},
        "diverge": diverge,
        "incompletediverge": incompletediverge,
    }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(
            repo, c1, c2, addedinm1, addedinm2, baselabel="base"
        )
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(
            repo,
            c1,
            c2,
            m1.filesnotin(mta),
            m2.filesnotin(mta),
            baselabel="topological common ancestor",
        )

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1["copy"])
    copy.update(data2["copy"])
    fullcopy = dict(data1["fullcopy"])
    fullcopy.update(data2["fullcopy"])

    if dirtyc1:
        _combinecopies(
            data2["incomplete"], data1["incomplete"], copy, diverge, incompletediverge
        )
    else:
        _combinecopies(
            data1["incomplete"], data2["incomplete"], copy, diverge, incompletediverge
        )

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of]  # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl)  # reverse map for below
        else:
            divergeset.update(fl)  # reverse map for below

    if bothnew:
        repo.ui.debug("  unmatched files new in both:\n   %s\n" % "\n   ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {
        "copy": {},
        "fullcopy": {},
        "incomplete": {},
        "diverge": bothdiverge,
        "incompletediverge": bothincompletediverge,
    }
    both2 = {
        "copy": {},
        "fullcopy": {},
        "incomplete": {},
        "diverge": bothdiverge,
        "incompletediverge": bothincompletediverge,
    }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2["incomplete"]
        remainder = _combinecopies(
            {}, both1["incomplete"], copy, bothdiverge, bothincompletediverge
        )
    elif dirtyc2:
        assert not both1["incomplete"]
        remainder = _combinecopies(
            {}, both2["incomplete"], copy, bothdiverge, bothincompletediverge
        )
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1["incomplete"]
        assert not both2["incomplete"]
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of  # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(
            "  all copies found (* = to merge, ! = divergent, "
            "% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug("  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc + "/")
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc + "/")
        elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
            # files from the same directory moved to two different places
            invalid.add(dsrc + "/")
        else:
            # looks good so far
            dirmove[dsrc + "/"] = ddst + "/"

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    for d in dirmove:
        repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            ("   pending file src: '%s' -> " "dst: '%s'\n") % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove