in diffcore-rename.c [1374:1712]
void diffcore_rename_extended(struct diff_options *options,
struct mem_pool *pool,
struct strintmap *relevant_sources,
struct strintmap *dirs_removed,
struct strmap *dir_rename_count,
struct strmap *cached_pairs)
{
int detect_rename = options->detect_rename;
int minimum_score = options->rename_score;
struct diff_queue_struct *q = &diff_queued_diff;
struct diff_queue_struct outq;
struct diff_score *mx;
int i, j, rename_count, skip_unmodified = 0;
int num_destinations, dst_cnt;
int num_sources, want_copies;
struct progress *progress = NULL;
struct mem_pool local_pool;
struct dir_rename_info info;
struct diff_populate_filespec_options dpf_options = {
.check_binary = 0,
.missing_object_cb = NULL,
.missing_object_data = NULL
};
struct inexact_prefetch_options prefetch_options = {
.repo = options->repo
};
trace2_region_enter("diff", "setup", options->repo);
info.setup = 0;
assert(!dir_rename_count || strmap_empty(dir_rename_count));
want_copies = (detect_rename == DIFF_DETECT_COPY);
if (dirs_removed && (break_idx || want_copies))
BUG("dirs_removed incompatible with break/copy detection");
if (break_idx && relevant_sources)
BUG("break detection incompatible with source specification");
if (!minimum_score)
minimum_score = DEFAULT_RENAME_SCORE;
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
if (!DIFF_FILE_VALID(p->one)) {
if (!DIFF_FILE_VALID(p->two))
continue; /* unmerged */
else if (options->single_follow &&
strcmp(options->single_follow, p->two->path))
continue; /* not interested */
else if (!options->flags.rename_empty &&
is_empty_blob_oid(&p->two->oid))
continue;
else if (add_rename_dst(p) < 0) {
warning("skipping rename detection, detected"
" duplicate destination '%s'",
p->two->path);
goto cleanup;
}
}
else if (!options->flags.rename_empty &&
is_empty_blob_oid(&p->one->oid))
continue;
else if (!DIFF_PAIR_UNMERGED(p) && !DIFF_FILE_VALID(p->two)) {
/*
* If the source is a broken "delete", and
* they did not really want to get broken,
* that means the source actually stays.
* So we increment the "rename_used" score
* by one, to indicate ourselves as a user
*/
if (p->broken_pair && !p->score)
p->one->rename_used++;
register_rename_src(p);
}
else if (want_copies) {
/*
* Increment the "rename_used" score by
* one, to indicate ourselves as a user.
*/
p->one->rename_used++;
register_rename_src(p);
}
}
trace2_region_leave("diff", "setup", options->repo);
if (rename_dst_nr == 0 || rename_src_nr == 0)
goto cleanup; /* nothing to do */
trace2_region_enter("diff", "exact renames", options->repo);
mem_pool_init(&local_pool, 32*1024);
/*
* We really want to cull the candidates list early
* with cheap tests in order to avoid doing deltas.
*/
rename_count = find_exact_renames(options, &local_pool);
/*
* Discard local_pool immediately instead of at "cleanup:" in order
* to reduce maximum memory usage; inexact rename detection uses up
* a fair amount of memory, and mem_pools can too.
*/
mem_pool_discard(&local_pool, 0);
trace2_region_leave("diff", "exact renames", options->repo);
/* Did we only want exact renames? */
if (minimum_score == MAX_SCORE)
goto cleanup;
num_sources = rename_src_nr;
if (want_copies || break_idx) {
/*
* Cull sources:
* - remove ones corresponding to exact renames
* - remove ones not found in relevant_sources
*/
trace2_region_enter("diff", "cull after exact", options->repo);
remove_unneeded_paths_from_src(want_copies, relevant_sources);
trace2_region_leave("diff", "cull after exact", options->repo);
} else {
/* Determine minimum score to match basenames */
double factor = 0.5;
char *basename_factor = getenv("GIT_BASENAME_FACTOR");
int min_basename_score;
if (basename_factor)
factor = strtol(basename_factor, NULL, 10)/100.0;
assert(factor >= 0.0 && factor <= 1.0);
min_basename_score = minimum_score +
(int)(factor * (MAX_SCORE - minimum_score));
/*
* Cull sources:
* - remove ones involved in renames (found via exact match)
*/
trace2_region_enter("diff", "cull after exact", options->repo);
remove_unneeded_paths_from_src(want_copies, NULL);
trace2_region_leave("diff", "cull after exact", options->repo);
/* Preparation for basename-driven matching. */
trace2_region_enter("diff", "dir rename setup", options->repo);
initialize_dir_rename_info(&info, relevant_sources,
dirs_removed, dir_rename_count,
cached_pairs);
trace2_region_leave("diff", "dir rename setup", options->repo);
/* Utilize file basenames to quickly find renames. */
trace2_region_enter("diff", "basename matches", options->repo);
rename_count += find_basename_matches(options,
min_basename_score,
&info,
relevant_sources,
dirs_removed);
trace2_region_leave("diff", "basename matches", options->repo);
/*
* Cull sources, again:
* - remove ones involved in renames (found via basenames)
* - remove ones not found in relevant_sources
* and
* - remove ones in relevant_sources which are needed only
* for directory renames IF no ancestory directory
* actually needs to know any more individual path
* renames under them
*/
trace2_region_enter("diff", "cull basename", options->repo);
remove_unneeded_paths_from_src(want_copies, relevant_sources);
handle_early_known_dir_renames(&info, relevant_sources,
dirs_removed);
trace2_region_leave("diff", "cull basename", options->repo);
}
/* Calculate how many rename destinations are left */
num_destinations = (rename_dst_nr - rename_count);
num_sources = rename_src_nr; /* rename_src_nr reflects lower number */
/* All done? */
if (!num_destinations || !num_sources)
goto cleanup;
switch (too_many_rename_candidates(num_destinations, num_sources,
options)) {
case 1:
goto cleanup;
case 2:
options->degraded_cc_to_c = 1;
skip_unmodified = 1;
break;
default:
break;
}
trace2_region_enter("diff", "inexact renames", options->repo);
if (options->show_rename_progress) {
progress = start_delayed_progress(
_("Performing inexact rename detection"),
(uint64_t)num_destinations * (uint64_t)num_sources);
}
/* Finish setting up dpf_options */
prefetch_options.skip_unmodified = skip_unmodified;
if (options->repo == the_repository && has_promisor_remote()) {
dpf_options.missing_object_cb = inexact_prefetch;
dpf_options.missing_object_data = &prefetch_options;
}
CALLOC_ARRAY(mx, st_mult(NUM_CANDIDATE_PER_DST, num_destinations));
for (dst_cnt = i = 0; i < rename_dst_nr; i++) {
struct diff_filespec *two = rename_dst[i].p->two;
struct diff_score *m;
if (rename_dst[i].is_rename)
continue; /* exact or basename match already handled */
m = &mx[dst_cnt * NUM_CANDIDATE_PER_DST];
for (j = 0; j < NUM_CANDIDATE_PER_DST; j++)
m[j].dst = -1;
for (j = 0; j < rename_src_nr; j++) {
struct diff_filespec *one = rename_src[j].p->one;
struct diff_score this_src;
assert(!one->rename_used || want_copies || break_idx);
if (skip_unmodified &&
diff_unmodified_pair(rename_src[j].p))
continue;
this_src.score = estimate_similarity(options->repo,
one, two,
minimum_score,
&dpf_options);
this_src.name_score = basename_same(one, two);
this_src.dst = i;
this_src.src = j;
record_if_better(m, &this_src);
/*
* Once we run estimate_similarity,
* We do not need the text anymore.
*/
diff_free_filespec_blob(one);
diff_free_filespec_blob(two);
}
dst_cnt++;
display_progress(progress,
(uint64_t)dst_cnt * (uint64_t)num_sources);
}
stop_progress(&progress);
/* cost matrix sorted by most to least similar pair */
STABLE_QSORT(mx, dst_cnt * NUM_CANDIDATE_PER_DST, score_compare);
rename_count += find_renames(mx, dst_cnt, minimum_score, 0,
&info, dirs_removed);
if (want_copies)
rename_count += find_renames(mx, dst_cnt, minimum_score, 1,
&info, dirs_removed);
free(mx);
trace2_region_leave("diff", "inexact renames", options->repo);
cleanup:
/* At this point, we have found some renames and copies and they
* are recorded in rename_dst. The original list is still in *q.
*/
trace2_region_enter("diff", "write back to queue", options->repo);
DIFF_QUEUE_CLEAR(&outq);
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
struct diff_filepair *pair_to_free = NULL;
if (DIFF_PAIR_UNMERGED(p)) {
diff_q(&outq, p);
}
else if (!DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two)) {
/* Creation */
diff_q(&outq, p);
}
else if (DIFF_FILE_VALID(p->one) && !DIFF_FILE_VALID(p->two)) {
/*
* Deletion
*
* We would output this delete record if:
*
* (1) this is a broken delete and the counterpart
* broken create remains in the output; or
* (2) this is not a broken delete, and rename_dst
* does not have a rename/copy to move p->one->path
* out of existence.
*
* Otherwise, the counterpart broken create
* has been turned into a rename-edit; or
* delete did not have a matching create to
* begin with.
*/
if (DIFF_PAIR_BROKEN(p)) {
/* broken delete */
struct diff_rename_dst *dst = locate_rename_dst(p);
if (!dst)
BUG("tracking failed somehow; failed to find associated dst for broken pair");
if (dst->is_rename)
/* counterpart is now rename/copy */
pair_to_free = p;
}
else {
if (p->one->rename_used)
/* this path remains */
pair_to_free = p;
}
if (!pair_to_free)
diff_q(&outq, p);
}
else if (!diff_unmodified_pair(p))
/* all the usual ones need to be kept */
diff_q(&outq, p);
else
/* no need to keep unmodified pairs */
pair_to_free = p;
if (pair_to_free)
pool_diff_free_filepair(pool, pair_to_free);
}
diff_debug_queue("done copying original", &outq);
free(q->queue);
*q = outq;
diff_debug_queue("done collapsing", q);
for (i = 0; i < rename_dst_nr; i++)
if (rename_dst[i].filespec_to_free)
pool_free_filespec(pool, rename_dst[i].filespec_to_free);
cleanup_dir_rename_info(&info, dirs_removed, dir_rename_count != NULL);
FREE_AND_NULL(rename_dst);
rename_dst_nr = rename_dst_alloc = 0;
FREE_AND_NULL(rename_src);
rename_src_nr = rename_src_alloc = 0;
if (break_idx) {
strintmap_clear(break_idx);
FREE_AND_NULL(break_idx);
}
trace2_region_leave("diff", "write back to queue", options->repo);
return;
}