void diffcore_rename_extended()

in diffcore-rename.c [1380:1719]


void diffcore_rename_extended(struct diff_options *options,
			      struct mem_pool *pool,
			      struct strintmap *relevant_sources,
			      struct strintmap *dirs_removed,
			      struct strmap *dir_rename_count,
			      struct strmap *cached_pairs)
{
	int detect_rename = options->detect_rename;
	int minimum_score = options->rename_score;
	struct diff_queue_struct *q = &diff_queued_diff;
	struct diff_queue_struct outq = DIFF_QUEUE_INIT;
	struct diff_score *mx;
	int i, j, rename_count, skip_unmodified = 0;
	int num_destinations, dst_cnt;
	int num_sources, want_copies;
	struct progress *progress = NULL;
	struct mem_pool local_pool;
	struct dir_rename_info info;
	struct diff_populate_filespec_options dpf_options = {
		.check_binary = 0,
		.missing_object_cb = NULL,
		.missing_object_data = NULL
	};
	struct inexact_prefetch_options prefetch_options = {
		.repo = options->repo
	};

	trace2_region_enter("diff", "setup", options->repo);
	info.setup = 0;
	ASSERT(!dir_rename_count || strmap_empty(dir_rename_count));
	want_copies = (detect_rename == DIFF_DETECT_COPY);
	if (dirs_removed && (break_idx || want_copies))
		BUG("dirs_removed incompatible with break/copy detection");
	if (break_idx && relevant_sources)
		BUG("break detection incompatible with source specification");
	if (!minimum_score)
		minimum_score = DEFAULT_RENAME_SCORE;

	for (i = 0; i < q->nr; i++) {
		struct diff_filepair *p = q->queue[i];
		if (!DIFF_FILE_VALID(p->one)) {
			if (!DIFF_FILE_VALID(p->two))
				continue; /* unmerged */
			else if (options->single_follow &&
				 strcmp(options->single_follow, p->two->path))
				continue; /* not interested */
			else if (!options->flags.rename_empty &&
				 is_empty_blob_oid(&p->two->oid, the_repository->hash_algo))
				continue;
			else if (add_rename_dst(p) < 0) {
				warning("skipping rename detection, detected"
					" duplicate destination '%s'",
					p->two->path);
				goto cleanup;
			}
		}
		else if (!options->flags.rename_empty &&
			 is_empty_blob_oid(&p->one->oid, the_repository->hash_algo))
			continue;
		else if (!DIFF_PAIR_UNMERGED(p) && !DIFF_FILE_VALID(p->two)) {
			/*
			 * If the source is a broken "delete", and
			 * they did not really want to get broken,
			 * that means the source actually stays.
			 * So we increment the "rename_used" score
			 * by one, to indicate ourselves as a user
			 */
			if (p->broken_pair && !p->score)
				p->one->rename_used++;
			register_rename_src(p);
		}
		else if (want_copies) {
			/*
			 * Increment the "rename_used" score by
			 * one, to indicate ourselves as a user.
			 */
			p->one->rename_used++;
			register_rename_src(p);
		}
	}
	trace2_region_leave("diff", "setup", options->repo);
	if (rename_dst_nr == 0 || rename_src_nr == 0)
		goto cleanup; /* nothing to do */

	trace2_region_enter("diff", "exact renames", options->repo);
	mem_pool_init(&local_pool, 32*1024);
	/*
	 * We really want to cull the candidates list early
	 * with cheap tests in order to avoid doing deltas.
	 */
	rename_count = find_exact_renames(options, &local_pool);
	/*
	 * Discard local_pool immediately instead of at "cleanup:" in order
	 * to reduce maximum memory usage; inexact rename detection uses up
	 * a fair amount of memory, and mem_pools can too.
	 */
	mem_pool_discard(&local_pool, 0);
	trace2_region_leave("diff", "exact renames", options->repo);

	/* Did we only want exact renames? */
	if (minimum_score == MAX_SCORE)
		goto cleanup;

	num_sources = rename_src_nr;

	if (want_copies || break_idx) {
		/*
		 * Cull sources:
		 *   - remove ones corresponding to exact renames
		 *   - remove ones not found in relevant_sources
		 */
		trace2_region_enter("diff", "cull after exact", options->repo);
		remove_unneeded_paths_from_src(want_copies, relevant_sources);
		trace2_region_leave("diff", "cull after exact", options->repo);
	} else {
		/* Determine minimum score to match basenames */
		double factor = 0.5;
		char *basename_factor = getenv("GIT_BASENAME_FACTOR");
		int min_basename_score;

		if (basename_factor)
			factor = strtol(basename_factor, NULL, 10)/100.0;
		assert(factor >= 0.0 && factor <= 1.0);
		min_basename_score = minimum_score +
			(int)(factor * (MAX_SCORE - minimum_score));

		/*
		 * Cull sources:
		 *   - remove ones involved in renames (found via exact match)
		 */
		trace2_region_enter("diff", "cull after exact", options->repo);
		remove_unneeded_paths_from_src(want_copies, NULL);
		trace2_region_leave("diff", "cull after exact", options->repo);

		/* Preparation for basename-driven matching. */
		trace2_region_enter("diff", "dir rename setup", options->repo);
		initialize_dir_rename_info(&info, relevant_sources,
					   dirs_removed, dir_rename_count,
					   cached_pairs);
		trace2_region_leave("diff", "dir rename setup", options->repo);

		/* Utilize file basenames to quickly find renames. */
		trace2_region_enter("diff", "basename matches", options->repo);
		rename_count += find_basename_matches(options,
						      min_basename_score,
						      &info,
						      relevant_sources,
						      dirs_removed);
		trace2_region_leave("diff", "basename matches", options->repo);

		/*
		 * Cull sources, again:
		 *   - remove ones involved in renames (found via basenames)
		 *   - remove ones not found in relevant_sources
		 * and
		 *   - remove ones in relevant_sources which are needed only
		 *     for directory renames IF no ancestry directory
		 *     actually needs to know any more individual path
		 *     renames under them
		 */
		trace2_region_enter("diff", "cull basename", options->repo);
		remove_unneeded_paths_from_src(want_copies, relevant_sources);
		handle_early_known_dir_renames(&info, relevant_sources,
					       dirs_removed);
		trace2_region_leave("diff", "cull basename", options->repo);
	}

	/* Calculate how many rename destinations are left */
	num_destinations = (rename_dst_nr - rename_count);
	num_sources = rename_src_nr; /* rename_src_nr reflects lower number */

	/* All done? */
	if (!num_destinations || !num_sources)
		goto cleanup;

	switch (too_many_rename_candidates(num_destinations, num_sources,
					   options)) {
	case 1:
		goto cleanup;
	case 2:
		options->degraded_cc_to_c = 1;
		skip_unmodified = 1;
		break;
	default:
		break;
	}

	trace2_region_enter("diff", "inexact renames", options->repo);
	if (options->show_rename_progress) {
		progress = start_delayed_progress(
				the_repository,
				_("Performing inexact rename detection"),
				(uint64_t)num_destinations * (uint64_t)num_sources);
	}

	/* Finish setting up dpf_options */
	prefetch_options.skip_unmodified = skip_unmodified;
	if (options->repo == the_repository && repo_has_promisor_remote(the_repository)) {
		dpf_options.missing_object_cb = inexact_prefetch;
		dpf_options.missing_object_data = &prefetch_options;
	}

	CALLOC_ARRAY(mx, st_mult(NUM_CANDIDATE_PER_DST, num_destinations));
	for (dst_cnt = i = 0; i < rename_dst_nr; i++) {
		struct diff_filespec *two = rename_dst[i].p->two;
		struct diff_score *m;

		if (rename_dst[i].is_rename)
			continue; /* exact or basename match already handled */

		m = &mx[dst_cnt * NUM_CANDIDATE_PER_DST];
		for (j = 0; j < NUM_CANDIDATE_PER_DST; j++)
			m[j].dst = -1;

		for (j = 0; j < rename_src_nr; j++) {
			struct diff_filespec *one = rename_src[j].p->one;
			struct diff_score this_src;

			assert(!one->rename_used || want_copies || break_idx);

			if (skip_unmodified &&
			    diff_unmodified_pair(rename_src[j].p))
				continue;

			this_src.score = estimate_similarity(options->repo,
							     one, two,
							     minimum_score,
							     &dpf_options);
			this_src.name_score = basename_same(one, two);
			this_src.dst = i;
			this_src.src = j;
			record_if_better(m, &this_src);
			/*
			 * Once we run estimate_similarity,
			 * We do not need the text anymore.
			 */
			diff_free_filespec_blob(one);
			diff_free_filespec_blob(two);
		}
		dst_cnt++;
		display_progress(progress,
				 (uint64_t)dst_cnt * (uint64_t)num_sources);
	}
	stop_progress(&progress);

	/* cost matrix sorted by most to least similar pair */
	STABLE_QSORT(mx, dst_cnt * NUM_CANDIDATE_PER_DST, score_compare);

	rename_count += find_renames(mx, dst_cnt, minimum_score, 0,
				     &info, dirs_removed);
	if (want_copies)
		rename_count += find_renames(mx, dst_cnt, minimum_score, 1,
					     &info, dirs_removed);
	free(mx);
	trace2_region_leave("diff", "inexact renames", options->repo);

 cleanup:
	/* At this point, we have found some renames and copies and they
	 * are recorded in rename_dst.  The original list is still in *q.
	 */
	trace2_region_enter("diff", "write back to queue", options->repo);
	for (i = 0; i < q->nr; i++) {
		struct diff_filepair *p = q->queue[i];
		struct diff_filepair *pair_to_free = NULL;

		if (DIFF_PAIR_UNMERGED(p)) {
			diff_q(&outq, p);
		}
		else if (!DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two)) {
			/* Creation */
			diff_q(&outq, p);
		}
		else if (DIFF_FILE_VALID(p->one) && !DIFF_FILE_VALID(p->two)) {
			/*
			 * Deletion
			 *
			 * We would output this delete record if:
			 *
			 * (1) this is a broken delete and the counterpart
			 *     broken create remains in the output; or
			 * (2) this is not a broken delete, and rename_dst
			 *     does not have a rename/copy to move p->one->path
			 *     out of existence.
			 *
			 * Otherwise, the counterpart broken create
			 * has been turned into a rename-edit; or
			 * delete did not have a matching create to
			 * begin with.
			 */
			if (DIFF_PAIR_BROKEN(p)) {
				/* broken delete */
				struct diff_rename_dst *dst = locate_rename_dst(p);
				if (options->single_follow && dst &&
				    strcmp(dst->p->two->path, p->two->path))
					dst = NULL;
				if (dst && dst->is_rename)
					/* counterpart is now rename/copy */
					pair_to_free = p;
			}
			else {
				if (p->one->rename_used)
					/* this path remains */
					pair_to_free = p;
			}

			if (!pair_to_free)
				diff_q(&outq, p);
		}
		else if (!diff_unmodified_pair(p))
			/* all the usual ones need to be kept */
			diff_q(&outq, p);
		else
			/* no need to keep unmodified pairs */
			pair_to_free = p;

		if (pair_to_free)
			pool_diff_free_filepair(pool, pair_to_free);
	}
	diff_debug_queue("done copying original", &outq);

	free(q->queue);
	*q = outq;
	diff_debug_queue("done collapsing", q);

	for (i = 0; i < rename_dst_nr; i++)
		if (rename_dst[i].filespec_to_free)
			pool_free_filespec(pool, rename_dst[i].filespec_to_free);

	cleanup_dir_rename_info(&info, dirs_removed, dir_rename_count != NULL);
	FREE_AND_NULL(rename_dst);
	rename_dst_nr = rename_dst_alloc = 0;
	FREE_AND_NULL(rename_src);
	rename_src_nr = rename_src_alloc = 0;
	if (break_idx) {
		strintmap_clear(break_idx);
		FREE_AND_NULL(break_idx);
	}
	trace2_region_leave("diff", "write back to queue", options->repo);
	return;
}