git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>, "Jeff King" <peff@peff.net>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 3/7] diff: add --rename-file
Date: Wed, 20 Jan 2016 18:06:04 +0700	[thread overview]
Message-ID: <1453287968-26000-4-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1453287968-26000-1-git-send-email-pclouds@gmail.com>

Git's heuristics to detect renames or copies work most of the time.
This option can be used to correct the result when it goes wrong.
Matching pairs get max rename score and override even exact rename
detection.

Note that --rename-file does not try to break existing diff pairs. So
if you have "abc => def" in your rename file, but both paths are already
paired up (e.g. "abc => abc" and "def => def") and not broken up by -B,
then nothing happens.

This patch assumes that the rename file contains only a handful of
rename pairs, not thousands of them. Under that assumption, looping
through all rename sources and destinations for each rename line will
not noticeably affect performance, so we can keep the code simple.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Documentation/diff-options.txt |  7 +++++
 diff.c                         | 10 +++++++
 diff.h                         |  1 +
 diffcore-rename.c              | 64 ++++++++++++++++++++++++++++++++++++++++--
 t/t4001-diff-rename.sh         | 33 ++++++++++++++++++++++
 5 files changed, 113 insertions(+), 2 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 306b7e3..7ae04a0 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -380,6 +380,13 @@ endif::git-log[]
 	projects, so use it with caution.  Giving more than one
 	`-C` option has the same effect.
 
+--rename-file=<path>::
+	The given file contains explicit rename pairs that override
+	automatically detected renames. Each line contains a rename pair
+	in the following format:
++
+<source path> <space> "=>" <space> <destination path>
+
 -D::
 --irreversible-delete::
 	Omit the preimage for deletes, i.e. print only the header but not
diff --git a/diff.c b/diff.c
index 8d38fe8..36cf08b 100644
--- a/diff.c
+++ b/diff.c
@@ -3773,6 +3773,16 @@ int diff_opt_parse(struct diff_options *options,
 		DIFF_OPT_SET(options, RENAME_EMPTY);
 	else if (!strcmp(arg, "--no-rename-empty"))
 		DIFF_OPT_CLR(options, RENAME_EMPTY);
+	else if (skip_prefix(arg, "--rename-file=", &arg)) {
+		struct strbuf sb = STRBUF_INIT;
+		const char *path = arg;
+
+		if (prefix)
+			path = prefix_filename(prefix, strlen(prefix), path);
+		if (strbuf_read_file(&sb, path, 0) == -1)
+			die(_("unable to read %s"), path);
+		options->manual_renames = strbuf_detach(&sb, NULL); /* leak */
+	}
 	else if (!strcmp(arg, "--relative"))
 		DIFF_OPT_SET(options, RELATIVE_NAME);
 	else if (skip_prefix(arg, "--relative=", &arg)) {
diff --git a/diff.h b/diff.h
index 76b5536..37179ba 100644
--- a/diff.h
+++ b/diff.h
@@ -176,6 +176,7 @@ struct diff_options {
 	diff_prefix_fn_t output_prefix;
 	int output_prefix_length;
 	void *output_prefix_data;
+	const char *manual_renames;
 
 	int diff_path_counter;
 };
diff --git a/diffcore-rename.c b/diffcore-rename.c
index af1fe08..79beec8 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -346,8 +346,11 @@ static int find_exact_renames(struct diff_options *options)
 		insert_file_table(&file_table, i, rename_src[i].p->one);
 
 	/* Walk the destinations and find best source match */
-	for (i = 0; i < rename_dst_nr; i++)
+	for (i = 0; i < rename_dst_nr; i++) {
+		if (rename_dst[i].pair)
+			continue; /* dealt with exact match already. */
 		renames += find_identical_files(&file_table, i, options);
+	}
 
 	/* Free the hash data structure and entries */
 	hashmap_free(&file_table, 1);
@@ -355,6 +358,61 @@ static int find_exact_renames(struct diff_options *options)
 	return renames;
 }
 
+static int manual_rename(const char *src, int srclen,
+			 const char *dst, int dstlen)
+{
+	int src_index, dst_index;
+
+	for (src_index = 0; src_index < rename_src_nr; src_index++) {
+		const char *path = rename_src[src_index].p->one->path;
+		if (strlen(path) == srclen && !strncmp(path, src, srclen))
+			break;
+	}
+	if (src_index == rename_src_nr)
+		return 0;
+
+	for (dst_index = 0; dst_index < rename_dst_nr; dst_index++) {
+		const char *path = rename_dst[dst_index].two->path;
+		if (strlen(path) == dstlen && !strncmp(path, dst, dstlen))
+			break;
+	}
+	if (dst_index == rename_dst_nr)
+		return 0;
+
+	record_rename_pair(dst_index, src_index, MAX_SCORE);
+	return 1;
+}
+
+static int find_manual_renames(struct diff_options *options)
+{
+	int renames = 0;
+	const char *p, *end;
+
+	if (!options->manual_renames)
+		return 0;
+
+	p = options->manual_renames;
+	end = p + strlen(p);
+	while (p < end) {
+		const char *line_end = strchr(p, '\n');
+		const char *arrow = strstr(p, " => ");
+		const char *src = p, *dst;
+
+		if (!line_end)
+			line_end = end;
+		p = line_end + 1;
+
+		if (!arrow || arrow >= line_end)
+			continue;
+
+		dst = arrow + strlen(" => ");
+		renames += manual_rename(src, arrow - src,
+					 dst, line_end - dst);
+	}
+
+	return renames;
+}
+
 #define NUM_CANDIDATE_PER_DST 4
 static void record_if_better(struct diff_score m[], struct diff_score *o)
 {
@@ -500,11 +558,13 @@ void diffcore_rename(struct diff_options *options)
 	if (rename_dst_nr == 0 || rename_src_nr == 0)
 		goto cleanup; /* nothing to do */
 
+	rename_count = find_manual_renames(options);
+
 	/*
 	 * We really want to cull the candidates list early
 	 * with cheap tests in order to avoid doing deltas.
 	 */
-	rename_count = find_exact_renames(options);
+	rename_count += find_exact_renames(options);
 
 	/* Did we only want exact renames? */
 	if (minimum_score == MAX_SCORE)
diff --git a/t/t4001-diff-rename.sh b/t/t4001-diff-rename.sh
index 2f327b7..ab9a666 100755
--- a/t/t4001-diff-rename.sh
+++ b/t/t4001-diff-rename.sh
@@ -156,4 +156,37 @@ test_expect_success 'rename pretty print common prefix and suffix overlap' '
 	test_i18ngrep " d/f/{ => f}/e " output
 '
 
+test_expect_success 'manual rename correction' '
+	test_create_repo correct-rename &&
+	(
+		cd correct-rename &&
+		echo one > old-one &&
+		echo two > old-two &&
+		git add old-one old-two &&
+		git commit -m old &&
+		git rm old-one old-two &&
+		echo one > new-one &&
+		echo two > new-two &&
+		git add new-one new-two &&
+		git commit -m new &&
+		git diff -M --summary HEAD^ | grep rename >actual &&
+		cat >expected <<-\EOF &&
+		 rename old-one => new-one (100%)
+		 rename old-two => new-two (100%)
+		EOF
+		test_cmp expected actual &&
+
+		cat >correction <<-\EOF &&
+		old-one => new-two
+		old-two => new-one
+		EOF
+		git diff -M --rename-file=correction --summary HEAD^ | grep rename >actual &&
+		cat >expected <<-\EOF &&
+		 rename old-two => new-one (100%)
+		 rename old-one => new-two (100%)
+		EOF
+		test_cmp expected actual
+	)
+'
+
 test_done
-- 
2.7.0.125.g9eec362

  parent reply	other threads:[~2016-01-20 11:07 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-01-20 11:06 [PATCH 0/7] Diff rename, manual correction, round 2 Nguyễn Thái Ngọc Duy
2016-01-20 11:06 ` [PATCH 1/7] diff-no-index: do not take a redundant prefix argument Nguyễn Thái Ngọc Duy
2016-01-20 11:06 ` [PATCH 2/7] diff.c: take "prefix" argument in diff_opt_parse() Nguyễn Thái Ngọc Duy
2016-01-20 20:23   ` Junio C Hamano
2016-01-20 20:29     ` Jeff King
2016-01-20 21:49       ` Junio C Hamano
2016-01-21 11:48         ` Duy Nguyen
2016-01-21 23:01           ` Junio C Hamano
2016-01-20 11:06 ` Nguyễn Thái Ngọc Duy [this message]
2016-01-20 22:44   ` [PATCH 3/7] diff: add --rename-file Junio C Hamano
2016-01-20 22:47   ` Junio C Hamano
2016-01-20 11:06 ` [PATCH 4/7] log: add --rename-notes to correct renames per commit Nguyễn Thái Ngọc Duy
2016-01-20 23:29   ` Junio C Hamano
2016-01-22  1:00     ` Duy Nguyen
2016-01-20 11:06 ` [PATCH 5/7] merge: add --rename-file Nguyễn Thái Ngọc Duy
2016-01-20 11:06 ` [PATCH 6/7] diffcore-rename: allow to say "rename this blob to that blob" Nguyễn Thái Ngọc Duy
2016-01-20 11:06 ` [PATCH 7/7] merge: add --rename-notes Nguyễn Thái Ngọc Duy
2016-01-21 17:53   ` Junio C Hamano
2016-01-22  3:35     ` Duy Nguyen
2016-01-22 17:17       ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1453287968-26000-4-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).