git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Max Kirillov <max@max630.net>
To: Junio C Hamano <gitster@pobox.com>
Cc: Max Kirillov <max@max630.net>, Jeff King <peff@peff.net>,
	git@vger.kernel.org
Subject: [PATCH/RFC] combine-diff.c: make intersect_paths() behave like hunk filtering
Date: Tue, 14 Apr 2015 07:09:13 +0300	[thread overview]
Message-ID: <1428984553-11363-1-git-send-email-max@max630.net> (raw)
In-Reply-To: <20150412054332.GA28555@wheezy.local>

* for `diff --cc` there are 2 cases:
  * the path must be changed relative to at least 2 parents, and those
    parents must differ in the path. In other words, the child and its
    parents must contain at least 3 different versions of the file.
    A path that does not exist in a commit counts as one version.
  * All parents are the same, but the child commit differs from them.
* for `diff -c`: the path must be changed relative to at least 1 parent.

Signed-off-by: Max Kirillov <max@max630.net>
---
This is what could be done to hide the added and removed files.
It also makes it work faster - diff --cc on the evil merge now runs in 2.5 seconds
instead of 11 (or 0.4 without the series).

This also fails t4057, but I really don't see the logic of `diff -c`
otherwise. It should show all changes, trivial merges included, and this
is shown by the long files in my t4059. But it uses the same path filtering as
`diff --cc`, which is even more restrictive.
 combine-diff.c | 91 ++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 66 insertions(+), 25 deletions(-)

diff --git a/combine-diff.c b/combine-diff.c
index 2285c7c..f44032a 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -50,20 +50,49 @@ static void insert_path(struct combine_diff_path **pos, const char* path, int n,
 	*pos = p;
 }
 
-static int changed_parents(struct combine_diff_path *p, int n)
+static int path_not_interesting(struct combine_diff_path *p, int n,
+				struct diff_filespec *new_parent)
 {
 	int parent_idx;
-	int result = 0;
+	struct object_id first_parent;
+	int found_first = 0;
+	int found_same_parent = 0;
 
 	for (parent_idx = 0; parent_idx < n; parent_idx++) {
-		if (p->parent[parent_idx].status != ' ')
-			result++;
+		if (p->parent[parent_idx].status != ' ') {
+			if (found_first) {
+				if (hashcmp(p->parent[parent_idx].oid.hash, first_parent.hash)) {
+					/* found second different unique parent - non-trivial merge */
+					return 0;
+				}
+			} else {
+				found_first = 1;
+				hashcpy(first_parent.hash,
+					p->parent[parent_idx].oid.hash);
+			}
+		} else {
+			/* the new commit repeats some of parents */
+			found_same_parent = 1;
+		}
 	}
 
-	return result;
+	if (new_parent) {
+		if (hashcmp(p->oid.hash, new_parent->sha1)) {
+			if (!found_same_parent || hashcmp(first_parent.hash, new_parent->sha1)) {
+				return 0;
+			} else {
+				return 1;
+			}
+		} else {
+			found_same_parent = 1;
+		}
+	}
+
+	return found_same_parent;
 }
 
-static struct combine_diff_path *intersect_paths(struct combine_diff_path *curr, int n, int num_parent)
+static struct combine_diff_path *intersect_paths(
+	struct combine_diff_path *curr, int n, int num_parent, int dense)
 {
 	struct diff_queue_struct *q = &diff_queued_diff;
 	struct combine_diff_path *p, **tail = &curr;
@@ -81,35 +110,46 @@ static struct combine_diff_path *intersect_paths(struct combine_diff_path *curr,
 
 		if (cmp < 0) {
 			/* p->path not in q->queue[] */
-			if (num_parent > 2 && 2 - changed_parents(p, n) <= num_parent - n - 1) {
-				/* still can get 2 changed parents */
+			if (dense &&
+			    n == num_parent - 1 &&
+			    path_not_interesting(p, n, NULL)) {
+				/* only 1 unique different parent
+				   not interesting change */
+				*tail = p->next;
+				free(p);
+			} else {
+				/* already has or still can get 2 changed parents */
 				hashcpy(p->parent[n].oid.hash, p->oid.hash);
 				p->parent[n].mode = p->mode;
 				p->parent[n].status = ' ';
 				tail = &p->next;
-			} else {
-				*tail = p->next;
-				free(p);
 			}
 			continue;
-		}
-
-		if (cmp > 0) {
+		} else if (cmp > 0) {
 			/* q->queue[i] not in p->path */
-			if (1 <= num_parent - n - 1) {
-				insert_path(tail, q->queue[i]->two->path, n, num_parent, q->queue[i]);
+			if (!dense || n < num_parent - 1) {
+				insert_path(tail, q->queue[i]->two->path,
+					    n, num_parent, q->queue[i]);
 				tail = &(*tail)->next;
 			}
 			i++;
 			continue;
-		}
+		} else {
+			if (dense &&
+			    n == num_parent - 1 &&
+			    path_not_interesting(p, n, q->queue[i]->one)) {
+				*tail = p->next;
+				free(p);
+			} else {
+				hashcpy(p->parent[n].oid.hash, q->queue[i]->one->sha1);
+				p->parent[n].mode = q->queue[i]->one->mode;
+				p->parent[n].status = q->queue[i]->status;
 
-		hashcpy(p->parent[n].oid.hash, q->queue[i]->one->sha1);
-		p->parent[n].mode = q->queue[i]->one->mode;
-		p->parent[n].status = q->queue[i]->status;
+				tail = &p->next;
+			}
+			i++;
+		}
 
-		tail = &p->next;
-		i++;
 	}
 	return curr;
 }
@@ -1341,7 +1381,8 @@ static const char *path_path(void *obj)
 
 /* find set of paths that every parent touches */
 static struct combine_diff_path *find_paths_generic(const unsigned char *sha1,
-	const struct sha1_array *parents, struct diff_options *opt)
+	const struct sha1_array *parents, struct diff_options *opt,
+	int dense)
 {
 	struct combine_diff_path *paths = NULL;
 	int i, num_parent = parents->nr;
@@ -1367,7 +1408,7 @@ static struct combine_diff_path *find_paths_generic(const unsigned char *sha1,
 			opt->output_format = DIFF_FORMAT_NO_OUTPUT;
 		diff_tree_sha1(parents->sha1[i], sha1, "", opt);
 		diffcore_std(opt);
-		paths = intersect_paths(paths, i, num_parent);
+		paths = intersect_paths(paths, i, num_parent, dense);
 
 		/* if showing diff, show it in requested order */
 		if (opt->output_format != DIFF_FORMAT_NO_OUTPUT &&
@@ -1477,7 +1518,7 @@ void diff_tree_combined(const unsigned char *sha1,
 		 * diff(sha1,parent_i) for all i to do the job, specifically
 		 * for parent0.
 		 */
-		paths = find_paths_generic(sha1, parents, &diffopts);
+		paths = find_paths_generic(sha1, parents, &diffopts, dense);
 	}
 	else {
 		int stat_opt;
-- 
2.3.4.2801.g3d0809b

  parent reply	other threads:[~2015-04-14  4:09 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-02 20:34 [PATCH 0/4] diff --cc: relax path filtering Max Kirillov
2015-04-02 20:34 ` [PATCH 1/4] Add test for showing discarded changes with diff --cc Max Kirillov
2015-04-02 20:55   ` Junio C Hamano
2015-04-03 16:03     ` Max Kirillov
2015-04-02 20:34 ` [PATCH 2/4] combine-diff.c: refactor: extract insert_path() Max Kirillov
2015-04-02 20:34 ` [PATCH 3/4] diff --cc: relax too strict paths picking Max Kirillov
2015-04-02 20:59   ` Junio C Hamano
2015-04-02 20:34 ` [PATCH 4/4] t4059: rewrite to be adaptive to hunk filtering Max Kirillov
2015-04-02 21:13 ` [PATCH 0/4] diff --cc: relax path filtering Jeff King
2015-04-03 16:29   ` Max Kirillov
2015-04-03 15:58 ` [PATCH v2 " Max Kirillov
2015-04-03 15:58   ` [PATCH v2 1/4] t4059: test 'diff --cc' with a change from only few parents Max Kirillov
2015-04-11 20:04     ` Junio C Hamano
2015-04-11 21:07     ` Junio C Hamano
2015-04-11 21:20       ` Junio C Hamano
2015-04-12  5:43       ` Max Kirillov
2015-04-12  5:51         ` Junio C Hamano
2015-04-14  4:22           ` Max Kirillov
2015-04-14  4:09         ` Max Kirillov [this message]
2015-04-03 15:58   ` [PATCH v2 2/4] combine-diff.c: refactor: extract insert_path() Max Kirillov
2015-04-11 20:14     ` Junio C Hamano
2015-04-03 15:58   ` [PATCH v2 3/4] diff --cc: relax too strict paths picking Max Kirillov
2015-04-03 15:58   ` [PATCH v2 4/4] t4059: rewrite to be adaptive to hunk filtering Max Kirillov
2015-04-12  5:48     ` Junio C Hamano
2015-04-14  4:18       ` Max Kirillov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1428984553-11363-1-git-send-email-max@max630.net \
    --to=max@max630.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).