git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Add -B flag to diff-* brothers.
@ 2005-05-28 19:33 Junio C Hamano
  2005-05-29  9:14 ` [PATCH] diff: move diffcore-break before diffcore-rename Junio C Hamano
  2005-05-29 21:56 ` [PATCH] Add -B flag to diff-* brothers Petr Baudis
  0 siblings, 2 replies; 4+ messages in thread
From: Junio C Hamano @ 2005-05-28 19:33 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Git Mailing List

This introduces diffcore-break.c, a new diffcore
transformation.

When the -B flag is given, a patch that represents a complete
rewrite is broken into a deletion followed by a creation.  This
makes it easier to review such a complete rewrite patch.

The -B flag takes the same syntax as the -M and -C flags to
specify the maximum amount of source material the resulting file
can still have to be considered a complete rewrite, and defaults
to 15% if not specified.  For example, this detects the complete
rewrite of ls-tree.c I sent earlier.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---

Makefile          |    3 -
diff-cache.c      |    7 +++
diff-files.c      |    5 ++
diff-tree.c       |    7 +++
diff.h            |    2 
diffcore-break.c  |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
diffcore-rename.c |    4 -
diffcore.h        |    3 -
8 files changed, 137 insertions(+), 4 deletions(-)
new file (100644): diffcore-break.c

diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -48,7 +48,7 @@ LIB_OBJS += strbuf.o
 
 LIB_H += diff.h count-delta.h
 LIB_OBJS += diff.o diffcore-rename.o diffcore-pickaxe.o diffcore-pathspec.o \
-	count-delta.o diffcore-order.o
+	count-delta.o diffcore-order.o diffcore-break.o
 
 LIB_OBJS += gitenv.o
 
@@ -131,6 +131,7 @@ diffcore-rename.o : $(LIB_H) diffcore.h
 diffcore-pathspec.o : $(LIB_H) diffcore.h
 diffcore-pickaxe.o : $(LIB_H) diffcore.h
 diffcore-order.o : $(LIB_H) diffcore.h
+diffcore-break.o : $(LIB_H) diffcore.h
 
 test: all
 	$(MAKE) -C t/ all
diff --git a/diff-cache.c b/diff-cache.c
--- a/diff-cache.c
+++ b/diff-cache.c
@@ -9,6 +9,7 @@ static int diff_setup_opt = 0;
 static int diff_score_opt = 0;
 static const char *pickaxe = NULL;
 static int pickaxe_opts = 0;
+static int diff_break_opt = -1;
 static const char *orderfile = NULL;
 
 /* A file entry went away or appeared */
@@ -189,6 +190,10 @@ int main(int argc, const char **argv)
 			diff_output_format = DIFF_FORMAT_PATCH;
 			continue;
 		}
+		if (!strncmp(arg, "-B", 2)) {
+			diff_break_opt = diff_scoreopt_parse(arg);
+			continue;
+		}
 		if (!strncmp(arg, "-M", 2)) {
 			detect_rename = DIFF_DETECT_RENAME;
 			diff_score_opt = diff_scoreopt_parse(arg);
@@ -251,6 +256,8 @@ int main(int argc, const char **argv)
 		diffcore_rename(detect_rename, diff_score_opt);
 	if (pickaxe)
 		diffcore_pickaxe(pickaxe, pickaxe_opts);
+	if (0 <= diff_break_opt)
+		diffcore_break(diff_break_opt);
 	if (orderfile)
 		diffcore_order(orderfile);
 	diff_flush(diff_output_format, 1);
diff --git a/diff-files.c b/diff-files.c
--- a/diff-files.c
+++ b/diff-files.c
@@ -15,6 +15,7 @@ static int diff_setup_opt = 0;
 static int diff_score_opt = 0;
 static const char *pickaxe = NULL;
 static int pickaxe_opts = 0;
+static int diff_break_opt = -1;
 static const char *orderfile = NULL;
 static int silent = 0;
 
@@ -60,6 +61,8 @@ int main(int argc, const char **argv)
 			orderfile = argv[1] + 2;
 		else if (!strcmp(argv[1], "--pickaxe-all"))
 			pickaxe_opts = DIFF_PICKAXE_ALL;
+		else if (!strncmp(argv[1], "-B", 2))
+			diff_break_opt = diff_scoreopt_parse(argv[1]);
 		else if (!strncmp(argv[1], "-M", 2)) {
 			diff_score_opt = diff_scoreopt_parse(argv[1]);
 			detect_rename = DIFF_DETECT_RENAME;
@@ -125,6 +128,8 @@ int main(int argc, const char **argv)
 		diffcore_rename(detect_rename, diff_score_opt);
 	if (pickaxe)
 		diffcore_pickaxe(pickaxe, pickaxe_opts);
+	if (0 <= diff_break_opt)
+		diffcore_break(diff_break_opt);
 	if (orderfile)
 		diffcore_order(orderfile);
 	diff_flush(diff_output_format, 1);
diff --git a/diff-tree.c b/diff-tree.c
--- a/diff-tree.c
+++ b/diff-tree.c
@@ -14,6 +14,7 @@ static int diff_setup_opt = 0;
 static int diff_score_opt = 0;
 static const char *pickaxe = NULL;
 static int pickaxe_opts = 0;
+static int diff_break_opt = -1;
 static const char *orderfile = NULL;
 static const char *header = NULL;
 static const char *header_prefix = "";
@@ -266,6 +267,8 @@ static int call_diff_flush(void)
 		diffcore_rename(detect_rename, diff_score_opt);
 	if (pickaxe)
 		diffcore_pickaxe(pickaxe, pickaxe_opts);
+	if (0 <= diff_break_opt)
+		diffcore_break(diff_break_opt);
 	if (diff_queue_is_empty()) {
 		diff_flush(DIFF_FORMAT_NO_OUTPUT, 0);
 		return 0;
@@ -531,6 +534,10 @@ int main(int argc, const char **argv)
 			diff_score_opt = diff_scoreopt_parse(arg);
 			continue;
 		}
+		if (!strncmp(arg, "-B", 2)) {
+			diff_break_opt = diff_scoreopt_parse(arg);
+			continue;
+		}
 		if (!strcmp(arg, "-z")) {
 			diff_output_format = DIFF_FORMAT_MACHINE;
 			continue;
diff --git a/diff.h b/diff.h
--- a/diff.h
+++ b/diff.h
@@ -45,6 +45,8 @@ extern void diffcore_pathspec(const char
 
 extern void diffcore_order(const char *orderfile);
 
+extern void diffcore_break(int max_score);
+
 extern int diff_queue_is_empty(void);
 
 #define DIFF_FORMAT_HUMAN	0
diff --git a/diffcore-break.c b/diffcore-break.c
new file mode 100644
--- /dev/null
+++ b/diffcore-break.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ */
+#include "cache.h"
+#include "diff.h"
+#include "diffcore.h"
+#include "delta.h"
+#include "count-delta.h"
+
+static int very_different(struct diff_filespec *src,
+			  struct diff_filespec *dst,
+			  int max_score)
+{
+	/* dst is recorded as a modification of src.  Are they so
+	 * different that we are better off recording this as a pair
+	 * of delete and create?  max_score is maximum size that is
+	 * common between src and dst for the pair to be still considered
+	 * a change (not delete and create), and typically set to 5-10%.
+	 */
+	void *delta;
+	unsigned long delta_size, base_size;
+
+	if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
+		return 0; /* leave symlink rename alone */
+
+	if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1))
+		return 0; /* error but caught downstream */
+
+	delta_size = ((src->size < dst->size) ?
+		      (dst->size - src->size) : (src->size - dst->size));
+	base_size = ((src->size < dst->size) ? src->size : dst->size);
+
+	/*
+	 * If delta size is larger than
+	 * (MAX_SCORE-max_score)/MAX_SCORE * min(src->size, dst->size)
+	 * then we declare this is too big a change to be a patch.
+	 */
+	if (base_size * (MAX_SCORE-max_score) < delta_size * MAX_SCORE)
+		return 1;
+
+	if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
+		return 0; /* error but caught downstream */
+
+	delta = diff_delta(src->data, src->size,
+			   dst->data, dst->size,
+			   &delta_size);
+
+	/* A delta that has a lot of literal additions would have
+	 * big delta_size no matter what else it does.
+	 */
+	if (base_size * (MAX_SCORE-max_score) < delta_size * MAX_SCORE)
+		return 1;
+
+	/* Estimate the edit size by interpreting delta. */
+	delta_size = count_delta(delta, delta_size);
+	free(delta);
+	if (delta_size == UINT_MAX)
+		return 0; /* error in delta computation */
+	/*
+	 * So how big is the edit?
+	 */
+	if (base_size * (MAX_SCORE-max_score) < delta_size * MAX_SCORE)
+		return 1;
+	return 0;
+}
+
+void diffcore_break(int max_score)
+{
+	struct diff_queue_struct *q = &diff_queued_diff;
+	struct diff_queue_struct outq;
+	int i;
+
+	if (!max_score)
+		max_score = DEFAULT_MAXIMUM_SCORE;
+
+	outq.nr = outq.alloc = 0;
+	outq.queue = NULL;
+
+	for (i = 0; i < q->nr; i++) {
+		struct diff_filepair *p = q->queue[i];
+		/* We deal only with in-place edit of non directory */
+		if (DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two) &&
+		    !S_ISDIR(p->one->mode) && !S_ISDIR(p->two->mode) &&
+		    !strcmp(p->one->path, p->two->path) &&
+		    very_different(p->one, p->two, max_score)) {
+			/* Split this into delete and create */
+			struct diff_filespec *one_pre, *one_post,
+				*two_pre, *two_post;
+
+			/* deletion of one */
+			one_pre = alloc_filespec(p->one->path);
+			fill_filespec(one_pre, p->one->sha1, p->one->mode);
+			one_post = alloc_filespec(p->one->path);
+			diff_queue(&outq, one_pre, one_post);
+
+			/* creation of two */
+			two_pre = alloc_filespec(p->two->path);
+			two_post = alloc_filespec(p->two->path);
+			fill_filespec(two_post, p->two->sha1, p->two->mode);
+			diff_queue(&outq, two_pre, two_post);
+			diff_free_filepair(p);
+		}
+		else
+			diff_q(&outq, p);
+	}
+	free(q->queue);
+	*q = outq;
+
+	return;
+}
diff --git a/diffcore-rename.c b/diffcore-rename.c
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -208,8 +208,8 @@ static int score_compare(const void *a_,
 int diff_scoreopt_parse(const char *opt)
 {
 	int diglen, num, scale, i;
-	if (opt[0] != '-' || (opt[1] != 'M' && opt[1] != 'C'))
-		return -1; /* that is not a -M nor -C option */
+	if (opt[0] != '-' || (opt[1] != 'M' && opt[1] != 'C' && opt[1] != 'B'))
+		return -1; /* that is not a -M, -C nor -B option */
 	diglen = strspn(opt+2, "0123456789");
 	if (diglen == 0 || strlen(opt+2) != diglen)
 		return 0; /* use default */
diff --git a/diffcore.h b/diffcore.h
--- a/diffcore.h
+++ b/diffcore.h
@@ -9,7 +9,8 @@
  * in anything else.
  */
 #define MAX_SCORE 10000
-#define DEFAULT_MINIMUM_SCORE 5000
+#define DEFAULT_MINIMUM_SCORE 5000 /* rename/copy similarity minimum */
+#define DEFAULT_MAXIMUM_SCORE 1500 /* similarity maximum for break to happen */
 
 #define RENAME_DST_MATCHED 01
 
------------------------------------------------


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2005-05-29 23:48 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-05-28 19:33 [PATCH] Add -B flag to diff-* brothers Junio C Hamano
2005-05-29  9:14 ` [PATCH] diff: move diffcore-break before diffcore-rename Junio C Hamano
2005-05-29 21:56 ` [PATCH] Add -B flag to diff-* brothers Petr Baudis
2005-05-29 23:51   ` Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).