* [PATCH] Add -B flag to diff-* brothers.
@ 2005-05-28 19:33 Junio C Hamano
2005-05-29 9:14 ` [PATCH] diff: move diffcore-break before diffcore-rename Junio C Hamano
2005-05-29 21:56 ` [PATCH] Add -B flag to diff-* brothers Petr Baudis
0 siblings, 2 replies; 4+ messages in thread
From: Junio C Hamano @ 2005-05-28 19:33 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Git Mailing List
This introduces diffcore-break.c, a new diffcore
transformation.
When the -B flag is given, a patch that represents a complete
rewrite is broken into a deletion followed by a creation. This
makes it easier to review such a complete rewrite patch.
The -B flag takes the same syntax as the -M and -C flags to
specify the maximum amount of source material the resulting file
can still have to be considered a complete rewrite, and defaults
to 15% if not specified. For example, this detects the complete
rewrite of ls-tree.c I sent earlier.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
Makefile | 3 -
diff-cache.c | 7 +++
diff-files.c | 5 ++
diff-tree.c | 7 +++
diff.h | 2
diffcore-break.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
diffcore-rename.c | 4 -
diffcore.h | 3 -
8 files changed, 137 insertions(+), 4 deletions(-)
new file (100644): diffcore-break.c
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -48,7 +48,7 @@ LIB_OBJS += strbuf.o
LIB_H += diff.h count-delta.h
LIB_OBJS += diff.o diffcore-rename.o diffcore-pickaxe.o diffcore-pathspec.o \
- count-delta.o diffcore-order.o
+ count-delta.o diffcore-order.o diffcore-break.o
LIB_OBJS += gitenv.o
@@ -131,6 +131,7 @@ diffcore-rename.o : $(LIB_H) diffcore.h
diffcore-pathspec.o : $(LIB_H) diffcore.h
diffcore-pickaxe.o : $(LIB_H) diffcore.h
diffcore-order.o : $(LIB_H) diffcore.h
+diffcore-break.o : $(LIB_H) diffcore.h
test: all
$(MAKE) -C t/ all
diff --git a/diff-cache.c b/diff-cache.c
--- a/diff-cache.c
+++ b/diff-cache.c
@@ -9,6 +9,7 @@ static int diff_setup_opt = 0;
static int diff_score_opt = 0;
static const char *pickaxe = NULL;
static int pickaxe_opts = 0;
+static int diff_break_opt = -1;
static const char *orderfile = NULL;
/* A file entry went away or appeared */
@@ -189,6 +190,10 @@ int main(int argc, const char **argv)
diff_output_format = DIFF_FORMAT_PATCH;
continue;
}
+ if (!strncmp(arg, "-B", 2)) {
+ diff_break_opt = diff_scoreopt_parse(arg);
+ continue;
+ }
if (!strncmp(arg, "-M", 2)) {
detect_rename = DIFF_DETECT_RENAME;
diff_score_opt = diff_scoreopt_parse(arg);
@@ -251,6 +256,8 @@ int main(int argc, const char **argv)
diffcore_rename(detect_rename, diff_score_opt);
if (pickaxe)
diffcore_pickaxe(pickaxe, pickaxe_opts);
+ if (0 <= diff_break_opt)
+ diffcore_break(diff_break_opt);
if (orderfile)
diffcore_order(orderfile);
diff_flush(diff_output_format, 1);
diff --git a/diff-files.c b/diff-files.c
--- a/diff-files.c
+++ b/diff-files.c
@@ -15,6 +15,7 @@ static int diff_setup_opt = 0;
static int diff_score_opt = 0;
static const char *pickaxe = NULL;
static int pickaxe_opts = 0;
+static int diff_break_opt = -1;
static const char *orderfile = NULL;
static int silent = 0;
@@ -60,6 +61,8 @@ int main(int argc, const char **argv)
orderfile = argv[1] + 2;
else if (!strcmp(argv[1], "--pickaxe-all"))
pickaxe_opts = DIFF_PICKAXE_ALL;
+ else if (!strncmp(argv[1], "-B", 2))
+ diff_break_opt = diff_scoreopt_parse(argv[1]);
else if (!strncmp(argv[1], "-M", 2)) {
diff_score_opt = diff_scoreopt_parse(argv[1]);
detect_rename = DIFF_DETECT_RENAME;
@@ -125,6 +128,8 @@ int main(int argc, const char **argv)
diffcore_rename(detect_rename, diff_score_opt);
if (pickaxe)
diffcore_pickaxe(pickaxe, pickaxe_opts);
+ if (0 <= diff_break_opt)
+ diffcore_break(diff_break_opt);
if (orderfile)
diffcore_order(orderfile);
diff_flush(diff_output_format, 1);
diff --git a/diff-tree.c b/diff-tree.c
--- a/diff-tree.c
+++ b/diff-tree.c
@@ -14,6 +14,7 @@ static int diff_setup_opt = 0;
static int diff_score_opt = 0;
static const char *pickaxe = NULL;
static int pickaxe_opts = 0;
+static int diff_break_opt = -1;
static const char *orderfile = NULL;
static const char *header = NULL;
static const char *header_prefix = "";
@@ -266,6 +267,8 @@ static int call_diff_flush(void)
diffcore_rename(detect_rename, diff_score_opt);
if (pickaxe)
diffcore_pickaxe(pickaxe, pickaxe_opts);
+ if (0 <= diff_break_opt)
+ diffcore_break(diff_break_opt);
if (diff_queue_is_empty()) {
diff_flush(DIFF_FORMAT_NO_OUTPUT, 0);
return 0;
@@ -531,6 +534,10 @@ int main(int argc, const char **argv)
diff_score_opt = diff_scoreopt_parse(arg);
continue;
}
+ if (!strncmp(arg, "-B", 2)) {
+ diff_break_opt = diff_scoreopt_parse(arg);
+ continue;
+ }
if (!strcmp(arg, "-z")) {
diff_output_format = DIFF_FORMAT_MACHINE;
continue;
diff --git a/diff.h b/diff.h
--- a/diff.h
+++ b/diff.h
@@ -45,6 +45,8 @@ extern void diffcore_pathspec(const char
extern void diffcore_order(const char *orderfile);
+extern void diffcore_break(int max_score);
+
extern int diff_queue_is_empty(void);
#define DIFF_FORMAT_HUMAN 0
diff --git a/diffcore-break.c b/diffcore-break.c
new file mode 100644
--- /dev/null
+++ b/diffcore-break.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2005 Junio C Hamano
+ */
+#include "cache.h"
+#include "diff.h"
+#include "diffcore.h"
+#include "delta.h"
+#include "count-delta.h"
+
+static int very_different(struct diff_filespec *src,
+ struct diff_filespec *dst,
+ int max_score)
+{
+ /* dst is recorded as a modification of src. Are they so
+ * different that we are better off recording this as a pair
+ * of delete and create? max_score is maximum size that is
+ * common between src and dst for the pair to be still considered
+ * a change (not delete and create), and typically set to 5-10%.
+ */
+ void *delta;
+ unsigned long delta_size, base_size;
+
+ if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
+ return 0; /* leave symlink rename alone */
+
+ if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1))
+ return 0; /* error but caught downstream */
+
+ delta_size = ((src->size < dst->size) ?
+ (dst->size - src->size) : (src->size - dst->size));
+ base_size = ((src->size < dst->size) ? src->size : dst->size);
+
+ /*
+ * If delta size is larger than
+ * (MAX_SCORE-max_score)/MAX_SCORE * min(src->size, dst->size)
+ * then we declare this is too big a change to be a patch.
+ */
+ if (base_size * (MAX_SCORE-max_score) < delta_size * MAX_SCORE)
+ return 1;
+
+ if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
+ return 0; /* error but caught downstream */
+
+ delta = diff_delta(src->data, src->size,
+ dst->data, dst->size,
+ &delta_size);
+
+ /* A delta that has a lot of literal additions would have
+ * big delta_size no matter what else it does.
+ */
+ if (base_size * (MAX_SCORE-max_score) < delta_size * MAX_SCORE)
+ return 1;
+
+ /* Estimate the edit size by interpreting delta. */
+ delta_size = count_delta(delta, delta_size);
+ free(delta);
+ if (delta_size == UINT_MAX)
+ return 0; /* error in delta computation */
+ /*
+ * So how big is the edit?
+ */
+ if (base_size * (MAX_SCORE-max_score) < delta_size * MAX_SCORE)
+ return 1;
+ return 0;
+}
+
+void diffcore_break(int max_score)
+{
+ struct diff_queue_struct *q = &diff_queued_diff;
+ struct diff_queue_struct outq;
+ int i;
+
+ if (!max_score)
+ max_score = DEFAULT_MAXIMUM_SCORE;
+
+ outq.nr = outq.alloc = 0;
+ outq.queue = NULL;
+
+ for (i = 0; i < q->nr; i++) {
+ struct diff_filepair *p = q->queue[i];
+ /* We deal only with in-place edit of non directory */
+ if (DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two) &&
+ !S_ISDIR(p->one->mode) && !S_ISDIR(p->two->mode) &&
+ !strcmp(p->one->path, p->two->path) &&
+ very_different(p->one, p->two, max_score)) {
+ /* Split this into delete and create */
+ struct diff_filespec *one_pre, *one_post,
+ *two_pre, *two_post;
+
+ /* deletion of one */
+ one_pre = alloc_filespec(p->one->path);
+ fill_filespec(one_pre, p->one->sha1, p->one->mode);
+ one_post = alloc_filespec(p->one->path);
+ diff_queue(&outq, one_pre, one_post);
+
+ /* creation of two */
+ two_pre = alloc_filespec(p->two->path);
+ two_post = alloc_filespec(p->two->path);
+ fill_filespec(two_post, p->two->sha1, p->two->mode);
+ diff_queue(&outq, two_pre, two_post);
+ diff_free_filepair(p);
+ }
+ else
+ diff_q(&outq, p);
+ }
+ free(q->queue);
+ *q = outq;
+
+ return;
+}
diff --git a/diffcore-rename.c b/diffcore-rename.c
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -208,8 +208,8 @@ static int score_compare(const void *a_,
int diff_scoreopt_parse(const char *opt)
{
int diglen, num, scale, i;
- if (opt[0] != '-' || (opt[1] != 'M' && opt[1] != 'C'))
- return -1; /* that is not a -M nor -C option */
+ if (opt[0] != '-' || (opt[1] != 'M' && opt[1] != 'C' && opt[1] != 'B'))
+ return -1; /* that is not a -M, -C nor -B option */
diglen = strspn(opt+2, "0123456789");
if (diglen == 0 || strlen(opt+2) != diglen)
return 0; /* use default */
diff --git a/diffcore.h b/diffcore.h
--- a/diffcore.h
+++ b/diffcore.h
@@ -9,7 +9,8 @@
* in anything else.
*/
#define MAX_SCORE 10000
-#define DEFAULT_MINIMUM_SCORE 5000
+#define DEFAULT_MINIMUM_SCORE 5000 /* rename/copy similarity minimum */
+#define DEFAULT_MAXIMUM_SCORE 1500 /* similarity maximum for break to happen */
#define RENAME_DST_MATCHED 01
------------------------------------------------
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH] diff: move diffcore-break before diffcore-rename.
2005-05-28 19:33 [PATCH] Add -B flag to diff-* brothers Junio C Hamano
@ 2005-05-29 9:14 ` Junio C Hamano
2005-05-29 21:56 ` [PATCH] Add -B flag to diff-* brothers Petr Baudis
1 sibling, 0 replies; 4+ messages in thread
From: Junio C Hamano @ 2005-05-29 9:14 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Git Mailing List
The order diffcore-break is applied has changed. Earlier it
came fairly late in the diffcore chain, but now it comes before
diffcore-rename, which makes diffcore-break far more useful than
before.
As the new test t4008-diff-break-rewrite demonstrates, if a file
is a complete rewrite, it is broken and then can be subjected to
the usual rename detection. For example, if file0 gets
completely rewritten to make it as if it was rather based on
file1 which disappeared, diff-* -B -M would do the following:
The original change would look like this:
file0 --> file0' (quite different from file0)
file1 --> /dev/null
After diffcore-break, it would become this:
file0 --> /dev/null
/dev/null --> file0'
file1 --> /dev/null
Then diffcore-rename matches them up:
file1 --> file0'
The meaning of the parameter the -B option takes has changed
from the previous one. Earlier it meant the amount of source
materials permitted to remain for the file and still be
considered a rewrite, which meant that the lower the number, the
tighter the detection criteria. This was changed to match the
way the parameters to the -C and the -M options are used, where
larger parameter value means tighter detection criteria. The
parameter for the -B option now means how much new material must
be in the resulting file for the filepair to be considered a
rewrite, and defaults to 99%.
The internal score values are finer grained now. Earlier
maximum of 10000 has been raised to 60000; there is no user
visible changes but there is no reason to waste available bits.
Documentation now mentions the -B option.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
Documentation/git-diff-cache.txt | 5 -
Documentation/git-diff-files.txt | 5 -
Documentation/git-diff-tree.txt | 5 -
diff.c | 20 +++-
diffcore-break.c | 101 +++++++++++++----------
diffcore-rename.c | 64 ++++++++++++--
diffcore.h | 13 ++-
t/t4008-diff-break-rewrite.sh | 169 +++++++++++++++++++++++++++++++++++++++
8 files changed, 317 insertions(+), 65 deletions(-)
new file (100755): t/t4008-diff-break-rewrite.sh
diff --git a/Documentation/git-diff-cache.txt b/Documentation/git-diff-cache.txt
--- a/Documentation/git-diff-cache.txt
+++ b/Documentation/git-diff-cache.txt
@@ -9,7 +9,7 @@ git-diff-cache - Compares content and mo
SYNOPSIS
--------
-'git-diff-cache' [-p] [-r] [-z] [-m] [-M] [-R] [-C] [-O<orderfile>] [-S<string>] [--pickaxe-all] [--cached] <tree-ish> [<path>...]
+'git-diff-cache' [-p] [-r] [-z] [-m] [-B] [-M] [-R] [-C] [-O<orderfile>] [-S<string>] [--pickaxe-all] [--cached] <tree-ish> [<path>...]
DESCRIPTION
-----------
@@ -35,6 +35,9 @@ OPTIONS
-z::
\0 line termination on output
+-B::
+ Break complete rewrite changes into pairs of delete and create.
+
-M::
Detect renames.
diff --git a/Documentation/git-diff-files.txt b/Documentation/git-diff-files.txt
--- a/Documentation/git-diff-files.txt
+++ b/Documentation/git-diff-files.txt
@@ -9,7 +9,7 @@ git-diff-files - Compares files in the w
SYNOPSIS
--------
-'git-diff-files' [-p] [-q] [-r] [-z] [-M] [-C] [-R] [-O<orderfile>] [-S<string>] [--pickaxe-all] [<pattern>...]
+'git-diff-files' [-p] [-q] [-r] [-z] [-B] [-M] [-C] [-R] [-O<orderfile>] [-S<string>] [--pickaxe-all] [<pattern>...]
DESCRIPTION
-----------
@@ -29,6 +29,9 @@ OPTIONS
-R::
Output diff in reverse.
+-B::
+ Break complete rewrite changes into pairs of delete and create.
+
-M::
Detect renames.
diff --git a/Documentation/git-diff-tree.txt b/Documentation/git-diff-tree.txt
--- a/Documentation/git-diff-tree.txt
+++ b/Documentation/git-diff-tree.txt
@@ -9,7 +9,7 @@ git-diff-tree - Compares the content and
SYNOPSIS
--------
-'git-diff-tree' [-p] [-r] [-z] [--stdin] [-M] [-R] [-C] [-O<orderfile>] [-S<string>] [--pickaxe-all] [-m] [-s] [-v] [-t] <tree-ish> <tree-ish> [<pattern>]\*
+'git-diff-tree' [-p] [-r] [-z] [--stdin] [-B] [-M] [-R] [-C] [-O<orderfile>] [-S<string>] [--pickaxe-all] [-m] [-s] [-v] [-t] <tree-ish> <tree-ish> [<pattern>]\*
DESCRIPTION
-----------
@@ -33,6 +33,9 @@ OPTIONS
generate patch (see section on generating patches). For
git-diff-tree, this flag implies '-r' as well.
+-B::
+ Break complete rewrite changes into pairs of delete and create.
+
-M::
Detect renames.
diff --git a/diff.c b/diff.c
--- a/diff.c
+++ b/diff.c
@@ -609,6 +609,7 @@ struct diff_filepair *diff_queue(struct
dp->two = two;
dp->score = 0;
dp->source_stays = 0;
+ dp->broken_pair = 0;
diff_q(queue, dp);
return dp;
}
@@ -643,6 +644,16 @@ static void diff_flush_raw(struct diff_f
sprintf(status, "%c%03d", p->status,
(int)(0.5 + p->score * 100.0/MAX_SCORE));
break;
+ case 'N': case 'D':
+ two_paths = 0;
+ if (p->score)
+ sprintf(status, "%c%03d", p->status,
+ (int)(0.5 + p->score * 100.0/MAX_SCORE));
+ else {
+ status[0] = p->status;
+ status[1] = 0;
+ }
+ break;
default:
two_paths = 0;
status[0] = p->status;
@@ -799,8 +810,9 @@ void diff_debug_filepair(const struct di
{
diff_debug_filespec(p->one, i, "one");
diff_debug_filespec(p->two, i, "two");
- fprintf(stderr, "score %d, status %c source_stays %d\n",
- p->score, p->status ? : '?', p->source_stays);
+ fprintf(stderr, "score %d, status %c stays %d broken %d\n",
+ p->score, p->status ? : '?',
+ p->source_stays, p->broken_pair);
}
void diff_debug_queue(const char *msg, struct diff_queue_struct *q)
@@ -939,12 +951,12 @@ void diffcore_std(const char **paths,
{
if (paths)
diffcore_pathspec(paths);
+ if (0 <= break_opt)
+ diffcore_break(break_opt);
if (detect_rename)
diffcore_rename(detect_rename, rename_score);
if (pickaxe)
diffcore_pickaxe(pickaxe, pickaxe_opts);
- if (0 <= break_opt)
- diffcore_break(break_opt);
if (orderfile)
diffcore_order(orderfile);
}
diff --git a/diffcore-break.c b/diffcore-break.c
--- a/diffcore-break.c
+++ b/diffcore-break.c
@@ -9,13 +9,18 @@
static int very_different(struct diff_filespec *src,
struct diff_filespec *dst,
- int max_score)
+ int min_score)
{
/* dst is recorded as a modification of src. Are they so
* different that we are better off recording this as a pair
- * of delete and create? max_score is maximum size that is
- * common between src and dst for the pair to be still considered
- * a change (not delete and create), and typically set to 5-10%.
+ * of delete and create? min_score is the minimum amount of
+ * new material that must exist in the dst and not in src for
+ * the pair to be considered a complete rewrite, and recommended
+ * to be set to a very high value, 99% or so.
+ *
+ * The value we return represents the amount of new material
+ * that is in dst and not in src. We return 0 when we do not
+ * want to get the filepair broken.
*/
void *delta;
unsigned long delta_size, base_size;
@@ -28,15 +33,19 @@ static int very_different(struct diff_fi
delta_size = ((src->size < dst->size) ?
(dst->size - src->size) : (src->size - dst->size));
- base_size = ((src->size < dst->size) ? src->size : dst->size);
+
+ /* Notice that we use max of src and dst as the base size,
+ * unlike rename similarity detection. This is so that we do
+ * not mistake a large addition as a complete rewrite.
+ */
+ base_size = ((src->size < dst->size) ? dst->size : src->size);
/*
- * If delta size is larger than
- * (MAX_SCORE-max_score)/MAX_SCORE * min(src->size, dst->size)
- * then we declare this is too big a change to be a patch.
+ * If file size difference is too big compared to the
+ * base_size, we declare this a complete rewrite.
*/
- if (base_size * (MAX_SCORE-max_score) < delta_size * MAX_SCORE)
- return 1;
+ if (base_size * min_score < delta_size * MAX_SCORE)
+ return MAX_SCORE;
if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
return 0; /* error but caught downstream */
@@ -48,60 +57,68 @@ static int very_different(struct diff_fi
/* A delta that has a lot of literal additions would have
* big delta_size no matter what else it does.
*/
- if (base_size * (MAX_SCORE-max_score) < delta_size * MAX_SCORE)
- return 1;
+ if (base_size * min_score < delta_size * MAX_SCORE)
+ return MAX_SCORE;
/* Estimate the edit size by interpreting delta. */
delta_size = count_delta(delta, delta_size);
free(delta);
if (delta_size == UINT_MAX)
return 0; /* error in delta computation */
- /*
- * So how big is the edit?
- */
- if (base_size * (MAX_SCORE-max_score) < delta_size * MAX_SCORE)
- return 1;
- return 0;
+
+ if (base_size < delta_size)
+ return MAX_SCORE;
+
+ return delta_size * MAX_SCORE / base_size;
}
-void diffcore_break(int max_score)
+void diffcore_break(int min_score)
{
struct diff_queue_struct *q = &diff_queued_diff;
struct diff_queue_struct outq;
int i;
- if (!max_score)
- max_score = DEFAULT_MAXIMUM_SCORE;
+ if (!min_score)
+ min_score = DEFAULT_BREAK_SCORE;
outq.nr = outq.alloc = 0;
outq.queue = NULL;
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
- /* We deal only with in-place edit of non directory */
+ int score;
+
+ /* We deal only with in-place edit of non directory.
+ * We do not break anything else.
+ */
if (DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two) &&
!S_ISDIR(p->one->mode) && !S_ISDIR(p->two->mode) &&
- !strcmp(p->one->path, p->two->path) &&
- very_different(p->one, p->two, max_score)) {
- /* Split this into delete and create */
- struct diff_filespec *one_pre, *one_post,
- *two_pre, *two_post;
-
- /* deletion of one */
- one_pre = alloc_filespec(p->one->path);
- fill_filespec(one_pre, p->one->sha1, p->one->mode);
- one_post = alloc_filespec(p->one->path);
- diff_queue(&outq, one_pre, one_post);
-
- /* creation of two */
- two_pre = alloc_filespec(p->two->path);
- two_post = alloc_filespec(p->two->path);
- fill_filespec(two_post, p->two->sha1, p->two->mode);
- diff_queue(&outq, two_pre, two_post);
- diff_free_filepair(p);
+ !strcmp(p->one->path, p->two->path)) {
+ score = very_different(p->one, p->two, min_score);
+ if (min_score <= score) {
+ /* Split this into delete and create */
+ struct diff_filespec *null_one, *null_two;
+ struct diff_filepair *dp;
+
+ /* deletion of one */
+ null_one = alloc_filespec(p->one->path);
+ dp = diff_queue(&outq, p->one, null_one);
+ dp->score = score;
+ dp->broken_pair = 1;
+
+ /* creation of two */
+ null_two = alloc_filespec(p->two->path);
+ dp = diff_queue(&outq, null_two, p->two);
+ dp->score = score;
+ dp->broken_pair = 1;
+
+ free(p); /* not diff_free_filepair(), we are
+ * reusing one and two here.
+ */
+ continue;
+ }
}
- else
- diff_q(&outq, p);
+ diff_q(&outq, p);
}
free(q->queue);
*q = outq;
diff --git a/diffcore-rename.c b/diffcore-rename.c
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -232,7 +232,7 @@ void diffcore_rename(int detect_rename,
int num_create, num_src, dst_cnt;
if (!minimum_score)
- minimum_score = DEFAULT_MINIMUM_SCORE;
+ minimum_score = DEFAULT_RENAME_SCORE;
renq.queue = NULL;
renq.nr = renq.alloc = 0;
@@ -311,26 +311,66 @@ void diffcore_rename(int detect_rename,
outq.nr = outq.alloc = 0;
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
- struct diff_rename_dst *dst = locate_rename_dst(p->two, 0);
struct diff_filepair *pair_to_free = NULL;
- if (dst) {
- /* creation */
- if (dst->pair) {
- /* renq has rename/copy to produce
- * this file already, so we do not
- * emit the creation record in the
- * output.
- */
+ if (!DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two)) {
+ /*
+ * Creation
+ *
+ * We would output this create record if it has
+ * not been turned into a rename/copy already.
+ */
+ struct diff_rename_dst *dst =
+ locate_rename_dst(p->two, 0);
+ if (dst && dst->pair) {
diff_q(&outq, dst->pair);
pair_to_free = p;
}
else
- /* no matching rename/copy source, so record
- * this as a creation.
+ /* no matching rename/copy source, so
+ * record this as a creation.
*/
diff_q(&outq, p);
}
+ else if (DIFF_FILE_VALID(p->one) && !DIFF_FILE_VALID(p->two)) {
+ /*
+ * Deletion
+ *
+ * We would output this delete record if:
+ *
+ * (1) this is a broken delete and the counterpart
+ * broken create remains in the output; or
+ * (2) this is not a broken delete, and renq does
+ * not have a rename/copy to create p->two->path.
+ *
+ * Otherwise, the counterpart broken create
+ * has been turned into a rename-edit; or
+ * delete did not have a matching create to
+ * begin with.
+ */
+ if (DIFF_PAIR_BROKEN(p)) {
+ /* broken delete */
+ struct diff_rename_dst *dst =
+ locate_rename_dst(p->one, 0);
+ if (dst && dst->pair)
+ /* counterpart is now rename/copy */
+ pair_to_free = p;
+ }
+ else {
+ for (j = 0; j < renq.nr; j++)
+ if (!strcmp(renq.queue[j]->two->path,
+ p->one->path))
+ break;
+ if (j < renq.nr)
+ /* this path remains */
+ pair_to_free = p;
+ }
+
+ if (pair_to_free)
+ ;
+ else
+ diff_q(&outq, p);
+ }
else if (!diff_unmodified_pair(p))
/* all the usual ones need to be kept */
diff_q(&outq, p);
diff --git a/diffcore.h b/diffcore.h
--- a/diffcore.h
+++ b/diffcore.h
@@ -8,9 +8,9 @@
* (e.g. diffcore-rename, diffcore-pickaxe). Never include this header
* in anything else.
*/
-#define MAX_SCORE 10000
-#define DEFAULT_MINIMUM_SCORE 5000 /* rename/copy similarity minimum */
-#define DEFAULT_MAXIMUM_SCORE 1500 /* similarity maximum for break to happen */
+#define MAX_SCORE 60000
+#define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
+#define DEFAULT_BREAK_SCORE 59400 /* minimum for break to happen (99%)*/
#define RENAME_DST_MATCHED 01
@@ -43,14 +43,19 @@ struct diff_filepair {
struct diff_filespec *one;
struct diff_filespec *two;
unsigned short int score;
- char source_stays; /* all of R/C are copies */
char status; /* M C R N D U (see Documentation/diff-format.txt) */
+ unsigned source_stays : 1; /* all of R/C are copies */
+ unsigned broken_pair : 1;
};
#define DIFF_PAIR_UNMERGED(p) \
(!DIFF_FILE_VALID((p)->one) && !DIFF_FILE_VALID((p)->two))
#define DIFF_PAIR_RENAME(p) (strcmp((p)->one->path, (p)->two->path))
+#define DIFF_PAIR_BROKEN(p) \
+ ( (!DIFF_FILE_VALID((p)->one) != !DIFF_FILE_VALID((p)->two)) && \
+ ((p)->broken_pair != 0) )
+
#define DIFF_PAIR_TYPE_CHANGED(p) \
((S_IFMT & (p)->one->mode) != (S_IFMT & (p)->two->mode))
diff --git a/t/t4008-diff-break-rewrite.sh b/t/t4008-diff-break-rewrite.sh
new file mode 100755
--- /dev/null
+++ b/t/t4008-diff-break-rewrite.sh
@@ -0,0 +1,169 @@
+#!/bin/sh
+#
+# Copyright (c) 2005 Junio C Hamano
+#
+
+test_description='Break and then rename
+
+We have two very different files, file0 and file1, registered in a tree.
+
+We update file1 so drastically that it is more similar to file0, and
+then remove file0. With -B, changes to file1 should be broken into
+separate delete and create, resulting in removal of file0, removal of
+original file1 and creation of completely rewritten file1.
+
+Further, with -B and -M together, these three modifications should
+turn into rename-edit of file0 into file1.
+
+Starting from the same two files in the tree, we swap file0 and file1.
+With -B, this should be detected as two complete rewrites, resulting in
+four changes in total.
+
+Further, with -B and -M together, these should turn into two renames.
+'
+. ./test-lib.sh
+
+_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
+_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
+sanitize_diff_raw='s/ '"$_x40"' '"$_x40"' \([CDNR]\)[0-9]* / X X \1# /'
+compare_diff_raw () {
+ # When heuristics are improved, the score numbers would change.
+ # Ignore them while comparing.
+ # Also we do not check SHA1 hash generation in this test, which
+ # is a job for t0000-basic.sh
+
+ sed -e "$sanitize_diff_raw" <"$1" >.tmp-1
+ sed -e "$sanitize_diff_raw" <"$2" >.tmp-2
+ diff -u .tmp-1 .tmp-2 && rm -f .tmp-1 .tmp-2
+}
+
+test_expect_success \
+ setup \
+ 'cat ../../README >file0 &&
+ cat ../../COPYING >file1 &&
+ git-update-cache --add file0 file1 &&
+ tree=$(git-write-tree) &&
+ echo "$tree"'
+
+test_expect_success \
+ 'change file1 with copy-edit of file0 and remove file0' \
+ 'sed -e "s/git/GIT/" file0 >file1 &&
+ rm -f file0 &&
+ git-update-cache --remove file0 file1'
+
+test_expect_success \
+ 'run diff with -B' \
+ 'git-diff-cache -B --cached "$tree" >current'
+
+cat >expected <<\EOF
+:100644 000000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 0000000000000000000000000000000000000000 D file0
+:100644 000000 6ff87c4664981e4397625791c8ea3bbb5f2279a3 0000000000000000000000000000000000000000 D100 file1
+:000000 100644 0000000000000000000000000000000000000000 11e331465a89c394dc25c780de230043750c1ec8 N100 file1
+EOF
+
+test_expect_success \
+ 'validate result of -B (#1)' \
+ 'compare_diff_raw current expected'
+
+test_expect_success \
+ 'run diff with -B and -M' \
+ 'git-diff-cache -B -M "$tree" >current'
+
+cat >expected <<\EOF
+:100644 100644 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 08bb2fb671deff4c03a4d4a0a1315dff98d5732c R100 file0 file1
+EOF
+
+test_expect_success \
+ 'validate result of -B -M (#2)' \
+ 'compare_diff_raw current expected'
+
+test_expect_success \
+ 'swap file0 and file1' \
+ 'rm -f file0 file1 &&
+ git-read-tree -m $tree &&
+ git-checkout-cache -f -u -a &&
+ mv file0 tmp &&
+ mv file1 file0 &&
+ mv tmp file1 &&
+ git-update-cache file0 file1'
+
+test_expect_success \
+ 'run diff with -B' \
+ 'git-diff-cache -B "$tree" >current'
+
+cat >expected <<\EOF
+:100644 000000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 0000000000000000000000000000000000000000 D100 file0
+:000000 100644 0000000000000000000000000000000000000000 6ff87c4664981e4397625791c8ea3bbb5f2279a3 N100 file0
+:100644 000000 6ff87c4664981e4397625791c8ea3bbb5f2279a3 0000000000000000000000000000000000000000 D100 file1
+:000000 100644 0000000000000000000000000000000000000000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 N100 file1
+EOF
+
+test_expect_success \
+ 'validate result of -B (#3)' \
+ 'compare_diff_raw current expected'
+
+test_expect_success \
+ 'run diff with -B and -M' \
+ 'git-diff-cache -B -M "$tree" >current'
+
+cat >expected <<\EOF
+:100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 6ff87c4664981e4397625791c8ea3bbb5f2279a3 R100 file1 file0
+:100644 100644 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 R100 file0 file1
+EOF
+
+test_expect_success \
+ 'validate result of -B -M (#4)' \
+ 'compare_diff_raw current expected'
+
+test_expect_success \
+ 'make file0 into something completely different' \
+ 'rm -f file0 &&
+ ln -s frotz file0 &&
+ git-update-cache file0 file1'
+
+test_expect_success \
+ 'run diff with -B' \
+ 'git-diff-cache -B "$tree" >current'
+
+cat >expected <<\EOF
+:100644 120000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 67be421f88824578857624f7b3dc75e99a8a1481 T file0
+:100644 000000 6ff87c4664981e4397625791c8ea3bbb5f2279a3 0000000000000000000000000000000000000000 D100 file1
+:000000 100644 0000000000000000000000000000000000000000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 N100 file1
+EOF
+
+test_expect_success \
+ 'validate result of -B (#5)' \
+ 'compare_diff_raw current expected'
+
+test_expect_success \
+ 'run diff with -B' \
+ 'git-diff-cache -B -M "$tree" >current'
+
+# This should not mistake file0 as the copy source of new file1
+# due to type differences.
+cat >expected <<\EOF
+:100644 120000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 67be421f88824578857624f7b3dc75e99a8a1481 T file0
+:100644 000000 6ff87c4664981e4397625791c8ea3bbb5f2279a3 0000000000000000000000000000000000000000 D100 file1
+:000000 100644 0000000000000000000000000000000000000000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 N100 file1
+EOF
+
+test_expect_success \
+ 'validate result of -B -M (#6)' \
+ 'compare_diff_raw current expected'
+
+test_expect_success \
+ 'run diff with -M' \
+ 'git-diff-cache -M "$tree" >current'
+
+# This should not mistake file0 as the copy source of new file1
+# due to type differences.
+cat >expected <<\EOF
+:100644 120000 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 67be421f88824578857624f7b3dc75e99a8a1481 T file0
+:100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 f5deac7be59e7eeab8657fd9ae706fd6a57daed2 M file1
+EOF
+
+test_expect_success \
+ 'validate result of -M (#7)' \
+ 'compare_diff_raw current expected'
+
+test_done
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] Add -B flag to diff-* brothers.
2005-05-28 19:33 [PATCH] Add -B flag to diff-* brothers Junio C Hamano
2005-05-29 9:14 ` [PATCH] diff: move diffcore-break before diffcore-rename Junio C Hamano
@ 2005-05-29 21:56 ` Petr Baudis
2005-05-29 23:51 ` Junio C Hamano
1 sibling, 1 reply; 4+ messages in thread
From: Petr Baudis @ 2005-05-29 21:56 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Linus Torvalds, Git Mailing List
Dear diary, on Sat, May 28, 2005 at 09:33:44PM CEST, I got a letter
where Junio C Hamano <junkio@cox.net> told me that...
> This introduces diffcore-break.c, a new diffcore
> transformation.
>
> When the -B flag is given, a patch that represents a complete
> rewrite is broken into a deletion followed by a creation. This
> makes it easier to review such a complete rewrite patch.
>
> The -B flag takes the same syntax as the -M and -C flags to
> specify the maximum amount of source material the resulting file
> can still have to be considered a complete rewrite, and defaults
> to 15% if not specified. For example, this detects the complete
> rewrite of ls-tree.c I sent earlier.
>
> Signed-off-by: Junio C Hamano <junkio@cox.net>
Actually, I like this one - contrary to -O I can see how this could be
quite useful - I have wished for this many times when people would send
me some "complete rewrite" patches which I actually wanted to review.
Please don't give up on it. :-)
--
Petr "Pasky" Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] Add -B flag to diff-* brothers.
2005-05-29 21:56 ` [PATCH] Add -B flag to diff-* brothers Petr Baudis
@ 2005-05-29 23:51 ` Junio C Hamano
0 siblings, 0 replies; 4+ messages in thread
From: Junio C Hamano @ 2005-05-29 23:51 UTC (permalink / raw)
To: Petr Baudis; +Cc: Linus Torvalds, Git Mailing List
>>>>> "PB" == Petr Baudis <pasky@ucw.cz> writes:
PB> Actually, I like this one - contrary to -O I can see how this could be
PB> quite useful - I have wished for this many times when people would send
PB> me some "complete rewrite" patches which I actually wanted to review.
PB> Please don't give up on it. :-)
Well, although I do not do Porcelain ;-), I do want to have -O
to help my use pattern. I envision that Porcelain noticing the
existence of ${GIT-.git}/patch-order file and adding -O to its
diff-* argument would make the world a better place.
And I am not giving up on -B yet, but as you can imagine, it has
interesting interaction with -M/-C, since rename detection code
must be prepared to deal with broken pairs (earlier it did not
have to worry about the same path that is a regular file
appearing twice in its input). Since I have tested -B only in
the context of full set of patches I sent out, and have not
tested it with only the subset Linus decided to swallow, I would
recommend against applying it on top of the Linus tip as is.
I'll be rebasing it to his tip, test it again and then resubmit
later. I'll do the same for -O changes.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2005-05-29 23:48 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-05-28 19:33 [PATCH] Add -B flag to diff-* brothers Junio C Hamano
2005-05-29 9:14 ` [PATCH] diff: move diffcore-break before diffcore-rename Junio C Hamano
2005-05-29 21:56 ` [PATCH] Add -B flag to diff-* brothers Petr Baudis
2005-05-29 23:51 ` Junio C Hamano
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).