* [PATCH] diff.c: locate_size_cache() fix.
@ 2005-06-04 6:02 ` Junio C Hamano
2005-06-05 20:40 ` Petr Baudis
0 siblings, 1 reply; 4+ messages in thread
From: Junio C Hamano @ 2005-06-04 6:02 UTC (permalink / raw)
To: Linus Torvalds; +Cc: git
This fixes two bugs.
- declaration of auto variable "cmp" was preceeded by a
statement, causing compilation error on real C compilers;
noticed and patch given by Yoichi Yuasa.
- the function's calling convention was overloading its size
parameter to mean "largest possible value means do not add
entry", which was a bad taste. Brought up during a
discussion with Peter Baudis.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
diff.c | 11 ++++++-----
1 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/diff.c b/diff.c
--- a/diff.c
+++ b/diff.c
@@ -236,6 +236,7 @@ static struct sha1_size_cache {
static int sha1_size_cache_nr, sha1_size_cache_alloc;
static struct sha1_size_cache *locate_size_cache(unsigned char *sha1,
+ int find_only,
unsigned long size)
{
int first, last;
@@ -244,9 +245,9 @@ static struct sha1_size_cache *locate_si
first = 0;
last = sha1_size_cache_nr;
while (last > first) {
- int next = (last + first) >> 1;
+ int cmp, next = (last + first) >> 1;
e = sha1_size_cache[next];
- int cmp = memcmp(e->sha1, sha1, 20);
+ cmp = memcmp(e->sha1, sha1, 20);
if (!cmp)
return e;
if (cmp < 0) {
@@ -256,7 +257,7 @@ static struct sha1_size_cache *locate_si
first = next+1;
}
/* not found */
- if (size == UINT_MAX)
+ if (find_only)
return NULL;
/* insert to make it at "first" */
if (sha1_size_cache_alloc <= sha1_size_cache_nr) {
@@ -337,13 +338,13 @@ int diff_populate_filespec(struct diff_f
struct sha1_size_cache *e;
if (size_only) {
- e = locate_size_cache(s->sha1, UINT_MAX);
+ e = locate_size_cache(s->sha1, 1, 0);
if (e) {
s->size = e->size;
return 0;
}
if (!sha1_file_size(s->sha1, &s->size))
- locate_size_cache(s->sha1, s->size);
+ locate_size_cache(s->sha1, 0, s->size);
}
else {
s->data = read_sha1_file(s->sha1, type, &s->size);
------------
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH] diff.c: -B argument passing fix.
@ 2005-06-04 6:04 ` Junio C Hamano
2005-06-04 6:02 ` [PATCH] diff.c: locate_size_cache() fix Junio C Hamano
0 siblings, 1 reply; 4+ messages in thread
From: Junio C Hamano @ 2005-06-04 6:04 UTC (permalink / raw)
To: Linus Torvalds; +Cc: git
This fixes a bug that was preventing non-default parameter to -B
option to be passed correctly; you could not give more than 50%
break score.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
diff.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/diff.c b/diff.c
--- a/diff.c
+++ b/diff.c
@@ -964,11 +964,11 @@ void diffcore_std(const char **paths,
{
if (paths && paths[0])
diffcore_pathspec(paths);
- if (0 <= break_opt)
+ if (break_opt != -1)
diffcore_break(break_opt);
if (detect_rename)
diffcore_rename(detect_rename, rename_score);
- if (0 <= break_opt)
+ if (break_opt != -1)
diffcore_merge_broken();
if (pickaxe)
diffcore_pickaxe(pickaxe, pickaxe_opts);
------------
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH] diffcore-break.c: various fixes.
@ 2005-06-04 6:05 Junio C Hamano
2005-06-04 6:04 ` [PATCH] diff.c: -B argument passing fix Junio C Hamano
0 siblings, 1 reply; 4+ messages in thread
From: Junio C Hamano @ 2005-06-04 6:05 UTC (permalink / raw)
To: Linus Torvalds; +Cc: git
This fixes three bugs in the -B heuristics.
- Although it was advertised that the initial break criteria
used was the same as what diffcore-rename uses, it was using
something different. Instead of using smaller of src and dst
size to compare with "edit" size, (insertion and deletion),
it was using larger of src and dst, unlike the rename/copy
detection logic. This caused the parameter to -B to mean
something different from the one to -M and -C. To compensate
for this change, the default break score is also changed to
match that of the default for rename/copy.
- The code would have crashed with division by zero when trying
to break an originally empty file.
- Contrary to what the comment said, the algorithm was breaking
small files, only to later merge them together.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
diffcore.h | 2 +-
diffcore-break.c | 22 ++++++++--------------
2 files changed, 9 insertions(+), 15 deletions(-)
diff --git a/diffcore.h b/diffcore.h
--- a/diffcore.h
+++ b/diffcore.h
@@ -17,7 +17,7 @@
*/
#define MAX_SCORE 60000
#define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
-#define DEFAULT_BREAK_SCORE 59400 /* minimum for break to happen (99%)*/
+#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%)*/
#define DEFAULT_MERGE_SCORE 48000 /* maximum for break-merge to happen (80%)*/
#define MINIMUM_BREAK_SIZE 400 /* do not break a file smaller than this */
diff --git a/diffcore-break.c b/diffcore-break.c
--- a/diffcore-break.c
+++ b/diffcore-break.c
@@ -61,14 +61,7 @@ static int should_break(struct diff_file
if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
return 0; /* error but caught downstream */
- delta_size = ((src->size < dst->size) ?
- (dst->size - src->size) : (src->size - dst->size));
-
- /* Notice that we use max of src and dst as the base size,
- * unlike rename similarity detection. This is so that we do
- * not mistake a large addition as a complete rewrite.
- */
- base_size = ((src->size < dst->size) ? dst->size : src->size);
+ base_size = ((src->size < dst->size) ? src->size : dst->size);
delta = diff_delta(src->data, src->size,
dst->data, dst->size,
@@ -88,10 +81,11 @@ static int should_break(struct diff_file
* less than the minimum, after rename/copy runs.
*/
if (src->size <= src_copied)
- delta_size = 0; /* avoid wrapping around */
- else
+ ; /* all copied, nothing removed */
+ else {
delta_size = src->size - src_copied;
- *merge_score_p = delta_size * MAX_SCORE / src->size;
+ *merge_score_p = delta_size * MAX_SCORE / src->size;
+ }
/* Extent of damage, which counts both inserts and
* deletes.
@@ -174,7 +168,8 @@ void diffcore_break(int break_score)
!S_ISDIR(p->one->mode) && !S_ISDIR(p->two->mode) &&
!strcmp(p->one->path, p->two->path)) {
if (should_break(p->one, p->two,
- break_score, &score)) {
+ break_score, &score) &&
+ MINIMUM_BREAK_SIZE <= p->one->size) {
/* Split this into delete and create */
struct diff_filespec *null_one, *null_two;
struct diff_filepair *dp;
@@ -185,8 +180,7 @@ void diffcore_break(int break_score)
* Also we do not want to break very
* small files.
*/
- if ((score < merge_score) ||
- (p->one->size < MINIMUM_BREAK_SIZE))
+ if (score < merge_score)
score = 0;
/* deletion of one */
------------
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] diff.c: locate_size_cache() fix.
2005-06-04 6:02 ` [PATCH] diff.c: locate_size_cache() fix Junio C Hamano
@ 2005-06-05 20:40 ` Petr Baudis
0 siblings, 0 replies; 4+ messages in thread
From: Petr Baudis @ 2005-06-05 20:40 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Junio C Hamano, git
> This fixes two bugs.
>
> - declaration of auto variable "cmp" was preceeded by a
> statement, causing compilation error on real C compilers;
> noticed and patch given by Yoichi Yuasa.
>
> - the function's calling convention was overloading its size
> parameter to mean "largest possible value means do not add
> entry", which was a bad taste. Brought up during a
> discussion with Peter Baudis.
>
> Signed-off-by: Junio C Hamano <junkio@cox.net>
> This fixes a bug that was preventing non-default parameter to -B
> option to be passed correctly; you could not give more than 50%
> break score.
>
> Signed-off-by: Junio C Hamano <junkio@cox.net>
> This fixes three bugs in the -B heuristics.
>
> - Although it was advertised that the initial break criteria
> used was the same as what diffcore-rename uses, it was using
> something different. Instead of using smaller of src and dst
> size to compare with "edit" size, (insertion and deletion),
> it was using larger of src and dst, unlike the rename/copy
> detection logic. This caused the parameter to -B to mean
> something different from the one to -M and -C. To compensate
> for this change, the default break score is also changed to
> match that of the default for rename/copy.
>
> - The code would have crashed with division by zero when trying
> to break an originally empty file.
>
> - Contrary to what the comment said, the algorithm was breaking
> small files, only to later merge them together.
>
> Signed-off-by: Junio C Hamano <junkio@cox.net>
Hello,
what's up with those three fixes? Were they pointed out to be wrong
and thrown away in another thread, are they on hold or just dropped?
Should I just apply them to git-pb?
Thanks,
--
Petr "Pasky" Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2005-06-05 20:37 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-06-04 6:05 [PATCH] diffcore-break.c: various fixes Junio C Hamano
2005-06-04 6:04 ` [PATCH] diff.c: -B argument passing fix Junio C Hamano
2005-06-04 6:02 ` [PATCH] diff.c: locate_size_cache() fix Junio C Hamano
2005-06-05 20:40 ` Petr Baudis
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).