git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* A bug in git 1.6.5.2 with git log --stat: shows a negative number as a size
@ 2010-04-16 13:59 Heikki Orsila
  2010-04-16 15:02 ` Tomas Carnecky
  0 siblings, 1 reply; 5+ messages in thread
From: Heikki Orsila @ 2010-04-16 13:59 UTC (permalink / raw)
  To: git

I'm running git version 1.6.5.2. git log --stat shows a negative
diffstat size for two files that are each 2049MiB in size.

Steps to reproduce:

$ for f in 0 1 ; do dd bs=$((1024*1024)) if=/dev/zero of=$f count=2049 ; done
$ git add 0 1
$ git commit -m "test commit"
$ git log --stat
commit 6afe3d3c889daa92bd79956c4bb733eb5cb408dc
Author: Heikki Orsila <heikki.orsila@iki.fi>
Date:   2010-04-16 16:54:52 +0300

    test commit

 0 |  Bin 0 -> -2146435072 bytes
 1 |  Bin 0 -> -2146435072 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)

-- 
Heikki Orsila
heikki.orsila@iki.fi
http://www.iki.fi/shd

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: A bug in git 1.6.5.2 with git log --stat: shows a negative number as a size
  2010-04-16 13:59 A bug in git 1.6.5.2 with git log --stat: shows a negative number as a size Heikki Orsila
@ 2010-04-16 15:02 ` Tomas Carnecky
  2010-04-17 10:25   ` [PATCH] diff: use 64-bit integers for diffstat calculations Jeff King
  0 siblings, 1 reply; 5+ messages in thread
From: Tomas Carnecky @ 2010-04-16 15:02 UTC (permalink / raw)
  To: Heikki Orsila; +Cc: git

On 4/16/10 3:59 PM, Heikki Orsila wrote:
> I'm running git version 1.6.5.2. git log --stat shows a negative
> diffstat size for two files that are each 2049MiB in size.
>
> Steps to reproduce:
>
> $ for f in 0 1 ; do dd bs=$((1024*1024)) if=/dev/zero of=$f count=2049 ; done
> $ git add 0 1
> $ git commit -m "test commit"
> $ git log --stat
> commit 6afe3d3c889daa92bd79956c4bb733eb5cb408dc
> Author: Heikki Orsila<heikki.orsila@iki.fi>
> Date:   2010-04-16 16:54:52 +0300
>
>      test commit
>
>   0 |  Bin 0 ->  -2146435072 bytes
>   1 |  Bin 0 ->  -2146435072 bytes
>   2 files changed, 0 insertions(+), 0 deletions(-)

Yep, bug is also in the latest version (1.7.0.5). The code uses 'int' 
instead of something big enough to hold the size of your files.

http://git.kernel.org/?p=git/git.git;a=blob;f=diff.c;h=a1bf1e9cb37104cda8168c5118769ce5bbcfcbb2;hb=HEAD#l1105

and a couple lines below (1124) you see that the stat is printed out.

tom

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] diff: use 64-bit integers for diffstat calculations
  2010-04-16 15:02 ` Tomas Carnecky
@ 2010-04-17 10:25   ` Jeff King
  2010-04-17 17:00     ` Junio C Hamano
  0 siblings, 1 reply; 5+ messages in thread
From: Jeff King @ 2010-04-17 10:25 UTC (permalink / raw)
  To: Tomas Carnecky; +Cc: Junio C Hamano, Heikki Orsila, git

On Fri, Apr 16, 2010 at 05:02:01PM +0200, Tomas Carnecky wrote:

> >commit 6afe3d3c889daa92bd79956c4bb733eb5cb408dc
> >Author: Heikki Orsila<heikki.orsila@iki.fi>
> >Date:   2010-04-16 16:54:52 +0300
> >
> >     test commit
> >
> >  0 |  Bin 0 ->  -2146435072 bytes
> >  1 |  Bin 0 ->  -2146435072 bytes
> >  2 files changed, 0 insertions(+), 0 deletions(-)
> 
> Yep, bug is also in the latest version (1.7.0.5). The code uses 'int'
> instead of something big enough to hold the size of your files.

Yuck, we use "unsigned int" for the actual storage, and then convert to
a regular "int" in some other places. I think we should just do this:

-- >8 --
Subject: [PATCH] diff: use 64-bit integers for diffstat calculations

The diffstat "added" and "changed" fields generally store
line counts; however, for binary files, they store file
sizes. Since we store and print these values as ints, a
diffstat on a file larger than 2G can show a negative size.
Instead, let's explicitly use 64-bit integers.

Signed-off-by: Jeff King <peff@peff.net>
---
 diff.c |   21 ++++++++++++---------
 1 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/diff.c b/diff.c
index 7effdac..54933cc 100644
--- a/diff.c
+++ b/diff.c
@@ -936,7 +936,7 @@ struct diffstat_t {
 		unsigned is_unmerged:1;
 		unsigned is_binary:1;
 		unsigned is_renamed:1;
-		unsigned int added, deleted;
+		uint64_t added, deleted;
 	} **files;
 };
 
@@ -1028,7 +1028,7 @@ static void fill_print_name(struct diffstat_file *file)
 static void show_stats(struct diffstat_t *data, struct diff_options *options)
 {
 	int i, len, add, del, adds = 0, dels = 0;
-	int max_change = 0, max_len = 0;
+	uint64_t max_change = 0, max_len = 0;
 	int total_files = data->nr;
 	int width, name_width;
 	const char *reset, *set, *add_c, *del_c;
@@ -1057,7 +1057,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 
 	for (i = 0; i < data->nr; i++) {
 		struct diffstat_file *file = data->files[i];
-		int change = file->added + file->deleted;
+		uint64_t change = file->added + file->deleted;
 		fill_print_name(file);
 		len = strlen(file->print_name);
 		if (max_len < len)
@@ -1085,8 +1085,8 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 	for (i = 0; i < data->nr; i++) {
 		const char *prefix = "";
 		char *name = data->files[i]->print_name;
-		int added = data->files[i]->added;
-		int deleted = data->files[i]->deleted;
+		uint64_t added = data->files[i]->added;
+		uint64_t deleted = data->files[i]->deleted;
 		int name_len;
 
 		/*
@@ -1107,9 +1107,11 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 		if (data->files[i]->is_binary) {
 			show_name(options->file, prefix, name, len);
 			fprintf(options->file, "  Bin ");
-			fprintf(options->file, "%s%d%s", del_c, deleted, reset);
+			fprintf(options->file, "%s%"PRIu64"%s",
+				del_c, deleted, reset);
 			fprintf(options->file, " -> ");
-			fprintf(options->file, "%s%d%s", add_c, added, reset);
+			fprintf(options->file, "%s%"PRIu64"%s",
+				add_c, added, reset);
 			fprintf(options->file, " bytes");
 			fprintf(options->file, "\n");
 			continue;
@@ -1138,7 +1140,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 			del = scale_linear(del, width, max_change);
 		}
 		show_name(options->file, prefix, name, len);
-		fprintf(options->file, "%5d%s", added + deleted,
+		fprintf(options->file, "%5"PRIu64"%s", added + deleted,
 				added + deleted ? " " : "");
 		show_graph(options->file, '+', add, add_c, reset);
 		show_graph(options->file, '-', del, del_c, reset);
@@ -1188,7 +1190,8 @@ static void show_numstat(struct diffstat_t *data, struct diff_options *options)
 			fprintf(options->file, "-\t-\t");
 		else
 			fprintf(options->file,
-				"%d\t%d\t", file->added, file->deleted);
+				"%"PRIu64"\t%"PRIu64"\t",
+				file->added, file->deleted);
 		if (options->line_termination) {
 			fill_print_name(file);
 			if (!file->is_renamed)
-- 
1.7.1.rc1.277.g2c4c9

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] diff: use 64-bit integers for diffstat calculations
  2010-04-17 10:25   ` [PATCH] diff: use 64-bit integers for diffstat calculations Jeff King
@ 2010-04-17 17:00     ` Junio C Hamano
  2010-04-17 17:41       ` Jeff King
  0 siblings, 1 reply; 5+ messages in thread
From: Junio C Hamano @ 2010-04-17 17:00 UTC (permalink / raw)
  To: Jeff King; +Cc: Tomas Carnecky, Heikki Orsila, git

Jeff King <peff@peff.net> writes:

> Yuck, we use "unsigned int" for the actual storage, and then convert to
> a regular "int" in some other places. I think we should just do this:
>
> -- >8 --
> Subject: [PATCH] diff: use 64-bit integers for diffstat calculations
>
> The diffstat "added" and "changed" fields generally store
> line counts; however, for binary files, they store file
> sizes. Since we store and print these values as ints, a
> diffstat on a file larger than 2G can show a negative size.
> Instead, let's explicitly use 64-bit integers.
>
> Signed-off-by: Jeff King <peff@peff.net>
> ---

Yes, but we would probably be better off using using uintmax_t for things
like this if the quantity a variable represents is not closely tied to
external file format (e.g. the offset field of pack idx file), nor the
code is only for a particular platform (e.g. compat/win32mmap.c), don't
you think?

That is the impression I am getting on the discipline expressed in the
current codebase, from browsing the output from "git grep uint64_t".

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] diff: use 64-bit integers for diffstat calculations
  2010-04-17 17:00     ` Junio C Hamano
@ 2010-04-17 17:41       ` Jeff King
  0 siblings, 0 replies; 5+ messages in thread
From: Jeff King @ 2010-04-17 17:41 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Tomas Carnecky, Heikki Orsila, git

On Sat, Apr 17, 2010 at 10:00:44AM -0700, Junio C Hamano wrote:

> > The diffstat "added" and "changed" fields generally store
> > line counts; however, for binary files, they store file
> > sizes. Since we store and print these values as ints, a
> > diffstat on a file larger than 2G can show a negative size.
> > Instead, let's explicitly use 64-bit integers.
> 
> Yes, but we would probably be better off using using uintmax_t for things
> like this if the quantity a variable represents is not closely tied to
> external file format (e.g. the offset field of pack idx file), nor the
> code is only for a particular platform (e.g. compat/win32mmap.c), don't
> you think?

But 640K^W 64-bits should be large enough for anyone! :)

But yes, you're right. Updated patch below.

-- >8 --
Subject: [PATCH] diff: use large integers for diffstat calculations

The diffstat "added" and "changed" fields generally store
line counts; however, for binary files, they store file
sizes. Since we store and print these values as ints, a
diffstat on a file larger than 2G can show a negative size.
Instead, let's use uintmax_t, which should be at least 64
bits on modern platforms.

Signed-off-by: Jeff King <peff@peff.net>
---
 diff.c |   21 ++++++++++++---------
 1 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/diff.c b/diff.c
index 7effdac..fbdbd8d 100644
--- a/diff.c
+++ b/diff.c
@@ -936,7 +936,7 @@ struct diffstat_t {
 		unsigned is_unmerged:1;
 		unsigned is_binary:1;
 		unsigned is_renamed:1;
-		unsigned int added, deleted;
+		uintmax_t added, deleted;
 	} **files;
 };
 
@@ -1028,7 +1028,7 @@ static void fill_print_name(struct diffstat_file *file)
 static void show_stats(struct diffstat_t *data, struct diff_options *options)
 {
 	int i, len, add, del, adds = 0, dels = 0;
-	int max_change = 0, max_len = 0;
+	uintmax_t max_change = 0, max_len = 0;
 	int total_files = data->nr;
 	int width, name_width;
 	const char *reset, *set, *add_c, *del_c;
@@ -1057,7 +1057,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 
 	for (i = 0; i < data->nr; i++) {
 		struct diffstat_file *file = data->files[i];
-		int change = file->added + file->deleted;
+		uintmax_t change = file->added + file->deleted;
 		fill_print_name(file);
 		len = strlen(file->print_name);
 		if (max_len < len)
@@ -1085,8 +1085,8 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 	for (i = 0; i < data->nr; i++) {
 		const char *prefix = "";
 		char *name = data->files[i]->print_name;
-		int added = data->files[i]->added;
-		int deleted = data->files[i]->deleted;
+		uintmax_t added = data->files[i]->added;
+		uintmax_t deleted = data->files[i]->deleted;
 		int name_len;
 
 		/*
@@ -1107,9 +1107,11 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 		if (data->files[i]->is_binary) {
 			show_name(options->file, prefix, name, len);
 			fprintf(options->file, "  Bin ");
-			fprintf(options->file, "%s%d%s", del_c, deleted, reset);
+			fprintf(options->file, "%s%"PRIuMAX"%s",
+				del_c, deleted, reset);
 			fprintf(options->file, " -> ");
-			fprintf(options->file, "%s%d%s", add_c, added, reset);
+			fprintf(options->file, "%s%"PRIuMAX"%s",
+				add_c, added, reset);
 			fprintf(options->file, " bytes");
 			fprintf(options->file, "\n");
 			continue;
@@ -1138,7 +1140,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 			del = scale_linear(del, width, max_change);
 		}
 		show_name(options->file, prefix, name, len);
-		fprintf(options->file, "%5d%s", added + deleted,
+		fprintf(options->file, "%5"PRIuMAX"%s", added + deleted,
 				added + deleted ? " " : "");
 		show_graph(options->file, '+', add, add_c, reset);
 		show_graph(options->file, '-', del, del_c, reset);
@@ -1188,7 +1190,8 @@ static void show_numstat(struct diffstat_t *data, struct diff_options *options)
 			fprintf(options->file, "-\t-\t");
 		else
 			fprintf(options->file,
-				"%d\t%d\t", file->added, file->deleted);
+				"%"PRIuMAX"\t%"PRIuMAX"\t",
+				file->added, file->deleted);
 		if (options->line_termination) {
 			fill_print_name(file);
 			if (!file->is_renamed)
-- 
1.7.1.rc1.277.g2c4c9

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2010-04-17 17:41 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-04-16 13:59 A bug in git 1.6.5.2 with git log --stat: shows a negative number as a size Heikki Orsila
2010-04-16 15:02 ` Tomas Carnecky
2010-04-17 10:25   ` [PATCH] diff: use 64-bit integers for diffstat calculations Jeff King
2010-04-17 17:00     ` Junio C Hamano
2010-04-17 17:41       ` Jeff King

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).