git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] index-pack: smarter memory usage when resolving deltas
@ 2010-04-12  2:57 Nicolas Pitre
  2010-04-12  2:57 ` [PATCH 2/2] index-pack: rationalize unpack_entry_data() Nicolas Pitre
  2010-04-12  6:30 ` [PATCH 1/2] index-pack: smarter memory usage when resolving deltas Johannes Sixt
  0 siblings, 2 replies; 6+ messages in thread
From: Nicolas Pitre @ 2010-04-12  2:57 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

In the same spirit as commit 9892bebafe, let's avoid allocating the full
buffer for the deflated data in get_data_from_pack() in order to inflate
it.  Let's read and inflate the data in chunks instead to reduce memory
usage.

Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
---
 builtin/index-pack.c |   39 +++++++++++++++++++--------------------
 1 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index b4cf8c5..c746d3b 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -359,34 +359,33 @@ static void *get_data_from_pack(struct object_entry *obj)
 {
 	off_t from = obj[0].idx.offset + obj[0].hdr_size;
 	unsigned long len = obj[1].idx.offset - from;
-	unsigned long rdy = 0;
-	unsigned char *src, *data;
+	unsigned char *data, inbuf[4096];
 	z_stream stream;
-	int st;
+	int status;
 
-	src = xmalloc(len);
-	data = src;
-	do {
-		ssize_t n = pread(pack_fd, data + rdy, len - rdy, from + rdy);
-		if (n < 0)
-			die_errno("cannot pread pack file");
-		if (!n)
-			die("premature end of pack file, %lu bytes missing",
-			    len - rdy);
-		rdy += n;
-	} while (rdy < len);
 	data = xmalloc(obj->size);
 	memset(&stream, 0, sizeof(stream));
+	git_inflate_init(&stream);
 	stream.next_out = data;
 	stream.avail_out = obj->size;
-	stream.next_in = src;
-	stream.avail_in = len;
-	git_inflate_init(&stream);
-	while ((st = git_inflate(&stream, Z_FINISH)) == Z_OK);
+
+	do {
+		ssize_t n = (len < sizeof(inbuf)) ? len : sizeof(inbuf);
+		n = pread(pack_fd, inbuf, n, from);
+		if (n < 0)
+			die_errno("cannot pread pack file");
+		if (!n)
+			die("premature end of pack file, %lu bytes missing", len);
+		from += n;
+		len -= n;
+		stream.next_in = inbuf;
+		stream.avail_in = n;
+		status = git_inflate(&stream, 0);
+	} while (len && status == Z_OK && !stream.avail_in);
 	git_inflate_end(&stream);
-	if (st != Z_STREAM_END || stream.total_out != obj->size)
+	/* This has been inflated OK when first encoumtered, so... */
+	if (status != Z_STREAM_END || stream.total_out != obj->size)
 		die("serious inflate inconsistency");
-	free(src);
 	return data;
 }
 
-- 
1.7.1.rc1.237.ge1730.dirty

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] index-pack: rationalize unpack_entry_data()
  2010-04-12  2:57 [PATCH 1/2] index-pack: smarter memory usage when resolving deltas Nicolas Pitre
@ 2010-04-12  2:57 ` Nicolas Pitre
  2010-04-12  6:36   ` Johannes Sixt
  2010-04-12  6:30 ` [PATCH 1/2] index-pack: smarter memory usage when resolving deltas Johannes Sixt
  1 sibling, 1 reply; 6+ messages in thread
From: Nicolas Pitre @ 2010-04-12  2:57 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Rework the loop to remove duplicated calls to use() and fill(), and
to make the code easier to read.

Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
---
 builtin/index-pack.c |   19 ++++++++-----------
 1 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index c746d3b..69b9167 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -266,27 +266,24 @@ static void unlink_base_data(struct base_data *c)
 
 static void *unpack_entry_data(unsigned long offset, unsigned long size)
 {
+	int status;
 	z_stream stream;
 	void *buf = xmalloc(size);
 
 	memset(&stream, 0, sizeof(stream));
+	git_inflate_init(&stream);
 	stream.next_out = buf;
 	stream.avail_out = size;
-	stream.next_in = fill(1);
-	stream.avail_in = input_len;
-	git_inflate_init(&stream);
 
-	for (;;) {
-		int ret = git_inflate(&stream, 0);
-		use(input_len - stream.avail_in);
-		if (stream.total_out == size && ret == Z_STREAM_END)
-			break;
-		if (ret != Z_OK)
-			bad_object(offset, "inflate returned %d", ret);
+	do {
 		stream.next_in = fill(1);
 		stream.avail_in = input_len;
-	}
+		status = git_inflate(&stream, 0);
+		use(input_len - stream.avail_in);
+	} while (status == Z_OK);
 	git_inflate_end(&stream);
+	if (stream.total_out != size || status != Z_STREAM_END)
+		bad_object(offset, "inflate returned %d", status);
 	return buf;
 }
 
-- 
1.7.1.rc1.237.ge1730.dirty

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] index-pack: smarter memory usage when resolving deltas
  2010-04-12  2:57 [PATCH 1/2] index-pack: smarter memory usage when resolving deltas Nicolas Pitre
  2010-04-12  2:57 ` [PATCH 2/2] index-pack: rationalize unpack_entry_data() Nicolas Pitre
@ 2010-04-12  6:30 ` Johannes Sixt
  2010-04-12 15:45   ` Nicolas Pitre
  1 sibling, 1 reply; 6+ messages in thread
From: Johannes Sixt @ 2010-04-12  6:30 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: Junio C Hamano, git

Am 4/12/2010 4:57, schrieb Nicolas Pitre:
> --- a/builtin/index-pack.c
> +++ b/builtin/index-pack.c
> @@ -359,34 +359,33 @@ static void *get_data_from_pack(struct object_entry *obj)
>  {
>  	off_t from = obj[0].idx.offset + obj[0].hdr_size;
>  	unsigned long len = obj[1].idx.offset - from;
> -	unsigned long rdy = 0;
> -	unsigned char *src, *data;
> +	unsigned char *data, inbuf[4096];

With this tiny buffer we make way more pread() calls than we used to,
right? This hurts emulated pread(), where we do at least 4 syscalls per
pread(). Shouldn't the buffer be much larger, like min(len,16*4096)?

-- Hannes

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] index-pack: rationalize unpack_entry_data()
  2010-04-12  2:57 ` [PATCH 2/2] index-pack: rationalize unpack_entry_data() Nicolas Pitre
@ 2010-04-12  6:36   ` Johannes Sixt
  2010-04-12 15:47     ` Nicolas Pitre
  0 siblings, 1 reply; 6+ messages in thread
From: Johannes Sixt @ 2010-04-12  6:36 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: Junio C Hamano, git

Am 4/12/2010 4:57, schrieb Nicolas Pitre:
> -	for (;;) {
> -		int ret = git_inflate(&stream, 0);
> -		use(input_len - stream.avail_in);
> -		if (stream.total_out == size && ret == Z_STREAM_END)
> -			break;
> -		if (ret != Z_OK)
> -			bad_object(offset, "inflate returned %d", ret);
> +	do {
>  		stream.next_in = fill(1);
>  		stream.avail_in = input_len;
> -	}
> +		status = git_inflate(&stream, 0);
> +		use(input_len - stream.avail_in);
> +	} while (status == Z_OK);
>  	git_inflate_end(&stream);
> +	if (stream.total_out != size || status != Z_STREAM_END)
> +		bad_object(offset, "inflate returned %d", status);
>  	return buf;

Is stream.total_out still valid after inflateEnd() (and I mean "valid by
definition", not just "by accident")? To stay on the safe side, you should
call git_inflate_end only after this last check.

-- Hannes

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] index-pack: smarter memory usage when resolving deltas
  2010-04-12  6:30 ` [PATCH 1/2] index-pack: smarter memory usage when resolving deltas Johannes Sixt
@ 2010-04-12 15:45   ` Nicolas Pitre
  0 siblings, 0 replies; 6+ messages in thread
From: Nicolas Pitre @ 2010-04-12 15:45 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Junio C Hamano, git

On Mon, 12 Apr 2010, Johannes Sixt wrote:

> Am 4/12/2010 4:57, schrieb Nicolas Pitre:
> > --- a/builtin/index-pack.c
> > +++ b/builtin/index-pack.c
> > @@ -359,34 +359,33 @@ static void *get_data_from_pack(struct object_entry *obj)
> >  {
> >  	off_t from = obj[0].idx.offset + obj[0].hdr_size;
> >  	unsigned long len = obj[1].idx.offset - from;
> > -	unsigned long rdy = 0;
> > -	unsigned char *src, *data;
> > +	unsigned char *data, inbuf[4096];
> 
> With this tiny buffer we make way more pread() calls than we used to,
> right? This hurts emulated pread(), where we do at least 4 syscalls per
> pread(). Shouldn't the buffer be much larger, like min(len,16*4096)?

Well, that is certainly a good compromize.  The buffer would need to be 
malloc()'d and free()'d again though.


Nicolas

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] index-pack: rationalize unpack_entry_data()
  2010-04-12  6:36   ` Johannes Sixt
@ 2010-04-12 15:47     ` Nicolas Pitre
  0 siblings, 0 replies; 6+ messages in thread
From: Nicolas Pitre @ 2010-04-12 15:47 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Junio C Hamano, git

On Mon, 12 Apr 2010, Johannes Sixt wrote:

> Am 4/12/2010 4:57, schrieb Nicolas Pitre:
> > -	for (;;) {
> > -		int ret = git_inflate(&stream, 0);
> > -		use(input_len - stream.avail_in);
> > -		if (stream.total_out == size && ret == Z_STREAM_END)
> > -			break;
> > -		if (ret != Z_OK)
> > -			bad_object(offset, "inflate returned %d", ret);
> > +	do {
> >  		stream.next_in = fill(1);
> >  		stream.avail_in = input_len;
> > -	}
> > +		status = git_inflate(&stream, 0);
> > +		use(input_len - stream.avail_in);
> > +	} while (status == Z_OK);
> >  	git_inflate_end(&stream);
> > +	if (stream.total_out != size || status != Z_STREAM_END)
> > +		bad_object(offset, "inflate returned %d", status);
> >  	return buf;
> 
> Is stream.total_out still valid after inflateEnd() (and I mean "valid by
> definition", not just "by accident")? 

I don't see any indication to the contrary.

> To stay on the safe side, you should call git_inflate_end only after 
> this last check.

Agreed.  Will do.


Nicolas

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2010-04-12 16:26 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-04-12  2:57 [PATCH 1/2] index-pack: smarter memory usage when resolving deltas Nicolas Pitre
2010-04-12  2:57 ` [PATCH 2/2] index-pack: rationalize unpack_entry_data() Nicolas Pitre
2010-04-12  6:36   ` Johannes Sixt
2010-04-12 15:47     ` Nicolas Pitre
2010-04-12  6:30 ` [PATCH 1/2] index-pack: smarter memory usage when resolving deltas Johannes Sixt
2010-04-12 15:45   ` Nicolas Pitre

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).