All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org, Junio C Hamano <gitster@pobox.com>,
	Jonathan Niedier <jrnieder@gmail.com>
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH] Reduce zlib deflate code duplication
Date: Fri, 27 Aug 2010 06:56:04 +1000	[thread overview]
Message-ID: <1282856164-5126-1-git-send-email-pclouds@gmail.com> (raw)

Most of deflation code is simply "given this buffer, just deflate
it". Make a common routine and reuse it instead. 

Exceptions include index-pack, http-push, fast-import,
write_sha1_file, which either make multiple zlib streams, or compress
and calculate sha1/checksum in parallel.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 There is possibly a regression here. remote-curl.c/post_rpc()
 specifies more compression options, although I doubt it affects
 performance. It also reveals places where deflation is expected
 to be always successful.

 archive-zip.c          |   37 ++-----------------------------------
 builtin/pack-objects.c |   24 +++---------------------
 cache.h                |    1 +
 diff.c                 |   29 +++--------------------------
 fast-import.c          |   40 ++++++++++++----------------------------
 remote-curl.c          |   31 ++++---------------------------
 wrapper.c              |   30 ++++++++++++++++++++++++++++++
 7 files changed, 55 insertions(+), 137 deletions(-)

diff --git a/archive-zip.c b/archive-zip.c
index cf28504..64ec465 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -87,39 +87,6 @@ static void copy_le32(unsigned char *dest, unsigned int n)
 	dest[3] = 0xff & (n >> 030);
 }
 
-static void *zlib_deflate(void *data, unsigned long size,
-		int compression_level, unsigned long *compressed_size)
-{
-	z_stream stream;
-	unsigned long maxsize;
-	void *buffer;
-	int result;
-
-	memset(&stream, 0, sizeof(stream));
-	deflateInit(&stream, compression_level);
-	maxsize = deflateBound(&stream, size);
-	buffer = xmalloc(maxsize);
-
-	stream.next_in = data;
-	stream.avail_in = size;
-	stream.next_out = buffer;
-	stream.avail_out = maxsize;
-
-	do {
-		result = deflate(&stream, Z_FINISH);
-	} while (result == Z_OK);
-
-	if (result != Z_STREAM_END) {
-		free(buffer);
-		return NULL;
-	}
-
-	deflateEnd(&stream);
-	*compressed_size = stream.total_out;
-
-	return buffer;
-}
-
 static int write_zip_entry(struct archiver_args *args,
 		const unsigned char *sha1, const char *path, size_t pathlen,
 		unsigned int mode, void *buffer, unsigned long size)
@@ -164,8 +131,8 @@ static int write_zip_entry(struct archiver_args *args,
 	}
 
 	if (method == 8) {
-		deflated = zlib_deflate(buffer, size, args->compression_level,
-				&compressed_size);
+		compressed_size = size;
+		deflated = git_deflate(buffer, &compressed_size, args->compression_level);
 		if (deflated && compressed_size - 6 < size) {
 			/* ZLIB --> raw compressed data (see RFC 1950) */
 			/* CMF and FLG ... */
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0e81673..20e2b6e 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -130,28 +130,10 @@ static void *get_delta(struct object_entry *entry)
 
 static unsigned long do_compress(void **pptr, unsigned long size)
 {
-	z_stream stream;
-	void *in, *out;
-	unsigned long maxsize;
-
-	memset(&stream, 0, sizeof(stream));
-	deflateInit(&stream, pack_compression_level);
-	maxsize = deflateBound(&stream, size);
-
-	in = *pptr;
-	out = xmalloc(maxsize);
+	void *out = git_deflate(*pptr, &size, pack_compression_level);
+	free(*pptr);
 	*pptr = out;
-
-	stream.next_in = in;
-	stream.avail_in = size;
-	stream.next_out = out;
-	stream.avail_out = maxsize;
-	while (deflate(&stream, Z_FINISH) == Z_OK)
-		; /* nothing */
-	deflateEnd(&stream);
-
-	free(in);
-	return stream.total_out;
+	return size;
 }
 
 /*
diff --git a/cache.h b/cache.h
index eb77e1d..c8ab52d 100644
--- a/cache.h
+++ b/cache.h
@@ -22,6 +22,7 @@
 void git_inflate_init(z_streamp strm);
 void git_inflate_end(z_streamp strm);
 int git_inflate(z_streamp strm, int flush);
+void *git_deflate(void *buf, unsigned long *size, int compression_level);
 
 #if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
 #define DTYPE(de)	((de)->d_type)
diff --git a/diff.c b/diff.c
index 6fb97d4..18f2f55 100644
--- a/diff.c
+++ b/diff.c
@@ -1674,30 +1674,6 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len)
 	}
 }
 
-static unsigned char *deflate_it(char *data,
-				 unsigned long size,
-				 unsigned long *result_size)
-{
-	int bound;
-	unsigned char *deflated;
-	z_stream stream;
-
-	memset(&stream, 0, sizeof(stream));
-	deflateInit(&stream, zlib_compression_level);
-	bound = deflateBound(&stream, size);
-	deflated = xmalloc(bound);
-	stream.next_out = deflated;
-	stream.avail_out = bound;
-
-	stream.next_in = (unsigned char *)data;
-	stream.avail_in = size;
-	while (deflate(&stream, Z_FINISH) == Z_OK)
-		; /* nothing */
-	deflateEnd(&stream);
-	*result_size = stream.total_out;
-	return deflated;
-}
-
 static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, char *prefix)
 {
 	void *cp;
@@ -1713,7 +1689,8 @@ static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, char
 	 * whichever is smaller.
 	 */
 	delta = NULL;
-	deflated = deflate_it(two->ptr, two->size, &deflate_size);
+	deflate_size = two->size;
+	deflated = git_deflate(two->ptr, &deflate_size, zlib_compression_level);
 	if (one->size && two->size) {
 		delta = diff_delta(one->ptr, one->size,
 				   two->ptr, two->size,
@@ -1721,7 +1698,7 @@ static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, char
 		if (delta) {
 			void *to_free = delta;
 			orig_size = delta_size;
-			delta = deflate_it(delta, delta_size, &delta_size);
+			delta = git_deflate(delta, &delta_size, zlib_compression_level);
 			free(to_free);
 		}
 	}
diff --git a/fast-import.c b/fast-import.c
index dd51ac4..28bac18 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -993,7 +993,7 @@ static int store_object(
 	unsigned char sha1[20];
 	unsigned long hdrlen, deltalen;
 	git_SHA_CTX c;
-	z_stream s;
+	unsigned long compressed_size;
 
 	hdrlen = sprintf((char *)hdr,"%s %lu", typename(type),
 		(unsigned long)dat->len) + 1;
@@ -1025,24 +1025,13 @@ static int store_object(
 	} else
 		delta = NULL;
 
-	memset(&s, 0, sizeof(s));
-	deflateInit(&s, pack_compression_level);
-	if (delta) {
-		s.next_in = delta;
-		s.avail_in = deltalen;
-	} else {
-		s.next_in = (void *)dat->buf;
-		s.avail_in = dat->len;
-	}
-	s.avail_out = deflateBound(&s, s.avail_in);
-	s.next_out = out = xmalloc(s.avail_out);
-	while (deflate(&s, Z_FINISH) == Z_OK)
-		/* nothing */;
-	deflateEnd(&s);
+	compressed_size = delta ? deltalen : dat->len;
+	out = git_deflate(delta ? delta : dat->buf, &compressed_size,
+			  pack_compression_level);
 
 	/* Determine if we should auto-checkpoint. */
-	if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize)
-		|| (pack_size + 60 + s.total_out) < pack_size) {
+	if ((max_packsize && (pack_size + 60 + compressed_size) > max_packsize)
+		|| (pack_size + 60 + compressed_size) < pack_size) {
 
 		/* This new object needs to *not* have the current pack_id. */
 		e->pack_id = pack_id + 1;
@@ -1053,15 +1042,10 @@ static int store_object(
 			free(delta);
 			delta = NULL;
 
-			memset(&s, 0, sizeof(s));
-			deflateInit(&s, pack_compression_level);
-			s.next_in = (void *)dat->buf;
-			s.avail_in = dat->len;
-			s.avail_out = deflateBound(&s, s.avail_in);
-			s.next_out = out = xrealloc(out, s.avail_out);
-			while (deflate(&s, Z_FINISH) == Z_OK)
-				/* nothing */;
-			deflateEnd(&s);
+			free(out);
+			compressed_size = dat->len;
+			out = git_deflate(dat->buf, &compressed_size,
+					  pack_compression_level);
 		}
 	}
 
@@ -1096,8 +1080,8 @@ static int store_object(
 		pack_size += hdrlen;
 	}
 
-	sha1write(pack_file, out, s.total_out);
-	pack_size += s.total_out;
+	sha1write(pack_file, out, compressed_size);
+	pack_size += compressed_size;
 
 	e->idx.crc32 = crc32_end(pack_file);
 
diff --git a/remote-curl.c b/remote-curl.c
index 04d4813..e0d07fc 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -420,33 +420,10 @@ static int post_rpc(struct rpc_state *rpc)
 		 * we can try to deflate it ourselves, this may save on.
 		 * the transfer time.
 		 */
-		size_t size;
-		z_stream stream;
-		int ret;
-
-		memset(&stream, 0, sizeof(stream));
-		ret = deflateInit2(&stream, Z_BEST_COMPRESSION,
-				Z_DEFLATED, (15 + 16),
-				8, Z_DEFAULT_STRATEGY);
-		if (ret != Z_OK)
-			die("cannot deflate request; zlib init error %d", ret);
-		size = deflateBound(&stream, rpc->len);
-		gzip_body = xmalloc(size);
-
-		stream.next_in = (unsigned char *)rpc->buf;
-		stream.avail_in = rpc->len;
-		stream.next_out = (unsigned char *)gzip_body;
-		stream.avail_out = size;
-
-		ret = deflate(&stream, Z_FINISH);
-		if (ret != Z_STREAM_END)
-			die("cannot deflate request; zlib deflate error %d", ret);
-
-		ret = deflateEnd(&stream);
-		if (ret != Z_OK)
-			die("cannot deflate request; zlib end error %d", ret);
-
-		size = stream.total_out;
+		unsigned long size = rpc->len;
+		gzip_body = git_deflate(rpc->buf, &size, Z_BEST_COMPRESSION);
+		if (!gzip_body)
+			die("cannot deflate request; zlib deflate error");
 
 		headers = curl_slist_append(headers, "Content-Encoding: gzip");
 		curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDS, gzip_body);
diff --git a/wrapper.c b/wrapper.c
index fd8ead3..fda7279 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -289,6 +289,36 @@ int git_inflate(z_streamp strm, int flush)
 	return ret;
 }
 
+void *git_deflate(void *in, unsigned long *size, int compression_level)
+{
+	z_stream stream;
+	void *out;
+	unsigned long maxsize;
+	int result;
+
+	memset(&stream, 0, sizeof(stream));
+	deflateInit(&stream, compression_level);
+	maxsize = deflateBound(&stream, *size);
+
+	out = xmalloc(maxsize);
+	stream.next_in = in;
+	stream.avail_in = *size;
+	stream.next_out = out;
+	stream.avail_out = maxsize;
+	while ((result = deflate(&stream, Z_FINISH)) == Z_OK)
+		; /* nothing */
+
+	if (result != Z_STREAM_END) {
+		*size = 0;
+		free(out);
+		return NULL;
+	}
+
+	deflateEnd(&stream);
+	*size = stream.total_out;
+	return out;
+}
+
 int odb_mkstemp(char *template, size_t limit, const char *pattern)
 {
 	int fd;
-- 
1.7.1.rc1.69.g24c2f7

             reply	other threads:[~2010-08-26 22:15 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-08-26 20:56 Nguyễn Thái Ngọc Duy [this message]
2010-08-27  1:56 ` [PATCH] Reduce zlib deflate code duplication Shawn Pearce
2010-08-27  2:37 ` Jonathan Nieder
2010-08-30  1:46 ` Nguyễn Thái Ngọc Duy
2010-08-31 17:46   ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1282856164-5126-1-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jrnieder@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.