* [PATCH] Reduce zlib deflate code duplication
@ 2010-08-26 20:56 Nguyễn Thái Ngọc Duy
2010-08-27 1:56 ` Shawn Pearce
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2010-08-26 20:56 UTC (permalink / raw)
To: git, Junio C Hamano, Jonathan Niedier
Cc: Nguyễn Thái Ngọc Duy
Most of deflation code is simply "given this buffer, just deflate
it". Make a common routine and reuse it instead.
Exceptions include index-pack, http-push, fast-import,
write_sha1_file, which either make multiple zlib streams, or compress
and calculate sha1/checksum in parallel.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
There is possibly a regression here. remote-curl.c/post_rpc()
specifies more compression options, although I doubt it affects
performance. It also reveals places where deflation is expected
to be always successful.
archive-zip.c | 37 ++-----------------------------------
builtin/pack-objects.c | 24 +++---------------------
cache.h | 1 +
diff.c | 29 +++--------------------------
fast-import.c | 40 ++++++++++++----------------------------
remote-curl.c | 31 ++++---------------------------
wrapper.c | 30 ++++++++++++++++++++++++++++++
7 files changed, 55 insertions(+), 137 deletions(-)
diff --git a/archive-zip.c b/archive-zip.c
index cf28504..64ec465 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -87,39 +87,6 @@ static void copy_le32(unsigned char *dest, unsigned int n)
dest[3] = 0xff & (n >> 030);
}
-static void *zlib_deflate(void *data, unsigned long size,
- int compression_level, unsigned long *compressed_size)
-{
- z_stream stream;
- unsigned long maxsize;
- void *buffer;
- int result;
-
- memset(&stream, 0, sizeof(stream));
- deflateInit(&stream, compression_level);
- maxsize = deflateBound(&stream, size);
- buffer = xmalloc(maxsize);
-
- stream.next_in = data;
- stream.avail_in = size;
- stream.next_out = buffer;
- stream.avail_out = maxsize;
-
- do {
- result = deflate(&stream, Z_FINISH);
- } while (result == Z_OK);
-
- if (result != Z_STREAM_END) {
- free(buffer);
- return NULL;
- }
-
- deflateEnd(&stream);
- *compressed_size = stream.total_out;
-
- return buffer;
-}
-
static int write_zip_entry(struct archiver_args *args,
const unsigned char *sha1, const char *path, size_t pathlen,
unsigned int mode, void *buffer, unsigned long size)
@@ -164,8 +131,8 @@ static int write_zip_entry(struct archiver_args *args,
}
if (method == 8) {
- deflated = zlib_deflate(buffer, size, args->compression_level,
- &compressed_size);
+ compressed_size = size;
+ deflated = git_deflate(buffer, &compressed_size, args->compression_level);
if (deflated && compressed_size - 6 < size) {
/* ZLIB --> raw compressed data (see RFC 1950) */
/* CMF and FLG ... */
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0e81673..20e2b6e 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -130,28 +130,10 @@ static void *get_delta(struct object_entry *entry)
static unsigned long do_compress(void **pptr, unsigned long size)
{
- z_stream stream;
- void *in, *out;
- unsigned long maxsize;
-
- memset(&stream, 0, sizeof(stream));
- deflateInit(&stream, pack_compression_level);
- maxsize = deflateBound(&stream, size);
-
- in = *pptr;
- out = xmalloc(maxsize);
+ void *out = git_deflate(*pptr, &size, pack_compression_level);
+ free(*pptr);
*pptr = out;
-
- stream.next_in = in;
- stream.avail_in = size;
- stream.next_out = out;
- stream.avail_out = maxsize;
- while (deflate(&stream, Z_FINISH) == Z_OK)
- ; /* nothing */
- deflateEnd(&stream);
-
- free(in);
- return stream.total_out;
+ return size;
}
/*
diff --git a/cache.h b/cache.h
index eb77e1d..c8ab52d 100644
--- a/cache.h
+++ b/cache.h
@@ -22,6 +22,7 @@
void git_inflate_init(z_streamp strm);
void git_inflate_end(z_streamp strm);
int git_inflate(z_streamp strm, int flush);
+void *git_deflate(void *buf, unsigned long *size, int compression_level);
#if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
#define DTYPE(de) ((de)->d_type)
diff --git a/diff.c b/diff.c
index 6fb97d4..18f2f55 100644
--- a/diff.c
+++ b/diff.c
@@ -1674,30 +1674,6 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len)
}
}
-static unsigned char *deflate_it(char *data,
- unsigned long size,
- unsigned long *result_size)
-{
- int bound;
- unsigned char *deflated;
- z_stream stream;
-
- memset(&stream, 0, sizeof(stream));
- deflateInit(&stream, zlib_compression_level);
- bound = deflateBound(&stream, size);
- deflated = xmalloc(bound);
- stream.next_out = deflated;
- stream.avail_out = bound;
-
- stream.next_in = (unsigned char *)data;
- stream.avail_in = size;
- while (deflate(&stream, Z_FINISH) == Z_OK)
- ; /* nothing */
- deflateEnd(&stream);
- *result_size = stream.total_out;
- return deflated;
-}
-
static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, char *prefix)
{
void *cp;
@@ -1713,7 +1689,8 @@ static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, char
* whichever is smaller.
*/
delta = NULL;
- deflated = deflate_it(two->ptr, two->size, &deflate_size);
+ deflate_size = two->size;
+ deflated = git_deflate(two->ptr, &deflate_size, zlib_compression_level);
if (one->size && two->size) {
delta = diff_delta(one->ptr, one->size,
two->ptr, two->size,
@@ -1721,7 +1698,7 @@ static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, char
if (delta) {
void *to_free = delta;
orig_size = delta_size;
- delta = deflate_it(delta, delta_size, &delta_size);
+ delta = git_deflate(delta, &delta_size, zlib_compression_level);
free(to_free);
}
}
diff --git a/fast-import.c b/fast-import.c
index dd51ac4..28bac18 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -993,7 +993,7 @@ static int store_object(
unsigned char sha1[20];
unsigned long hdrlen, deltalen;
git_SHA_CTX c;
- z_stream s;
+ unsigned long compressed_size;
hdrlen = sprintf((char *)hdr,"%s %lu", typename(type),
(unsigned long)dat->len) + 1;
@@ -1025,24 +1025,13 @@ static int store_object(
} else
delta = NULL;
- memset(&s, 0, sizeof(s));
- deflateInit(&s, pack_compression_level);
- if (delta) {
- s.next_in = delta;
- s.avail_in = deltalen;
- } else {
- s.next_in = (void *)dat->buf;
- s.avail_in = dat->len;
- }
- s.avail_out = deflateBound(&s, s.avail_in);
- s.next_out = out = xmalloc(s.avail_out);
- while (deflate(&s, Z_FINISH) == Z_OK)
- /* nothing */;
- deflateEnd(&s);
+ compressed_size = delta ? deltalen : dat->len;
+ out = git_deflate(delta ? delta : dat->buf, &compressed_size,
+ pack_compression_level);
/* Determine if we should auto-checkpoint. */
- if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize)
- || (pack_size + 60 + s.total_out) < pack_size) {
+ if ((max_packsize && (pack_size + 60 + compressed_size) > max_packsize)
+ || (pack_size + 60 + compressed_size) < pack_size) {
/* This new object needs to *not* have the current pack_id. */
e->pack_id = pack_id + 1;
@@ -1053,15 +1042,10 @@ static int store_object(
free(delta);
delta = NULL;
- memset(&s, 0, sizeof(s));
- deflateInit(&s, pack_compression_level);
- s.next_in = (void *)dat->buf;
- s.avail_in = dat->len;
- s.avail_out = deflateBound(&s, s.avail_in);
- s.next_out = out = xrealloc(out, s.avail_out);
- while (deflate(&s, Z_FINISH) == Z_OK)
- /* nothing */;
- deflateEnd(&s);
+ free(out);
+ compressed_size = dat->len;
+ out = git_deflate(dat->buf, &compressed_size,
+ pack_compression_level);
}
}
@@ -1096,8 +1080,8 @@ static int store_object(
pack_size += hdrlen;
}
- sha1write(pack_file, out, s.total_out);
- pack_size += s.total_out;
+ sha1write(pack_file, out, compressed_size);
+ pack_size += compressed_size;
e->idx.crc32 = crc32_end(pack_file);
diff --git a/remote-curl.c b/remote-curl.c
index 04d4813..e0d07fc 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -420,33 +420,10 @@ static int post_rpc(struct rpc_state *rpc)
* we can try to deflate it ourselves, this may save on.
* the transfer time.
*/
- size_t size;
- z_stream stream;
- int ret;
-
- memset(&stream, 0, sizeof(stream));
- ret = deflateInit2(&stream, Z_BEST_COMPRESSION,
- Z_DEFLATED, (15 + 16),
- 8, Z_DEFAULT_STRATEGY);
- if (ret != Z_OK)
- die("cannot deflate request; zlib init error %d", ret);
- size = deflateBound(&stream, rpc->len);
- gzip_body = xmalloc(size);
-
- stream.next_in = (unsigned char *)rpc->buf;
- stream.avail_in = rpc->len;
- stream.next_out = (unsigned char *)gzip_body;
- stream.avail_out = size;
-
- ret = deflate(&stream, Z_FINISH);
- if (ret != Z_STREAM_END)
- die("cannot deflate request; zlib deflate error %d", ret);
-
- ret = deflateEnd(&stream);
- if (ret != Z_OK)
- die("cannot deflate request; zlib end error %d", ret);
-
- size = stream.total_out;
+ unsigned long size = rpc->len;
+ gzip_body = git_deflate(rpc->buf, &size, Z_BEST_COMPRESSION);
+ if (!gzip_body)
+ die("cannot deflate request; zlib deflate error");
headers = curl_slist_append(headers, "Content-Encoding: gzip");
curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDS, gzip_body);
diff --git a/wrapper.c b/wrapper.c
index fd8ead3..fda7279 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -289,6 +289,36 @@ int git_inflate(z_streamp strm, int flush)
return ret;
}
+void *git_deflate(void *in, unsigned long *size, int compression_level)
+{
+ z_stream stream;
+ void *out;
+ unsigned long maxsize;
+ int result;
+
+ memset(&stream, 0, sizeof(stream));
+ deflateInit(&stream, compression_level);
+ maxsize = deflateBound(&stream, *size);
+
+ out = xmalloc(maxsize);
+ stream.next_in = in;
+ stream.avail_in = *size;
+ stream.next_out = out;
+ stream.avail_out = maxsize;
+ while ((result = deflate(&stream, Z_FINISH)) == Z_OK)
+ ; /* nothing */
+
+ if (result != Z_STREAM_END) {
+ *size = 0;
+ free(out);
+ return NULL;
+ }
+
+ deflateEnd(&stream);
+ *size = stream.total_out;
+ return out;
+}
+
int odb_mkstemp(char *template, size_t limit, const char *pattern)
{
int fd;
--
1.7.1.rc1.69.g24c2f7
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] Reduce zlib deflate code duplication
2010-08-26 20:56 [PATCH] Reduce zlib deflate code duplication Nguyễn Thái Ngọc Duy
@ 2010-08-27 1:56 ` Shawn Pearce
2010-08-27 2:37 ` Jonathan Nieder
2010-08-30 1:46 ` Nguyễn Thái Ngọc Duy
2 siblings, 0 replies; 5+ messages in thread
From: Shawn Pearce @ 2010-08-27 1:56 UTC (permalink / raw)
To: Nguyễn Thái Ngọc Duy
Cc: git, Junio C Hamano, Jonathan Niedier
2010/8/26 Nguyễn Thái Ngọc Duy <pclouds@gmail.com>:
> Most of deflation code is simply "given this buffer, just deflate
> it". Make a common routine and reuse it instead.
>
> Exceptions include index-pack, http-push, fast-import,
> write_sha1_file, which either make multiple zlib streams, or compress
> and calculate sha1/checksum in parallel.
>
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> ---
> There is possibly a regression here. remote-curl.c/post_rpc()
> specifies more compression options, although I doubt it affects
> performance. It also reveals places where deflation is expected
> to be always successful.
It is a regression.
The compression options in remote-curl.c are to setup a gzip style
stream, because that is what the servers expect. Your new common
routine is using the bare deflation stream, which isn't the same
header/footer formatting, and thus servers will choke with your change
and smart HTTP payloads that are compressed by the client.
So yea, you can't get rid of all of those options for the remote-curl
init. You'll need to still let remote-curl setup its options itself,
and then pass you the zlib structure to deflate on.
--
Shawn.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Reduce zlib deflate code duplication
2010-08-26 20:56 [PATCH] Reduce zlib deflate code duplication Nguyễn Thái Ngọc Duy
2010-08-27 1:56 ` Shawn Pearce
@ 2010-08-27 2:37 ` Jonathan Nieder
2010-08-30 1:46 ` Nguyễn Thái Ngọc Duy
2 siblings, 0 replies; 5+ messages in thread
From: Jonathan Nieder @ 2010-08-27 2:37 UTC (permalink / raw)
To: Nguyễn Thái Ngọc Duy; +Cc: git, Junio C Hamano, Shawn Pearce
Nguyễn Thái Ngọc Duy wrote:
> Most of deflation code is simply "given this buffer, just deflate
> it". Make a common routine and reuse it instead.
I like this idea. But:
> There is possibly a regression here.
Right.
> --- a/archive-zip.c
> +++ b/archive-zip.c
Looks good.
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -130,28 +130,10 @@ static void *get_delta(struct object_entry *entry)
>
> static unsigned long do_compress(void **pptr, unsigned long size)
> {
> - z_stream stream;
> - void *in, *out;
> - unsigned long maxsize;
> -
> - memset(&stream, 0, sizeof(stream));
> - deflateInit(&stream, pack_compression_level);
> - maxsize = deflateBound(&stream, size);
> -
> - in = *pptr;
> - out = xmalloc(maxsize);
> + void *out = git_deflate(*pptr, &size, pack_compression_level);
> + free(*pptr);
> *pptr = out;
On error, previously *pptr and size would reflect a truncated result,
but now *pptr is NULL and size is 0. Both results are silly.
It would be nicer if the caller (or do_compress itself) could check
for errors and report them.
> --- a/diff.c
> +++ b/diff.c
> @@ -1713,7 +1689,8 @@ static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, char
> * whichever is smaller.
> */
> delta = NULL;
> - deflated = deflate_it(two->ptr, two->size, &deflate_size);
> + deflate_size = two->size;
> + deflated = git_deflate(two->ptr, &deflate_size, zlib_compression_level);
[...]
> @@ -1721,7 +1698,7 @@ static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, char
> if (delta) {
> void *to_free = delta;
> orig_size = delta_size;
> - delta = deflate_it(delta, delta_size, &delta_size);
> + delta = git_deflate(delta, &delta_size, zlib_compression_level);
[...]
> --- a/fast-import.c
> +++ b/fast-import.c
> @@ -1025,24 +1025,13 @@ static int store_object(
[...]
> - while (deflate(&s, Z_FINISH) == Z_OK)
> - /* nothing */;
> - deflateEnd(&s);
> + compressed_size = delta ? deltalen : dat->len;
> + out = git_deflate(delta ? delta : dat->buf, &compressed_size,
> + pack_compression_level);
[...]
> @@ -1053,15 +1042,10 @@ static int store_object(
[...]
> - s.next_out = out = xrealloc(out, s.avail_out);
> - while (deflate(&s, Z_FINISH) == Z_OK)
> - /* nothing */;
> - deflateEnd(&s);
> + free(out);
> + compressed_size = dat->len;
> + out = git_deflate(dat->buf, &compressed_size,
> + pack_compression_level);
Likewise.
> --- a/remote-curl.c
> +++ b/remote-curl.c
> @@ -420,33 +420,10 @@ static int post_rpc(struct rpc_state *rpc)
> * we can try to deflate it ourselves, this may save on.
> * the transfer time.
> */
> - size_t size;
> - z_stream stream;
> - int ret;
> -
> - memset(&stream, 0, sizeof(stream));
> - ret = deflateInit2(&stream, Z_BEST_COMPRESSION,
> - Z_DEFLATED, (15 + 16),
As Shawn mentioned, this requests gzip encoding with the default
window size.
> - ret = deflate(&stream, Z_FINISH);
> - if (ret != Z_STREAM_END)
> - die("cannot deflate request; zlib deflate error %d", ret);
> -
> - ret = deflateEnd(&stream);
> - if (ret != Z_OK)
> - die("cannot deflate request; zlib end error %d", ret);
> -
> - size = stream.total_out;
> + unsigned long size = rpc->len;
> + gzip_body = git_deflate(rpc->buf, &size, Z_BEST_COMPRESSION);
> + if (!gzip_body)
> + die("cannot deflate request; zlib deflate error");
The zlib error codes are very helpful for debugging, so I would be sad
to see them go.
Thanks,
Jonathan
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH] Reduce zlib deflate code duplication
2010-08-26 20:56 [PATCH] Reduce zlib deflate code duplication Nguyễn Thái Ngọc Duy
2010-08-27 1:56 ` Shawn Pearce
2010-08-27 2:37 ` Jonathan Nieder
@ 2010-08-30 1:46 ` Nguyễn Thái Ngọc Duy
2010-08-31 17:46 ` Junio C Hamano
2 siblings, 1 reply; 5+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2010-08-30 1:46 UTC (permalink / raw)
To: Junio C Hamano, Jonathan Niedier, git, spearce
Cc: Nguyễn Thái Ngọc Duy
This patch make a common deflate routine and reuse that routine as
much as possible.
Most of deflation code is simply "given this buffer, just deflate
it". Exceptions include index-pack, http-push, fast-import,
write_sha1_file and remote-curl, which either make multiple zlib
streams, or compress and calculate sha1/checksum in parallel, or
create gzip stream.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
remote-curl is now excluded from the patch.
git_deflate() returns zlib error code and will be used to print
something out. I'm not sure if it should die() inside diff.c.
It error() onlys for now.
archive-zip.c | 37 ++-----------------------------------
builtin/pack-objects.c | 27 ++++++---------------------
cache.h | 1 +
diff.c | 41 +++++++++++++++--------------------------
fast-import.c | 45 +++++++++++++++++----------------------------
wrapper.c | 31 +++++++++++++++++++++++++++++++
6 files changed, 72 insertions(+), 110 deletions(-)
diff --git a/archive-zip.c b/archive-zip.c
index cf28504..ed176ca 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -87,39 +87,6 @@ static void copy_le32(unsigned char *dest, unsigned int n)
dest[3] = 0xff & (n >> 030);
}
-static void *zlib_deflate(void *data, unsigned long size,
- int compression_level, unsigned long *compressed_size)
-{
- z_stream stream;
- unsigned long maxsize;
- void *buffer;
- int result;
-
- memset(&stream, 0, sizeof(stream));
- deflateInit(&stream, compression_level);
- maxsize = deflateBound(&stream, size);
- buffer = xmalloc(maxsize);
-
- stream.next_in = data;
- stream.avail_in = size;
- stream.next_out = buffer;
- stream.avail_out = maxsize;
-
- do {
- result = deflate(&stream, Z_FINISH);
- } while (result == Z_OK);
-
- if (result != Z_STREAM_END) {
- free(buffer);
- return NULL;
- }
-
- deflateEnd(&stream);
- *compressed_size = stream.total_out;
-
- return buffer;
-}
-
static int write_zip_entry(struct archiver_args *args,
const unsigned char *sha1, const char *path, size_t pathlen,
unsigned int mode, void *buffer, unsigned long size)
@@ -164,8 +131,8 @@ static int write_zip_entry(struct archiver_args *args,
}
if (method == 8) {
- deflated = zlib_deflate(buffer, size, args->compression_level,
- &compressed_size);
+ compressed_size = size;
+ deflated = git_deflate(buffer, &compressed_size, args->compression_level, NULL);
if (deflated && compressed_size - 6 < size) {
/* ZLIB --> raw compressed data (see RFC 1950) */
/* CMF and FLG ... */
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0e81673..9e78e88 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -130,28 +130,13 @@ static void *get_delta(struct object_entry *entry)
static unsigned long do_compress(void **pptr, unsigned long size)
{
- z_stream stream;
- void *in, *out;
- unsigned long maxsize;
-
- memset(&stream, 0, sizeof(stream));
- deflateInit(&stream, pack_compression_level);
- maxsize = deflateBound(&stream, size);
-
- in = *pptr;
- out = xmalloc(maxsize);
+ int zlib_error;
+ void *out = git_deflate(*pptr, &size, pack_compression_level, &zlib_error);
+ if (!out)
+ die("failed to compress, zlib error %d", zlib_error);
+ free(*pptr);
*pptr = out;
-
- stream.next_in = in;
- stream.avail_in = size;
- stream.next_out = out;
- stream.avail_out = maxsize;
- while (deflate(&stream, Z_FINISH) == Z_OK)
- ; /* nothing */
- deflateEnd(&stream);
-
- free(in);
- return stream.total_out;
+ return size;
}
/*
diff --git a/cache.h b/cache.h
index eb77e1d..afca130 100644
--- a/cache.h
+++ b/cache.h
@@ -22,6 +22,7 @@
void git_inflate_init(z_streamp strm);
void git_inflate_end(z_streamp strm);
int git_inflate(z_streamp strm, int flush);
+void *git_deflate(void *buf, unsigned long *size, int compression_level, int *error);
#if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
#define DTYPE(de) ((de)->d_type)
diff --git a/diff.c b/diff.c
index 6fb97d4..56ef7a0 100644
--- a/diff.c
+++ b/diff.c
@@ -1674,30 +1674,6 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len)
}
}
-static unsigned char *deflate_it(char *data,
- unsigned long size,
- unsigned long *result_size)
-{
- int bound;
- unsigned char *deflated;
- z_stream stream;
-
- memset(&stream, 0, sizeof(stream));
- deflateInit(&stream, zlib_compression_level);
- bound = deflateBound(&stream, size);
- deflated = xmalloc(bound);
- stream.next_out = deflated;
- stream.avail_out = bound;
-
- stream.next_in = (unsigned char *)data;
- stream.avail_in = size;
- while (deflate(&stream, Z_FINISH) == Z_OK)
- ; /* nothing */
- deflateEnd(&stream);
- *result_size = stream.total_out;
- return deflated;
-}
-
static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, char *prefix)
{
void *cp;
@@ -1708,12 +1684,19 @@ static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, char
unsigned long delta_size;
unsigned long deflate_size;
unsigned long data_size;
+ int zlib_error;
/* We could do deflated delta, or we could do just deflated two,
* whichever is smaller.
*/
delta = NULL;
- deflated = deflate_it(two->ptr, two->size, &deflate_size);
+ deflate_size = two->size;
+ deflated = git_deflate(two->ptr, &deflate_size, zlib_compression_level, &zlib_error);
+ if (!deflated) {
+ error("failed to compress for binary diff prefix %s, zlib error %d",
+ prefix, zlib_error);
+ return;
+ }
if (one->size && two->size) {
delta = diff_delta(one->ptr, one->size,
two->ptr, two->size,
@@ -1721,8 +1704,14 @@ static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, char
if (delta) {
void *to_free = delta;
orig_size = delta_size;
- delta = deflate_it(delta, delta_size, &delta_size);
+ delta = git_deflate(delta, &delta_size, zlib_compression_level, NULL);
free(to_free);
+ if (!delta) {
+ free(deflated);
+ error("failed to compress for delta prefix %s, zlib error %d",
+ prefix, zlib_error);
+ return;
+ }
}
}
diff --git a/fast-import.c b/fast-import.c
index dd51ac4..cbdab25 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -993,7 +993,8 @@ static int store_object(
unsigned char sha1[20];
unsigned long hdrlen, deltalen;
git_SHA_CTX c;
- z_stream s;
+ unsigned long compressed_size;
+ int zlib_error;
hdrlen = sprintf((char *)hdr,"%s %lu", typename(type),
(unsigned long)dat->len) + 1;
@@ -1025,24 +1026,15 @@ static int store_object(
} else
delta = NULL;
- memset(&s, 0, sizeof(s));
- deflateInit(&s, pack_compression_level);
- if (delta) {
- s.next_in = delta;
- s.avail_in = deltalen;
- } else {
- s.next_in = (void *)dat->buf;
- s.avail_in = dat->len;
- }
- s.avail_out = deflateBound(&s, s.avail_in);
- s.next_out = out = xmalloc(s.avail_out);
- while (deflate(&s, Z_FINISH) == Z_OK)
- /* nothing */;
- deflateEnd(&s);
+ compressed_size = delta ? deltalen : dat->len;
+ out = git_deflate(delta ? delta : dat->buf, &compressed_size,
+ pack_compression_level, &zlib_error);
+ if (!out)
+ die("failed to compress, zlib error %d", zlib_error);
/* Determine if we should auto-checkpoint. */
- if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize)
- || (pack_size + 60 + s.total_out) < pack_size) {
+ if ((max_packsize && (pack_size + 60 + compressed_size) > max_packsize)
+ || (pack_size + 60 + compressed_size) < pack_size) {
/* This new object needs to *not* have the current pack_id. */
e->pack_id = pack_id + 1;
@@ -1053,15 +1045,12 @@ static int store_object(
free(delta);
delta = NULL;
- memset(&s, 0, sizeof(s));
- deflateInit(&s, pack_compression_level);
- s.next_in = (void *)dat->buf;
- s.avail_in = dat->len;
- s.avail_out = deflateBound(&s, s.avail_in);
- s.next_out = out = xrealloc(out, s.avail_out);
- while (deflate(&s, Z_FINISH) == Z_OK)
- /* nothing */;
- deflateEnd(&s);
+ free(out);
+ compressed_size = dat->len;
+ out = git_deflate(dat->buf, &compressed_size,
+ pack_compression_level, &zlib_error);
+ if (!out)
+ die("failed to compress, zlib error %d", zlib_error);
}
}
@@ -1096,8 +1085,8 @@ static int store_object(
pack_size += hdrlen;
}
- sha1write(pack_file, out, s.total_out);
- pack_size += s.total_out;
+ sha1write(pack_file, out, compressed_size);
+ pack_size += compressed_size;
e->idx.crc32 = crc32_end(pack_file);
diff --git a/wrapper.c b/wrapper.c
index fd8ead3..d0d3de9 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -289,6 +289,37 @@ int git_inflate(z_streamp strm, int flush)
return ret;
}
+void *git_deflate(void *in, unsigned long *size, int compression_level, int *error)
+{
+ z_stream stream;
+ void *out;
+ unsigned long maxsize;
+ int result;
+
+ memset(&stream, 0, sizeof(stream));
+ deflateInit(&stream, compression_level);
+ maxsize = deflateBound(&stream, *size);
+
+ out = xmalloc(maxsize);
+ stream.next_in = in;
+ stream.avail_in = *size;
+ stream.next_out = out;
+ stream.avail_out = maxsize;
+ while ((result = deflate(&stream, Z_FINISH)) == Z_OK)
+ ; /* nothing */
+
+ if (result != Z_STREAM_END ||
+ (result = deflateEnd(&stream)) != Z_OK) {
+ if (error)
+ *error = result;
+ *size = 0;
+ free(out);
+ return NULL;
+ }
+ *size = stream.total_out;
+ return out;
+}
+
int odb_mkstemp(char *template, size_t limit, const char *pattern)
{
int fd;
--
1.7.1.rc1.69.g24c2f7
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] Reduce zlib deflate code duplication
2010-08-30 1:46 ` Nguyễn Thái Ngọc Duy
@ 2010-08-31 17:46 ` Junio C Hamano
0 siblings, 0 replies; 5+ messages in thread
From: Junio C Hamano @ 2010-08-31 17:46 UTC (permalink / raw)
To: Nguyễn Thái Ngọc Duy; +Cc: Jonathan Niedier, git, spearce
Nguyễn Thái Ngọc Duy <pclouds@gmail.com> writes:
> diff --git a/archive-zip.c b/archive-zip.c
> index cf28504..ed176ca 100644
> --- a/archive-zip.c
> +++ b/archive-zip.c
> @@ -87,39 +87,6 @@ static void copy_le32(unsigned char *dest, unsigned int n)
> dest[3] = 0xff & (n >> 030);
> }
>
> -static void *zlib_deflate(void *data, unsigned long size,
> - int compression_level, unsigned long *compressed_size)
> -{
This name makes more sense than git_deflate() as it is about "deflating by
calling zlib". For a common library routine "frotz", git_frotz tends to
be used to name our own replacement for it, but with this patch we are not
creating our own implementation of deflate algorithm.
> - ...
> - deflateEnd(&stream);
We used not to check the return value of this call here; the new function
in wrapper.c insists that this returns Z_OK. I think it is an improvement
that deserves to be mentioned in the proposed log message.
> @@ -164,8 +131,8 @@ static int write_zip_entry(struct archiver_args *args,
> }
>
> if (method == 8) {
> - deflated = zlib_deflate(buffer, size, args->compression_level,
> - &compressed_size);
> + compressed_size = size;
Isn't it awkward that you have to assign the size of the uncompressed
input to a variable you are planning to use to hold the compressed size
with your API? I do not see a compelling reason that we would want to use
an in-out parameter here.
> @@ -1708,12 +1684,19 @@ static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, char
> unsigned long delta_size;
> unsigned long deflate_size;
> unsigned long data_size;
> + int zlib_error;
>
> /* We could do deflated delta, or we could do just deflated two,
> * whichever is smaller.
> */
> delta = NULL;
> - deflated = deflate_it(two->ptr, two->size, &deflate_size);
> + deflate_size = two->size;
> + deflated = git_deflate(two->ptr, &deflate_size, zlib_compression_level, &zlib_error);
> + if (!deflated) {
> + error("failed to compress for binary diff prefix %s, zlib error %d",
> + prefix, zlib_error);
What is this "prefix" about?
I think the callchain here is that builtin_diff() sets its local variable
line_prefix according to the o->output_prefix (used by graphing cruft) and
it is passed to emit_binary_diff(). It certainly shouldn't be in the
middle of any line, and I don't think you want it in the error message in
the first place. Likewise for another error message nearby.
Thanks.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2010-08-31 17:47 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-08-26 20:56 [PATCH] Reduce zlib deflate code duplication Nguyễn Thái Ngọc Duy
2010-08-27 1:56 ` Shawn Pearce
2010-08-27 2:37 ` Jonathan Nieder
2010-08-30 1:46 ` Nguyễn Thái Ngọc Duy
2010-08-31 17:46 ` Junio C Hamano
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).