* [PATCH 0/7] assorted pack-objects cleanups and improvements @ 2008-05-02 19:11 Nicolas Pitre 2008-05-02 19:11 ` [PATCH 1/7] pack-objects: small cleanup Nicolas Pitre 0 siblings, 1 reply; 10+ messages in thread From: Nicolas Pitre @ 2008-05-02 19:11 UTC (permalink / raw) To: Junio C Hamano; +Cc: git This is a series of patches with small fixes, minor cleanups and some improvements especially for threaded repacks. The diffstat is rather simple: builtin-pack-objects.c | 197 +++++++++++++++++++++++++++++------------------- 1 files changed, 118 insertions(+), 79 deletions(-) Nicolas ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 1/7] pack-objects: small cleanup 2008-05-02 19:11 [PATCH 0/7] assorted pack-objects cleanups and improvements Nicolas Pitre @ 2008-05-02 19:11 ` Nicolas Pitre 2008-05-02 19:11 ` [PATCH 2/7] pack-objects: remove some double negative logic Nicolas Pitre 0 siblings, 1 reply; 10+ messages in thread From: Nicolas Pitre @ 2008-05-02 19:11 UTC (permalink / raw) To: Junio C Hamano; +Cc: git Better encapsulate delta creation for writing. Signed-off-by: Nicolas Pitre <nico@cam.org> --- builtin-pack-objects.c | 27 +++++++++++++-------------- 1 files changed, 13 insertions(+), 14 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 777f272..8691c99 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -102,21 +102,24 @@ static uint32_t written, written_delta; static uint32_t reused, reused_delta; -static void *delta_against(void *buf, unsigned long size, struct object_entry *entry) +static void *get_delta(struct object_entry *entry) { - unsigned long othersize, delta_size; + unsigned long size, base_size, delta_size; + void *buf, *base_buf, *delta_buf; enum object_type type; - void *otherbuf = read_sha1_file(entry->delta->idx.sha1, &type, &othersize); - void *delta_buf; - if (!otherbuf) + buf = read_sha1_file(entry->idx.sha1, &type, &size); + if (!buf) + die("unable to read %s", sha1_to_hex(entry->idx.sha1)); + base_buf = read_sha1_file(entry->delta->idx.sha1, &type, &base_size); + if (!base_buf) die("unable to read %s", sha1_to_hex(entry->delta->idx.sha1)); - delta_buf = diff_delta(otherbuf, othersize, + delta_buf = diff_delta(base_buf, base_size, buf, size, &delta_size, 0); - if (!delta_buf || delta_size != entry->delta_size) + if (!delta_buf || delta_size != entry->delta_size) die("delta size changed"); - free(buf); - free(otherbuf); + free(buf); + free(base_buf); return delta_buf; } @@ -223,7 +226,6 @@ static unsigned long write_object(struct sha1file *f, off_t write_offset) { unsigned long size; - enum object_type type; void *buf; unsigned char header[10]; unsigned char dheader[10]; @@ -281,10 +283,7 @@ static unsigned long write_object(struct sha1file *f, obj_type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; } else { - buf = read_sha1_file(entry->idx.sha1, &type, &size); - if (!buf) - die("unable to read %s", sha1_to_hex(entry->idx.sha1)); - buf = delta_against(buf, size, entry); + buf = get_delta(entry); size = entry->delta_size; obj_type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; -- 1.5.5.1.226.g6f6e8 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 2/7] pack-objects: remove some double negative logic 2008-05-02 19:11 ` [PATCH 1/7] pack-objects: small cleanup Nicolas Pitre @ 2008-05-02 19:11 ` Nicolas Pitre 2008-05-02 19:11 ` [PATCH 3/7] pack-objects: simplify the condition associated with --all-progress Nicolas Pitre 0 siblings, 1 reply; 10+ messages in thread From: Nicolas Pitre @ 2008-05-02 19:11 UTC (permalink / raw) To: Junio C Hamano; +Cc: git Parsing !no_reuse_delta everywhere makes my brain spend extra cycles wondering each time. Signed-off-by: Nicolas Pitre <nico@cam.org> --- builtin-pack-objects.c | 17 +++++++++-------- 1 files changed, 9 insertions(+), 8 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 8691c99..afbf3dd 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -65,7 +65,8 @@ static struct pack_idx_entry **written_list; static uint32_t nr_objects, nr_alloc, nr_result, nr_written; static int non_empty; -static int no_reuse_delta, no_reuse_object, keep_unreachable, include_tag; +static int reuse_delta = 1, reuse_object = 1; +static int keep_unreachable, include_tag; static int local; static int incremental; static int allow_ofs_delta; @@ -251,7 +252,7 @@ static unsigned long write_object(struct sha1file *f, crc32_begin(f); obj_type = entry->type; - if (no_reuse_object) + if (!reuse_object) to_reuse = 0; /* explicit */ else if (!entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ @@ -1021,7 +1022,7 @@ static void check_object(struct object_entry *entry) unuse_pack(&w_curs); return; case OBJ_REF_DELTA: - if (!no_reuse_delta && !entry->preferred_base) + if (reuse_delta && !entry->preferred_base) base_ref = use_pack(p, &w_curs, entry->in_pack_offset + used, NULL); entry->in_pack_header_size = used + 20; @@ -1044,7 +1045,7 @@ static void check_object(struct object_entry *entry) die("delta base offset out of bound for %s", sha1_to_hex(entry->idx.sha1)); ofs = entry->in_pack_offset - ofs; - if (!no_reuse_delta && !entry->preferred_base) { + if (reuse_delta && !entry->preferred_base) { struct revindex_entry *revidx; revidx = find_pack_revindex(p, ofs); base_ref = nth_packed_object_sha1(p, revidx->nr); @@ -1232,7 +1233,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src, * We do not bother to try a delta that we discarded * on an earlier try, but only when reusing delta data. */ - if (!no_reuse_delta && trg_entry->in_pack && + if (reuse_delta && trg_entry->in_pack && trg_entry->in_pack == src_entry->in_pack && trg_entry->in_pack_type != OBJ_REF_DELTA && trg_entry->in_pack_type != OBJ_OFS_DELTA) @@ -1687,7 +1688,7 @@ static void prepare_pack(int window, int depth) if (entry->delta) /* This happens if we decided to reuse existing - * delta from a pack. "!no_reuse_delta &&" is implied. + * delta from a pack. "reuse_delta &&" is implied. */ continue; @@ -2049,11 +2050,11 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) continue; } if (!strcmp("--no-reuse-delta", arg)) { - no_reuse_delta = 1; + reuse_delta = 0; continue; } if (!strcmp("--no-reuse-object", arg)) { - no_reuse_object = no_reuse_delta = 1; + reuse_object = reuse_delta = 0; continue; } if (!strcmp("--delta-base-offset", arg)) { -- 1.5.5.1.226.g6f6e8 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 3/7] pack-objects: simplify the condition associated with --all-progress 2008-05-02 19:11 ` [PATCH 2/7] pack-objects: remove some double negative logic Nicolas Pitre @ 2008-05-02 19:11 ` Nicolas Pitre 2008-05-02 19:11 ` [PATCH 4/7] pack-objects: clean up write_object() a bit Nicolas Pitre 0 siblings, 1 reply; 10+ messages in thread From: Nicolas Pitre @ 2008-05-02 19:11 UTC (permalink / raw) To: Junio C Hamano; +Cc: git Signed-off-by: Nicolas Pitre <nico@cam.org> --- builtin-pack-objects.c | 3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index afbf3dd..b4a63d2 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -452,11 +452,10 @@ static void write_pack_file(void) struct sha1file *f; off_t offset, offset_one, last_obj_offset = 0; struct pack_header hdr; - int do_progress = progress >> pack_to_stdout; uint32_t nr_remaining = nr_result; time_t last_mtime = 0; - if (do_progress) + if (progress > pack_to_stdout) progress_state = start_progress("Writing objects", nr_result); written_list = xmalloc(nr_objects * sizeof(*written_list)); -- 1.5.5.1.226.g6f6e8 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 4/7] pack-objects: clean up write_object() a bit 2008-05-02 19:11 ` [PATCH 3/7] pack-objects: simplify the condition associated with --all-progress Nicolas Pitre @ 2008-05-02 19:11 ` Nicolas Pitre 2008-05-02 19:11 ` [PATCH 5/7] pack-objects: move compression code in a separate function Nicolas Pitre 0 siblings, 1 reply; 10+ messages in thread From: Nicolas Pitre @ 2008-05-02 19:11 UTC (permalink / raw) To: Junio C Hamano; +Cc: git ... for improved readability. No functional changes. Signed-off-by: Nicolas Pitre <nico@cam.org> --- builtin-pack-objects.c | 64 ++++++++++++++++++++++++----------------------- 1 files changed, 33 insertions(+), 31 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index b4a63d2..257002a 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -226,41 +226,43 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry, off_t write_offset) { - unsigned long size; + unsigned long size, limit; void *buf; - unsigned char header[10]; - unsigned char dheader[10]; + unsigned char header[10], dheader[10]; unsigned hdrlen; off_t datalen; - enum object_type obj_type; - int to_reuse = 0; - /* write limit if limited packsize and not first object */ - unsigned long limit = pack_size_limit && nr_written ? - pack_size_limit - write_offset : 0; - /* no if no delta */ - int usable_delta = !entry->delta ? 0 : - /* yes if unlimited packfile */ - !pack_size_limit ? 1 : - /* no if base written to previous pack */ - entry->delta->idx.offset == (off_t)-1 ? 0 : - /* otherwise double-check written to this - * pack, like we do below - */ - entry->delta->idx.offset ? 1 : 0; + enum object_type type; + int usable_delta, to_reuse; if (!pack_to_stdout) crc32_begin(f); - obj_type = entry->type; + type = entry->type; + + /* write limit if limited packsize and not first object */ + limit = pack_size_limit && nr_written ? + pack_size_limit - write_offset : 0; + + if (!entry->delta) + usable_delta = 0; /* no delta */ + else if (!pack_size_limit) + usable_delta = 1; /* unlimited packfile */ + else if (entry->delta->idx.offset == (off_t)-1) + usable_delta = 0; /* base was written to another pack */ + else if(entry->delta->idx.offset) + usable_delta = 1; /* base already exists in this pack */ + else + usable_delta = 0; /* base could end up in another pack */ + if (!reuse_object) to_reuse = 0; /* explicit */ else if (!entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ - else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA) + else if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) /* check_object() decided it for us ... */ to_reuse = usable_delta; /* ... but pack split may override that */ - else if (obj_type != entry->in_pack_type) + else if (type != entry->in_pack_type) to_reuse = 0; /* pack has delta which is unusable */ else if (entry->delta) to_reuse = 0; /* we want to pack afresh */ @@ -274,19 +276,19 @@ static unsigned long write_object(struct sha1file *f, unsigned long maxsize; void *out; if (!usable_delta) { - buf = read_sha1_file(entry->idx.sha1, &obj_type, &size); + buf = read_sha1_file(entry->idx.sha1, &type, &size); if (!buf) die("unable to read %s", sha1_to_hex(entry->idx.sha1)); } else if (entry->delta_data) { size = entry->delta_size; buf = entry->delta_data; entry->delta_data = NULL; - obj_type = (allow_ofs_delta && entry->delta->idx.offset) ? + type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; } else { buf = get_delta(entry); size = entry->delta_size; - obj_type = (allow_ofs_delta && entry->delta->idx.offset) ? + type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; } /* compress the data to store and put compressed length in datalen */ @@ -308,9 +310,9 @@ static unsigned long write_object(struct sha1file *f, * The object header is a byte of 'type' followed by zero or * more bytes of length. */ - hdrlen = encode_header(obj_type, size, header); + hdrlen = encode_header(type, size, header); - if (obj_type == OBJ_OFS_DELTA) { + if (type == OBJ_OFS_DELTA) { /* * Deltas with relative base contain an additional * encoding of the relative offset for the delta @@ -329,7 +331,7 @@ static unsigned long write_object(struct sha1file *f, sha1write(f, header, hdrlen); sha1write(f, dheader + pos, sizeof(dheader) - pos); hdrlen += sizeof(dheader) - pos; - } else if (obj_type == OBJ_REF_DELTA) { + } else if (type == OBJ_REF_DELTA) { /* * Deltas with a base reference contain * an additional 20 bytes for the base sha1. @@ -361,11 +363,11 @@ static unsigned long write_object(struct sha1file *f, off_t offset; if (entry->delta) { - obj_type = (allow_ofs_delta && entry->delta->idx.offset) ? + type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; reused_delta++; } - hdrlen = encode_header(obj_type, entry->size, header); + hdrlen = encode_header(type, entry->size, header); offset = entry->in_pack_offset; revidx = find_pack_revindex(p, offset); datalen = revidx[1].offset - offset; @@ -374,7 +376,7 @@ static unsigned long write_object(struct sha1file *f, die("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1)); offset += entry->in_pack_header_size; datalen -= entry->in_pack_header_size; - if (obj_type == OBJ_OFS_DELTA) { + if (type == OBJ_OFS_DELTA) { off_t ofs = entry->idx.offset - entry->delta->idx.offset; unsigned pos = sizeof(dheader) - 1; dheader[pos] = ofs & 127; @@ -385,7 +387,7 @@ static unsigned long write_object(struct sha1file *f, sha1write(f, header, hdrlen); sha1write(f, dheader + pos, sizeof(dheader) - pos); hdrlen += sizeof(dheader) - pos; - } else if (obj_type == OBJ_REF_DELTA) { + } else if (type == OBJ_REF_DELTA) { if (limit && hdrlen + 20 + datalen + 20 >= limit) return 0; sha1write(f, header, hdrlen); -- 1.5.5.1.226.g6f6e8 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 5/7] pack-objects: move compression code in a separate function 2008-05-02 19:11 ` [PATCH 4/7] pack-objects: clean up write_object() a bit Nicolas Pitre @ 2008-05-02 19:11 ` Nicolas Pitre 2008-05-02 19:11 ` [PATCH 6/7] pack-objects: allow for early delta deflating Nicolas Pitre 0 siblings, 1 reply; 10+ messages in thread From: Nicolas Pitre @ 2008-05-02 19:11 UTC (permalink / raw) To: Junio C Hamano; +Cc: git A later patch will make use of that code too. Signed-off-by: Nicolas Pitre <nico@cam.org> --- builtin-pack-objects.c | 53 ++++++++++++++++++++++++++--------------------- 1 files changed, 29 insertions(+), 24 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 257002a..4319e7b 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -124,6 +124,32 @@ static void *get_delta(struct object_entry *entry) return delta_buf; } +static unsigned long do_compress(void **pptr, unsigned long size) +{ + z_stream stream; + void *in, *out; + unsigned long maxsize; + + memset(&stream, 0, sizeof(stream)); + deflateInit(&stream, pack_compression_level); + maxsize = deflateBound(&stream, size); + + in = *pptr; + out = xmalloc(maxsize); + *pptr = out; + + stream.next_in = in; + stream.avail_in = size; + stream.next_out = out; + stream.avail_out = maxsize; + while (deflate(&stream, Z_FINISH) == Z_OK) + /* nothing */; + deflateEnd(&stream); + + free(in); + return stream.total_out; +} + /* * The per-object header is a pretty dense thing, which is * - first byte: low four bits are "size", then three bits of "type", @@ -226,11 +252,10 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry, off_t write_offset) { - unsigned long size, limit; + unsigned long size, limit, datalen; void *buf; unsigned char header[10], dheader[10]; unsigned hdrlen; - off_t datalen; enum object_type type; int usable_delta, to_reuse; @@ -272,9 +297,6 @@ static unsigned long write_object(struct sha1file *f, */ if (!to_reuse) { - z_stream stream; - unsigned long maxsize; - void *out; if (!usable_delta) { buf = read_sha1_file(entry->idx.sha1, &type, &size); if (!buf) @@ -291,20 +313,7 @@ static unsigned long write_object(struct sha1file *f, type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; } - /* compress the data to store and put compressed length in datalen */ - memset(&stream, 0, sizeof(stream)); - deflateInit(&stream, pack_compression_level); - maxsize = deflateBound(&stream, size); - out = xmalloc(maxsize); - /* Compress it */ - stream.next_in = buf; - stream.avail_in = size; - stream.next_out = out; - stream.avail_out = maxsize; - while (deflate(&stream, Z_FINISH) == Z_OK) - /* nothing */; - deflateEnd(&stream); - datalen = stream.total_out; + datalen = do_compress(&buf, size); /* * The object header is a byte of 'type' followed by zero or @@ -324,7 +333,6 @@ static unsigned long write_object(struct sha1file *f, while (ofs >>= 7) dheader[--pos] = 128 | (--ofs & 127); if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) { - free(out); free(buf); return 0; } @@ -337,7 +345,6 @@ static unsigned long write_object(struct sha1file *f, * an additional 20 bytes for the base sha1. */ if (limit && hdrlen + 20 + datalen + 20 >= limit) { - free(out); free(buf); return 0; } @@ -346,14 +353,12 @@ static unsigned long write_object(struct sha1file *f, hdrlen += 20; } else { if (limit && hdrlen + datalen + 20 >= limit) { - free(out); free(buf); return 0; } sha1write(f, header, hdrlen); } - sha1write(f, out, datalen); - free(out); + sha1write(f, buf, datalen); free(buf); } else { -- 1.5.5.1.226.g6f6e8 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 6/7] pack-objects: allow for early delta deflating 2008-05-02 19:11 ` [PATCH 5/7] pack-objects: move compression code in a separate function Nicolas Pitre @ 2008-05-02 19:11 ` Nicolas Pitre 2008-05-02 19:11 ` [PATCH 7/7] pack-objects: fix early eviction for max depth delta objects Nicolas Pitre 2008-05-02 22:44 ` [PATCH 6/7] pack-objects: allow for early delta deflating A Large Angry SCM 0 siblings, 2 replies; 10+ messages in thread From: Nicolas Pitre @ 2008-05-02 19:11 UTC (permalink / raw) To: Junio C Hamano; +Cc: git When the delta data is cached in memory until it is written to a pack file on disk, it is best to compress it right away in find_deltas() for the following reasons: - we have to compress that data anyway; - this allows for caching more deltas with the same cache size limit; - compression is potentially threaded. This last point is especially relevant for SMP run time. For example, repacking the Linux repo on a quad core processor using 4 threads with all default settings produce the following results before this change: real 2m27.929s user 4m36.492s sys 0m3.091s And with this change applied: real 2m13.787s user 4m37.486s sys 0m3.159s So the actual execution time stayed more or less the same but the wall clock time is shorter. This is however not a good thing to do when generating a pack for network transmission. In that case, the network is most likely to throttle the data throughput, so it is best to make find_deltas() faster in order to start writing data ASAP since we can afford spending more time between writes to compress the data at that point. Signed-off-by: Nicolas Pitre <nico@cam.org> --- builtin-pack-objects.c | 35 ++++++++++++++++++++++++++++++++++- 1 files changed, 34 insertions(+), 1 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 4319e7b..f5dcd20 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -43,6 +43,7 @@ struct object_entry { */ void *delta_data; /* cached delta (uncompressed) */ unsigned long delta_size; /* delta data size (uncompressed) */ + unsigned long z_delta_size; /* delta data size (compressed) */ unsigned int hash; /* name hint hash */ enum object_type type; enum object_type in_pack_type; /* could be delta */ @@ -301,6 +302,11 @@ static unsigned long write_object(struct sha1file *f, buf = read_sha1_file(entry->idx.sha1, &type, &size); if (!buf) die("unable to read %s", sha1_to_hex(entry->idx.sha1)); + /* make sure no cached delta data remains from a + previous attempt before a pack split occured */ + free(entry->delta_data); + entry->delta_data = NULL; + entry->z_delta_size = 0; } else if (entry->delta_data) { size = entry->delta_size; buf = entry->delta_data; @@ -313,7 +319,11 @@ static unsigned long write_object(struct sha1file *f, type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; } - datalen = do_compress(&buf, size); + + if (entry->z_delta_size) + datalen = entry->z_delta_size; + else + datalen = do_compress(&buf, size); /* * The object header is a byte of 'type' followed by zero or @@ -1447,6 +1457,29 @@ static void find_deltas(struct object_entry **list, unsigned *list_size, best_base = other_idx; } + /* + * If we decided to cache the delta data, then it is best + * to compress it right away. First because we have to do + * it anyway, and doing it here while we're threaded will + * save a lot of time in the non threaded write phase, + * as well as allow for caching more deltas within + * the same cache size limit. + * ... + * But only if not writing to stdout, since in that case + * the network is most likely throttling writes anyway, + * and therefore it is best to go to the write phase ASAP + * instead, as we can afford spending more time compressing + * between writes at that moment. + */ + if (entry->delta_data && !pack_to_stdout) { + entry->z_delta_size = do_compress(&entry->delta_data, + entry->delta_size); + cache_lock(); + delta_cache_size -= entry->delta_size; + delta_cache_size += entry->z_delta_size; + cache_unlock(); + } + /* if we made n a delta, and if n is already at max * depth, leaving it in the window is pointless. we * should evict it first. -- 1.5.5.1.226.g6f6e8 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 7/7] pack-objects: fix early eviction for max depth delta objects 2008-05-02 19:11 ` [PATCH 6/7] pack-objects: allow for early delta deflating Nicolas Pitre @ 2008-05-02 19:11 ` Nicolas Pitre 2008-05-02 22:44 ` [PATCH 6/7] pack-objects: allow for early delta deflating A Large Angry SCM 1 sibling, 0 replies; 10+ messages in thread From: Nicolas Pitre @ 2008-05-02 19:11 UTC (permalink / raw) To: Junio C Hamano; +Cc: git The 'depth' variable doesn't reflect the actual maximum depth used when other objects already depend on the current one. Signed-off-by: Nicolas Pitre <nico@cam.org> --- builtin-pack-objects.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index f5dcd20..94c9875 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -1484,7 +1484,7 @@ static void find_deltas(struct object_entry **list, unsigned *list_size, * depth, leaving it in the window is pointless. we * should evict it first. */ - if (entry->delta && depth <= n->depth) + if (entry->delta && max_depth <= n->depth) continue; /* -- 1.5.5.1.226.g6f6e8 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 6/7] pack-objects: allow for early delta deflating 2008-05-02 19:11 ` [PATCH 6/7] pack-objects: allow for early delta deflating Nicolas Pitre 2008-05-02 19:11 ` [PATCH 7/7] pack-objects: fix early eviction for max depth delta objects Nicolas Pitre @ 2008-05-02 22:44 ` A Large Angry SCM 2008-05-02 23:03 ` Nicolas Pitre 1 sibling, 1 reply; 10+ messages in thread From: A Large Angry SCM @ 2008-05-02 22:44 UTC (permalink / raw) To: Nicolas Pitre; +Cc: Junio C Hamano, git Nicolas Pitre wrote: > When the delta data is cached in memory until it is written to a pack > file on disk, it is best to compress it right away in find_deltas() for > the following reasons: > > - we have to compress that data anyway; > > - this allows for caching more deltas with the same cache size limit; > > - compression is potentially threaded. > > This last point is especially relevant for SMP run time. For example, > repacking the Linux repo on a quad core processor using 4 threads with > all default settings produce the following results before this change: > > real 2m27.929s > user 4m36.492s > sys 0m3.091s > > And with this change applied: > > real 2m13.787s > user 4m37.486s > sys 0m3.159s > > So the actual execution time stayed more or less the same but the > wall clock time is shorter. > > This is however not a good thing to do when generating a pack for > network transmission. In that case, the network is most likely to > throttle the data throughput, so it is best to make find_deltas() > faster in order to start writing data ASAP since we can afford > spending more time between writes to compress the data > at that point. [...] > > + /* > + * If we decided to cache the delta data, then it is best > + * to compress it right away. First because we have to do > + * it anyway, and doing it here while we're threaded will > + * save a lot of time in the non threaded write phase, > + * as well as allow for caching more deltas within > + * the same cache size limit. > + * ... > + * But only if not writing to stdout, since in that case > + * the network is most likely throttling writes anyway, > + * and therefore it is best to go to the write phase ASAP > + * instead, as we can afford spending more time compressing > + * between writes at that moment. > + */ > + if (entry->delta_data && !pack_to_stdout) { > + entry->z_delta_size = do_compress(&entry->delta_data, > + entry->delta_size); > + cache_lock(); > + delta_cache_size -= entry->delta_size; > + delta_cache_size += entry->z_delta_size; > + cache_unlock(); > + } > + > /* if we made n a delta, and if n is already at max > * depth, leaving it in the window is pointless. we > * should evict it first. Although I like the idea of changing the behavior if the output is likely to be throttled, I do not like the test for that condition being "is it going to stdout". This is something better suited to a command line argument. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 6/7] pack-objects: allow for early delta deflating 2008-05-02 22:44 ` [PATCH 6/7] pack-objects: allow for early delta deflating A Large Angry SCM @ 2008-05-02 23:03 ` Nicolas Pitre 0 siblings, 0 replies; 10+ messages in thread From: Nicolas Pitre @ 2008-05-02 23:03 UTC (permalink / raw) To: A Large Angry SCM; +Cc: Junio C Hamano, git On Fri, 2 May 2008, A Large Angry SCM wrote: > Although I like the idea of changing the behavior if the output is likely to > be throttled, I do not like the test for that condition being "is it going to > stdout". This is something better suited to a command line argument. This is a common assumptions for stdio to go over the network, and other behaviors are already adjusted accordingly. And not only in pack-objects but in index-pack too. So unless you have a really compelling reason to override this, I don't think it is worth having another command line argument just for the sake of it. Nicolas ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2008-05-02 23:04 UTC | newest] Thread overview: 10+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-05-02 19:11 [PATCH 0/7] assorted pack-objects cleanups and improvements Nicolas Pitre 2008-05-02 19:11 ` [PATCH 1/7] pack-objects: small cleanup Nicolas Pitre 2008-05-02 19:11 ` [PATCH 2/7] pack-objects: remove some double negative logic Nicolas Pitre 2008-05-02 19:11 ` [PATCH 3/7] pack-objects: simplify the condition associated with --all-progress Nicolas Pitre 2008-05-02 19:11 ` [PATCH 4/7] pack-objects: clean up write_object() a bit Nicolas Pitre 2008-05-02 19:11 ` [PATCH 5/7] pack-objects: move compression code in a separate function Nicolas Pitre 2008-05-02 19:11 ` [PATCH 6/7] pack-objects: allow for early delta deflating Nicolas Pitre 2008-05-02 19:11 ` [PATCH 7/7] pack-objects: fix early eviction for max depth delta objects Nicolas Pitre 2008-05-02 22:44 ` [PATCH 6/7] pack-objects: allow for early delta deflating A Large Angry SCM 2008-05-02 23:03 ` Nicolas Pitre
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).