From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
msporleder@gmail.com, "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v2 1/2] index-pack: reduce object_entry size to save memory
Date: Thu, 26 Feb 2015 17:52:07 +0700 [thread overview]
Message-ID: <1424947928-19396-2-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1424947928-19396-1-git-send-email-pclouds@gmail.com>
For each object in the input pack, we need one struct object_entry. On
x86-64, this struct is 64 bytes long. Although:
- The 8 bytes for delta_depth and base_object_no are only useful when
show_stat is set. And it's never set unless someone is debugging.
- The three fields hdr_size, type and real_type take 4 bytes each
even though they never use more than 4 bits.
By moving delta_depth and base_object_no out of struct object_entry
and make the other 3 fields one byte long instead of 4, we shrink 25%
of this struct.
On a 3.4M object repo (*) that's about 53MB. The saving is less
impressive compared to index-pack memory use for basic bookkeeping (**),
about 16%.
(*) linux-2.6.git already has 4M objects as of v3.19-rc7 so this is
not an unrealistic number of objects that we have to deal with.
(**) 3.4M * (sizeof(object_entry) + sizeof(delta_entry)) = 311MB
Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
builtin/index-pack.c | 30 +++++++++++++++++++-----------
1 file changed, 19 insertions(+), 11 deletions(-)
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 4632117..9d854fb 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -18,9 +18,12 @@ static const char index_pack_usage[] =
struct object_entry {
struct pack_idx_entry idx;
unsigned long size;
- unsigned int hdr_size;
- enum object_type type;
- enum object_type real_type;
+ unsigned char hdr_size;
+ signed char type;
+ signed char real_type;
+};
+
+struct object_stat {
unsigned delta_depth;
int base_object_no;
};
@@ -64,6 +67,7 @@ struct delta_entry {
};
static struct object_entry *objects;
+static struct object_stat *obj_stat;
static struct delta_entry *deltas;
static struct thread_local nothread_data;
static int nr_objects;
@@ -873,13 +877,15 @@ static void resolve_delta(struct object_entry *delta_obj,
void *base_data, *delta_data;
if (show_stat) {
- delta_obj->delta_depth = base->obj->delta_depth + 1;
+ int i = delta_obj - objects;
+ int j = base->obj - objects;
+ obj_stat[i].delta_depth = obj_stat[j].delta_depth + 1;
deepest_delta_lock();
- if (deepest_delta < delta_obj->delta_depth)
- deepest_delta = delta_obj->delta_depth;
+ if (deepest_delta < obj_stat[i].delta_depth)
+ deepest_delta = obj_stat[i].delta_depth;
deepest_delta_unlock();
+ obj_stat[i].base_object_no = j;
}
- delta_obj->base_object_no = base->obj - objects;
delta_data = get_data_from_pack(delta_obj);
base_data = get_base_data(base);
result->obj = delta_obj;
@@ -902,7 +908,7 @@ static void resolve_delta(struct object_entry *delta_obj,
* "want"; if so, swap in "set" and return true. Otherwise, leave it untouched
* and return false.
*/
-static int compare_and_swap_type(enum object_type *type,
+static int compare_and_swap_type(signed char *type,
enum object_type want,
enum object_type set)
{
@@ -1499,7 +1505,7 @@ static void show_pack_info(int stat_only)
struct object_entry *obj = &objects[i];
if (is_delta_type(obj->type))
- chain_histogram[obj->delta_depth - 1]++;
+ chain_histogram[obj_stat[i].delta_depth - 1]++;
if (stat_only)
continue;
printf("%s %-6s %lu %lu %"PRIuMAX,
@@ -1508,8 +1514,8 @@ static void show_pack_info(int stat_only)
(unsigned long)(obj[1].idx.offset - obj->idx.offset),
(uintmax_t)obj->idx.offset);
if (is_delta_type(obj->type)) {
- struct object_entry *bobj = &objects[obj->base_object_no];
- printf(" %u %s", obj->delta_depth, sha1_to_hex(bobj->idx.sha1));
+ struct object_entry *bobj = &objects[obj_stat[i].base_object_no];
+ printf(" %u %s", obj_stat[i].delta_depth, sha1_to_hex(bobj->idx.sha1));
}
putchar('\n');
}
@@ -1672,6 +1678,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
curr_pack = open_pack_file(pack_name);
parse_pack_header();
objects = xcalloc(nr_objects + 1, sizeof(struct object_entry));
+ if (show_stat)
+ obj_stat = xcalloc(nr_objects + 1, sizeof(struct object_stat));
deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
parse_pack_objects(pack_sha1);
resolve_deltas();
--
2.3.0.rc1.137.g477eb31
next prev parent reply other threads:[~2015-02-26 10:52 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-02-20 1:58 [PATCH 0/2] nd/slim-index-pack-memory-usage updates Nguyễn Thái Ngọc Duy
2015-02-20 1:58 ` [PATCH 1/2] index-pack: reduce object_entry size to save memory Nguyễn Thái Ngọc Duy
2015-02-23 2:37 ` Junio C Hamano
2015-02-23 3:38 ` Duy Nguyen
2015-02-20 1:58 ` [PATCH 2/2] index-pack: kill union delta_base " Nguyễn Thái Ngọc Duy
2015-02-26 10:52 ` [PATCH v2 0/2] nd/slim-index-pack-memory-usage updates Nguyễn Thái Ngọc Duy
2015-02-26 10:52 ` Nguyễn Thái Ngọc Duy [this message]
2015-02-26 10:52 ` [PATCH v2 2/2] index-pack: kill union delta_base to save memory Nguyễn Thái Ngọc Duy
2015-02-27 21:18 ` Junio C Hamano
2015-02-28 11:44 ` Duy Nguyen
2015-03-01 2:37 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1424947928-19396-2-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=msporleder@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).