All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>,
	"Junio C Hamano" <gitster@pobox.com>
Subject: [PATCH 1/2] index-pack: reduce object_entry size to save memory
Date: Sat, 18 Apr 2015 17:47:04 +0700	[thread overview]
Message-ID: <1429354025-24659-2-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1429354025-24659-1-git-send-email-pclouds@gmail.com>

For each object in the input pack, we need one struct object_entry. On
x86-64, this struct is 64 bytes long. Although:

 - The 8 bytes for delta_depth and base_object_no are only useful when
   show_stat is set. And it's never set unless someone is debugging.

 - The three fields hdr_size, type and real_type take 4 bytes each
   even though they never use more than 4 bits.

By moving delta_depth and base_object_no out of struct object_entry
and make the other 3 fields one byte long instead of 4, we shrink 25%
of this struct.

On a 3.4M object repo (*) that's about 53MB. The saving is less
impressive compared to index-pack memory use for basic bookkeeping (**),
about 16%.

(*) linux-2.6.git already has 4M objects as of v3.19-rc7 so this is
not an unrealistic number of objects that we have to deal with.

(**)  3.4M * (sizeof(object_entry) + sizeof(delta_entry)) = 311MB

Brought-up-by: Matthew Sporleder <msporleder@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/index-pack.c | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 4632117..9d854fb 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -18,9 +18,12 @@ static const char index_pack_usage[] =
 struct object_entry {
 	struct pack_idx_entry idx;
 	unsigned long size;
-	unsigned int hdr_size;
-	enum object_type type;
-	enum object_type real_type;
+	unsigned char hdr_size;
+	signed char type;
+	signed char real_type;
+};
+
+struct object_stat {
 	unsigned delta_depth;
 	int base_object_no;
 };
@@ -64,6 +67,7 @@ struct delta_entry {
 };
 
 static struct object_entry *objects;
+static struct object_stat *obj_stat;
 static struct delta_entry *deltas;
 static struct thread_local nothread_data;
 static int nr_objects;
@@ -873,13 +877,15 @@ static void resolve_delta(struct object_entry *delta_obj,
 	void *base_data, *delta_data;
 
 	if (show_stat) {
-		delta_obj->delta_depth = base->obj->delta_depth + 1;
+		int i = delta_obj - objects;
+		int j = base->obj - objects;
+		obj_stat[i].delta_depth = obj_stat[j].delta_depth + 1;
 		deepest_delta_lock();
-		if (deepest_delta < delta_obj->delta_depth)
-			deepest_delta = delta_obj->delta_depth;
+		if (deepest_delta < obj_stat[i].delta_depth)
+			deepest_delta = obj_stat[i].delta_depth;
 		deepest_delta_unlock();
+		obj_stat[i].base_object_no = j;
 	}
-	delta_obj->base_object_no = base->obj - objects;
 	delta_data = get_data_from_pack(delta_obj);
 	base_data = get_base_data(base);
 	result->obj = delta_obj;
@@ -902,7 +908,7 @@ static void resolve_delta(struct object_entry *delta_obj,
  * "want"; if so, swap in "set" and return true. Otherwise, leave it untouched
  * and return false.
  */
-static int compare_and_swap_type(enum object_type *type,
+static int compare_and_swap_type(signed char *type,
 				 enum object_type want,
 				 enum object_type set)
 {
@@ -1499,7 +1505,7 @@ static void show_pack_info(int stat_only)
 		struct object_entry *obj = &objects[i];
 
 		if (is_delta_type(obj->type))
-			chain_histogram[obj->delta_depth - 1]++;
+			chain_histogram[obj_stat[i].delta_depth - 1]++;
 		if (stat_only)
 			continue;
 		printf("%s %-6s %lu %lu %"PRIuMAX,
@@ -1508,8 +1514,8 @@ static void show_pack_info(int stat_only)
 		       (unsigned long)(obj[1].idx.offset - obj->idx.offset),
 		       (uintmax_t)obj->idx.offset);
 		if (is_delta_type(obj->type)) {
-			struct object_entry *bobj = &objects[obj->base_object_no];
-			printf(" %u %s", obj->delta_depth, sha1_to_hex(bobj->idx.sha1));
+			struct object_entry *bobj = &objects[obj_stat[i].base_object_no];
+			printf(" %u %s", obj_stat[i].delta_depth, sha1_to_hex(bobj->idx.sha1));
 		}
 		putchar('\n');
 	}
@@ -1672,6 +1678,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
 	curr_pack = open_pack_file(pack_name);
 	parse_pack_header();
 	objects = xcalloc(nr_objects + 1, sizeof(struct object_entry));
+	if (show_stat)
+		obj_stat = xcalloc(nr_objects + 1, sizeof(struct object_stat));
 	deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
 	parse_pack_objects(pack_sha1);
 	resolve_deltas();
-- 
2.3.0.rc1.137.g477eb31

  reply	other threads:[~2015-04-18 10:47 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-18 10:47 [PATCH 0/2] nd/slim-index-pack-memory-usage update Nguyễn Thái Ngọc Duy
2015-04-18 10:47 ` Nguyễn Thái Ngọc Duy [this message]
2015-04-18 10:47 ` [PATCH 2/2] index-pack: kill union delta_base to save memory Nguyễn Thái Ngọc Duy
2015-07-03 16:51   ` Junio C Hamano
2015-07-03 18:29     ` Duy Nguyen
2015-07-04  1:21       ` [PATCH] index-pack: fix overallocation of sorted_by_pos array Junio C Hamano
2015-07-04 22:30         ` [PATCH] index-pack: fix allocation " Junio C Hamano
2015-07-06 23:23           ` Junio C Hamano
2015-07-07  0:36           ` Duy Nguyen
2015-07-07 15:49             ` Junio C Hamano
2015-07-07 16:06               ` Jeff King
2015-07-08 11:56                 ` [PATCH 1/2] index-pack: rename the field "type" to "in_pack_type" Nguyễn Thái Ngọc Duy
2015-07-08 11:56                   ` [PATCH 2/2] pack-objects: rename the field "type" to "real_type" Nguyễn Thái Ngọc Duy
2015-07-08 13:47                     ` Jeff King
2015-07-08 13:57                       ` Duy Nguyen
2015-07-08 14:11                         ` Jeff King
2015-07-04 22:24       ` [PATCH 2/2] index-pack: kill union delta_base to save memory Junio C Hamano
  -- strict thread matches above, loose matches on Subject: below --
2015-02-20  1:58 [PATCH 0/2] nd/slim-index-pack-memory-usage updates Nguyễn Thái Ngọc Duy
2015-02-20  1:58 ` [PATCH 1/2] index-pack: reduce object_entry size to save memory Nguyễn Thái Ngọc Duy
2015-02-23  2:37   ` Junio C Hamano
2015-02-23  3:38     ` Duy Nguyen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1429354025-24659-2-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.