git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Shawn O. Pearce" <spearce@spearce.org>
To: Junio C Hamano <gitster@pobox.com>
Cc: git@vger.kernel.org, Nicolas Pitre <nico@cam.org>,
	Stephan Hennig <mailing_list@arcor.de>,
	Andreas Ericsson <ae@op5.se>
Subject: [PATCH v2] index-pack: Honor core.deltaBaseCacheLimit when resolving deltas
Date: Tue, 15 Jul 2008 04:45:34 +0000	[thread overview]
Message-ID: <20080715044534.GA2794@spearce.org> (raw)
In-Reply-To: <20080715031800.GD1700@spearce.org>

If we are trying to resolve deltas for a long delta chain composed
of multi-megabyte objects we can easily run into requiring 500M+
of memory to hold each object in the chain on the call stack while
we recurse into the dependent objects and resolve them.

We now use a simple delta cache that discards objects near the
bottom of the call stack first, as they are the most least recently
used objects in this current delta chain.  If we recurse out of a
chain we may find the base object is no longer available, as it was
free'd to keep memory under the deltaBaseCacheLimit.  In such cases
we must unpack the base object again, which will require recursing
back to the root of the top of the delta chain as we released that
root first.

The astute reader will probably realize that we can still exceed
the delta base cache limit, but this happens only if the most
recent base plus the delta plus the inflated dependent sum up to
more than the base cache limit.  Due to the way patch_delta is
currently implemented we cannot operate in less memory anyway.

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---

 Version 2 plugs the case Nico noticed, where the patch was causing
 the exact behavior it was trying to prevent while recovering from
 what it did to avoid the excessive memory usage in the first place.

 The change was in get_base_data() where we now unpack the delta
 after we have unpacked the base.  Nico and I both missed that we
 must also bump base_cache_used when we restore the base, and we
 must also prune the bases in case this base has caused us to go
 over the limit.

 :-)

 index-pack.c |   48 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/index-pack.c b/index-pack.c
index 7239e89..b4ec736 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -52,6 +52,7 @@ struct delta_entry
 static struct object_entry *objects;
 static struct delta_entry *deltas;
 static struct base_data *base_cache;
+static size_t base_cache_used;
 static int nr_objects;
 static int nr_deltas;
 static int nr_resolved_deltas;
@@ -220,6 +221,20 @@ static void bad_object(unsigned long offset, const char *format, ...)
 	die("pack has bad object at offset %lu: %s", offset, buf);
 }
 
+static void prune_base_data(struct base_data *retain)
+{
+	struct base_data *b = base_cache;
+	for (b = base_cache;
+	     base_cache_used > delta_base_cache_limit && b;
+	     b = b->child) {
+		if (b->data && b != retain) {
+			free(b->data);
+			b->data = NULL;
+			base_cache_used -= b->size;
+		}
+	}
+}
+
 static void link_base_data(struct base_data *base, struct base_data *c)
 {
 	if (base)
@@ -229,6 +244,8 @@ static void link_base_data(struct base_data *base, struct base_data *c)
 
 	c->base = base;
 	c->child = NULL;
+	base_cache_used += c->size;
+	prune_base_data(c);
 }
 
 static void unlink_base_data(struct base_data *c)
@@ -238,7 +255,10 @@ static void unlink_base_data(struct base_data *c)
 		base->child = NULL;
 	else
 		base_cache = NULL;
-	free(c->data);
+	if (c->data) {
+		free(c->data);
+		base_cache_used -= c->size;
+	}
 }
 
 static void *unpack_entry_data(unsigned long offset, unsigned long size)
@@ -456,6 +476,30 @@ static void sha1_object(const void *data, unsigned long size,
 	}
 }
 
+static void *get_base_data(struct base_data *c)
+{
+	if (!c->data) {
+		struct object_entry *obj = c->obj;
+
+		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
+			void *base = get_base_data(c->base);
+			void *raw = get_data_from_pack(obj);
+			c->data = patch_delta(
+				base, c->base->size,
+				raw, obj->size,
+				&c->size);
+			free(raw);
+			if (!c->data)
+				bad_object(obj->idx.offset, "failed to apply delta");
+		} else
+			c->data = get_data_from_pack(obj);
+
+		base_cache_used += c->size;
+		prune_base_data(c);
+	}
+	return c->data;
+}
+
 static void resolve_delta(struct object_entry *delta_obj,
 			  struct base_data *base_obj, enum object_type type)
 {
@@ -468,7 +512,7 @@ static void resolve_delta(struct object_entry *delta_obj,
 	delta_obj->real_type = type;
 	delta_data = get_data_from_pack(delta_obj);
 	delta_size = delta_obj->size;
-	result.data = patch_delta(base_obj->data, base_obj->size,
+	result.data = patch_delta(get_base_data(base_obj), base_obj->size,
 			     delta_data, delta_size,
 			     &result.size);
 	free(delta_data);
-- 
1.5.6.2.393.g45096

  reply	other threads:[~2008-07-15  4:47 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-07-10 14:40 git pull is slow Stephan Hennig
2008-07-10 15:13 ` Martin Langhoff
2008-07-10 15:28 ` Petr Baudis
2008-07-10 15:30 ` Johannes Sixt
2008-07-10 15:45   ` Stephan Hennig
2008-07-10 15:50     ` Petr Baudis
2008-07-10 17:44       ` Stephan Hennig
2008-07-11 12:25 ` Stephan Hennig
2008-07-11 13:34   ` Andreas Ericsson
2008-07-11 14:04     ` Johannes Schindelin
2008-07-12 12:32       ` Stephan Hennig
2008-07-12 17:05         ` Johannes Schindelin
2008-07-13  1:15           ` Shawn O. Pearce
2008-07-13 13:59             ` Johannes Schindelin
2008-07-13 22:11               ` Shawn O. Pearce
2008-07-14  2:07             ` [PATCH 0/4] Honor core.deltaBaseCacheLimit during index-pack Shawn O. Pearce
2008-07-14  2:27               ` Nicolas Pitre
2008-07-14  3:12                 ` Shawn O. Pearce
2008-07-14 11:44                   ` Johannes Schindelin
2008-07-14 11:54                     ` Jakub Narebski
2008-07-14 12:10                       ` Johannes Schindelin
2008-07-14 12:16                       ` Andreas Ericsson
2008-07-14 12:25                         ` Johannes Schindelin
2008-07-14 12:51                           ` Andreas Ericsson
2008-07-14 12:58                             ` Johannes Schindelin
2008-07-15  2:21                             ` Nicolas Pitre
2008-07-15  2:47                               ` Shawn O. Pearce
2008-07-15  3:06                                 ` Nicolas Pitre
2008-07-17 16:06                                 ` Stephan Hennig
2008-07-17 16:25                                   ` Nicolas Pitre
2008-07-17 21:35                                     ` Shawn O. Pearce
2008-07-17 22:02                                       ` [RFC PATCH] index-pack: Issue a warning if deltaBaseCacheLimit is too small Shawn O. Pearce
2008-07-17 23:45                                         ` Nicolas Pitre
2008-07-15  4:19                       ` [PATCH 0/4] Honor core.deltaBaseCacheLimit during index-pack Shawn O. Pearce
2008-07-14  2:07             ` [PATCH 1/4] index-pack: Refactor base arguments of resolve_delta into a struct Shawn O. Pearce
2008-07-15  2:40               ` Nicolas Pitre
2008-07-14  2:07             ` [PATCH 2/4] index-pack: Chain the struct base_data on the stack for traversal Shawn O. Pearce
2008-07-15  2:48               ` Nicolas Pitre
2008-07-14  2:07             ` [PATCH 3/4] index-pack: Track the object_entry that creates each base_data Shawn O. Pearce
2008-07-14 10:15               ` Johannes Schindelin
2008-07-15  2:50               ` Nicolas Pitre
2008-07-15  3:20                 ` Shawn O. Pearce
2008-07-15  3:42                   ` Nicolas Pitre
2008-07-14  2:07             ` [PATCH 4/4] index-pack: Honor core.deltaBaseCacheLimit when resolving deltas Shawn O. Pearce
2008-07-15  3:05               ` Nicolas Pitre
2008-07-15  3:18                 ` Shawn O. Pearce
2008-07-15  4:45                   ` Shawn O. Pearce [this message]
2008-07-15  5:05                     ` [PATCH v2] " Nicolas Pitre
2008-07-15 18:48                     ` Junio C Hamano
2008-07-13  9:01           ` git pull is slow Stephan Hennig
2008-07-11 12:55 ` Stephan Hennig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080715044534.GA2794@spearce.org \
    --to=spearce@spearce.org \
    --cc=ae@op5.se \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=mailing_list@arcor.de \
    --cc=nico@cam.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).