git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Martin Koegler <mkoegler@auto.tuwien.ac.at>
To: Junio C Hamano <junkio@cox.net>
Cc: git@vger.kernel.org, Martin Koegler <mkoegler@auto.tuwien.ac.at>
Subject: [PATCH] git-pack-objects: cache small deltas between big objects
Date: Sun, 20 May 2007 23:11:04 +0200	[thread overview]
Message-ID: <11796954641778-git-send-email-mkoegler@auto.tuwien.ac.at> (raw)

Creating deltas between big blobs is a CPU and memory intensive task.
In the writing phase, all (not reused) deltas are redone.

This patch adds support for caching deltas from the deltifying phase, so
that the writing phase is faster.

Caching is limited to small deltas to avoid significantly increasing memory usage.
The implemented limit is (memory needed to create the delta)/1024.

Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
---
 builtin-pack-objects.c |   35 +++++++++++++++++++++++++----------
 1 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index d165f10..13429d0 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -35,6 +35,7 @@ struct object_entry {
 	struct object_entry *delta_sibling; /* other deltified objects who
 					     * uses the same base as me
 					     */
+	void *delta_data;	/* cached delta (uncompressed) */
 	unsigned long delta_size;	/* delta data size (uncompressed) */
 	enum object_type type;
 	enum object_type in_pack_type;	/* could be delta */
@@ -380,17 +381,24 @@ static unsigned long write_object(struct sha1file *f,
 				 */
 
 	if (!to_reuse) {
-		buf = read_sha1_file(entry->sha1, &type, &size);
-		if (!buf)
-			die("unable to read %s", sha1_to_hex(entry->sha1));
-		if (size != entry->size)
-			die("object %s size inconsistency (%lu vs %lu)",
-			    sha1_to_hex(entry->sha1), size, entry->size);
-		if (entry->delta) {
-			buf = delta_against(buf, size, entry);
+		if (entry->delta_data) {
+			buf = entry->delta_data;
 			size = entry->delta_size;
 			obj_type = (allow_ofs_delta && entry->delta->offset) ?
-				OBJ_OFS_DELTA : OBJ_REF_DELTA;
+					OBJ_OFS_DELTA : OBJ_REF_DELTA;
+		} else {
+			buf = read_sha1_file(entry->sha1, &type, &size);
+			if (!buf)
+				die("unable to read %s", sha1_to_hex(entry->sha1));
+			if (size != entry->size)
+				die("object %s size inconsistency (%lu vs %lu)",
+				    sha1_to_hex(entry->sha1), size, entry->size);
+			if (entry->delta) {
+				buf = delta_against(buf, size, entry);
+				size = entry->delta_size;
+				obj_type = (allow_ofs_delta && entry->delta->offset) ?
+					OBJ_OFS_DELTA : OBJ_REF_DELTA;
+			}
 		}
 		/*
 		 * The object header is a byte of 'type' followed by zero or
@@ -1294,10 +1302,17 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 	if (!delta_buf)
 		return 0;
 
+	if (trg_entry->delta_data)
+		free (trg_entry->delta_data);
+	trg_entry->delta_data = 0;
 	trg_entry->delta = src_entry;
 	trg_entry->delta_size = delta_size;
 	trg_entry->depth = src_entry->depth + 1;
-	free(delta_buf);
+	/* cache delta, if objects are large enough compared to delta size */
+	if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
+		trg_entry->delta_data = delta_buf;
+	else
+		free(delta_buf);
 	return 1;
 }
 
-- 
1.5.2.rc3.802.g4b4b7

             reply	other threads:[~2007-05-20 21:11 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-05-20 21:11 Martin Koegler [this message]
2007-05-21  4:35 ` [PATCH] git-pack-objects: cache small deltas between big objects Dana How
2007-05-21 17:59   ` Martin Koegler
2007-05-22  7:01     ` Dana How
2007-05-22  8:04       ` Junio C Hamano
2007-05-22  9:25         ` Dana How
2007-05-21  4:54 ` Junio C Hamano
2007-05-21 17:00   ` Martin Koegler

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=11796954641778-git-send-email-mkoegler@auto.tuwien.ac.at \
    --to=mkoegler@auto.tuwien.ac.at \
    --cc=git@vger.kernel.org \
    --cc=junkio@cox.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).