git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] git-pack-objects: cache small deltas between big objects
@ 2007-05-20 21:11 Martin Koegler
  2007-05-21  4:35 ` Dana How
  2007-05-21  4:54 ` Junio C Hamano
  0 siblings, 2 replies; 8+ messages in thread
From: Martin Koegler @ 2007-05-20 21:11 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Martin Koegler

Creating deltas between big blobs is a CPU and memory intensive task.
In the writing phase, all (not reused) deltas are redone.

This patch adds support for caching deltas from the deltifying phase, so
that the writing phase is faster.

The caching is limited to small deltas so that memory usage does not increase significantly.
The implemented limit is (memory needed to create the delta)/1024.

Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
---
 builtin-pack-objects.c |   35 +++++++++++++++++++++++++----------
 1 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index d165f10..13429d0 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -35,6 +35,7 @@ struct object_entry {
 	struct object_entry *delta_sibling; /* other deltified objects who
 					     * uses the same base as me
 					     */
+	void *delta_data;	/* cached delta (uncompressed) */
 	unsigned long delta_size;	/* delta data size (uncompressed) */
 	enum object_type type;
 	enum object_type in_pack_type;	/* could be delta */
@@ -380,17 +381,24 @@ static unsigned long write_object(struct sha1file *f,
 				 */
 
 	if (!to_reuse) {
-		buf = read_sha1_file(entry->sha1, &type, &size);
-		if (!buf)
-			die("unable to read %s", sha1_to_hex(entry->sha1));
-		if (size != entry->size)
-			die("object %s size inconsistency (%lu vs %lu)",
-			    sha1_to_hex(entry->sha1), size, entry->size);
-		if (entry->delta) {
-			buf = delta_against(buf, size, entry);
+		if (entry->delta_data) {
+			buf = entry->delta_data;
 			size = entry->delta_size;
 			obj_type = (allow_ofs_delta && entry->delta->offset) ?
-				OBJ_OFS_DELTA : OBJ_REF_DELTA;
+					OBJ_OFS_DELTA : OBJ_REF_DELTA;
+		} else {
+			buf = read_sha1_file(entry->sha1, &type, &size);
+			if (!buf)
+				die("unable to read %s", sha1_to_hex(entry->sha1));
+			if (size != entry->size)
+				die("object %s size inconsistency (%lu vs %lu)",
+				    sha1_to_hex(entry->sha1), size, entry->size);
+			if (entry->delta) {
+				buf = delta_against(buf, size, entry);
+				size = entry->delta_size;
+				obj_type = (allow_ofs_delta && entry->delta->offset) ?
+					OBJ_OFS_DELTA : OBJ_REF_DELTA;
+			}
 		}
 		/*
 		 * The object header is a byte of 'type' followed by zero or
@@ -1294,10 +1302,17 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 	if (!delta_buf)
 		return 0;
 
+	if (trg_entry->delta_data)
+		free (trg_entry->delta_data);
+	trg_entry->delta_data = 0;
 	trg_entry->delta = src_entry;
 	trg_entry->delta_size = delta_size;
 	trg_entry->depth = src_entry->depth + 1;
-	free(delta_buf);
+	/* cache delta, if objects are large enough compared to delta size */
+	if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
+		trg_entry->delta_data = delta_buf;
+	else
+		free(delta_buf);
 	return 1;
 }
 
-- 
1.5.2.rc3.802.g4b4b7

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2007-05-22  9:25 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2007-05-20 21:11 [PATCH] git-pack-objects: cache small deltas between big objects Martin Koegler
2007-05-21  4:35 ` Dana How
2007-05-21 17:59   ` Martin Koegler
2007-05-22  7:01     ` Dana How
2007-05-22  8:04       ` Junio C Hamano
2007-05-22  9:25         ` Dana How
2007-05-21  4:54 ` Junio C Hamano
2007-05-21 17:00   ` Martin Koegler

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).