All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] pack-check: Sort entries by pack offset before unpacking them.
@ 2007-06-03 18:21 Alexandre Julliard
  0 siblings, 0 replies; only message in thread
From: Alexandre Julliard @ 2007-06-03 18:21 UTC (permalink / raw)
  To: git

Because of the way objects are sorted in a pack, unpacking them in
disk order is much more efficient than random access. Tests on the
Wine repository show a gain in pack validation time of about 35%.

Signed-off-by: Alexandre Julliard <julliard@winehq.org>
---
 pack-check.c |   47 +++++++++++++++++++++++++++++++++++------------
 1 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/pack-check.c b/pack-check.c
index 3623c71..d7dd62b 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -1,6 +1,23 @@
 #include "cache.h"
 #include "pack.h"
 
+struct idx_entry
+{
+	const unsigned char *sha1;
+	off_t                offset;
+};
+
+static int compare_entries(const void *e1, const void *e2)
+{
+	const struct idx_entry *entry1 = e1;
+	const struct idx_entry *entry2 = e2;
+	if (entry1->offset < entry2->offset)
+		return -1;
+	if (entry1->offset > entry2->offset)
+		return 1;
+	return 0;
+}
+
 static int verify_packfile(struct packed_git *p,
 		struct pack_window **w_curs)
 {
@@ -11,6 +28,7 @@ static int verify_packfile(struct packed_git *p,
 	off_t offset = 0, pack_sig = p->pack_size - 20;
 	uint32_t nr_objects, i;
 	int err;
+	struct idx_entry *entries;
 
 	/* Note that the pack header checks are actually performed by
 	 * use_pack when it first opens the pack file.  If anything
@@ -41,33 +59,38 @@ static int verify_packfile(struct packed_git *p,
 	 * we do not do scan-streaming check on the pack file.
 	 */
 	nr_objects = p->num_objects;
+	entries = xmalloc(nr_objects * sizeof(*entries));
+	/* first sort entries by pack offset, since unpacking them is more efficient that way */
+	for (i = 0; i < nr_objects; i++) {
+		entries[i].sha1 = nth_packed_object_sha1(p, i);
+		if (!entries[i].sha1)
+			die("internal error pack-check nth-packed-object");
+		entries[i].offset = find_pack_entry_one(entries[i].sha1, p);
+		if (!entries[i].offset)
+			die("internal error pack-check find-pack-entry-one");
+	}
+	qsort(entries, nr_objects, sizeof(*entries), compare_entries);
+
 	for (i = 0, err = 0; i < nr_objects; i++) {
-		const unsigned char *sha1;
 		void *data;
 		enum object_type type;
 		unsigned long size;
-		off_t offset;
 
-		sha1 = nth_packed_object_sha1(p, i);
-		if (!sha1)
-			die("internal error pack-check nth-packed-object");
-		offset = find_pack_entry_one(sha1, p);
-		if (!offset)
-			die("internal error pack-check find-pack-entry-one");
-		data = unpack_entry(p, offset, &type, &size);
+		data = unpack_entry(p, entries[i].offset, &type, &size);
 		if (!data) {
 			err = error("cannot unpack %s from %s",
-				    sha1_to_hex(sha1), p->pack_name);
+				    sha1_to_hex(entries[i].sha1), p->pack_name);
 			continue;
 		}
-		if (check_sha1_signature(sha1, data, size, typename(type))) {
+		if (check_sha1_signature(entries[i].sha1, data, size, typename(type))) {
 			err = error("packed %s from %s is corrupt",
-				    sha1_to_hex(sha1), p->pack_name);
+				    sha1_to_hex(entries[i].sha1), p->pack_name);
 			free(data);
 			continue;
 		}
 		free(data);
 	}
+	free(entries);
 
 	return err;
 }
-- 
1.5.2.156.gb09c8

-- 
Alexandre Julliard
julliard@winehq.org

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2007-06-03 18:22 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-06-03 18:21 [PATCH] pack-check: Sort entries by pack offset before unpacking them Alexandre Julliard

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.