git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Optimization for git-rev-list --objects
@ 2006-02-11 19:14 Alexandre Julliard
  2006-02-12  1:57 ` [PATCH] Use a hashtable for objects instead of a sorted list Johannes Schindelin
  2006-02-12  2:19 ` [PATCH] Optimization for git-rev-list --objects Linus Torvalds
  0 siblings, 2 replies; 21+ messages in thread
From: Alexandre Julliard @ 2006-02-11 19:14 UTC (permalink / raw)
  To: git


When building a large list of objects, most of the time is spent in
created_object() inserting the objects in the sorted list. This patch
splits the list in 256 sublists to reduce the impact of the O(n^2)
list insertion.

On the WineHQ repository (220,000 objects), the patch reduces the time
needed for a 'git-rev-list --objects HEAD' from 2 minutes to 20 seconds.

Signed-off-by: Alexandre Julliard <julliard@winehq.org>

---

 fsck-objects.c |   66 +++++++++++++++++++++++++++++---------------------------
 name-rev.c     |    9 ++++----
 object.c       |   24 ++++++++++----------
 object.h       |    4 ++-
 4 files changed, 53 insertions(+), 50 deletions(-)

68f8fcb7f5883ecc44a80e822e0b78ee8efd9eb9
diff --git a/fsck-objects.c b/fsck-objects.c
index 9950be2..5bdb21e 100644
--- a/fsck-objects.c
+++ b/fsck-objects.c
@@ -58,45 +58,47 @@ static int objwarning(struct object *obj
 
 static void check_connectivity(void)
 {
-	int i;
+	int i, j;
 
 	/* Look up all the requirements, warn about missing objects.. */
-	for (i = 0; i < nr_objs; i++) {
-		struct object *obj = objs[i];
+	for (i = 0; i < 256; i++) {
+		for (j = 0; j < nr_objs[i]; j++) {
+			struct object *obj = objs[i][j];
 
-		if (!obj->parsed) {
-			if (!standalone && has_sha1_file(obj->sha1))
-				; /* it is in pack */
-			else
-				printf("missing %s %s\n",
-				       obj->type, sha1_to_hex(obj->sha1));
-			continue;
-		}
+			if (!obj->parsed) {
+				if (!standalone && has_sha1_file(obj->sha1))
+					; /* it is in pack */
+				else
+					printf("missing %s %s\n",
+					       obj->type, sha1_to_hex(obj->sha1));
+				continue;
+			}
 
-		if (obj->refs) {
-			const struct object_refs *refs = obj->refs;
-			unsigned j;
-			for (j = 0; j < refs->count; j++) {
-				struct object *ref = refs->ref[j];
-				if (ref->parsed ||
-				    (!standalone && has_sha1_file(ref->sha1)))
-					continue;
-				printf("broken link from %7s %s\n",
-				       obj->type, sha1_to_hex(obj->sha1));
-				printf("              to %7s %s\n",
-				       ref->type, sha1_to_hex(ref->sha1));
+			if (obj->refs) {
+				const struct object_refs *refs = obj->refs;
+				unsigned k;
+				for (k = 0; k < refs->count; k++) {
+					struct object *ref = refs->ref[k];
+					if (ref->parsed ||
+					    (!standalone && has_sha1_file(ref->sha1)))
+						continue;
+					printf("broken link from %7s %s\n",
+					       obj->type, sha1_to_hex(obj->sha1));
+					printf("              to %7s %s\n",
+					       ref->type, sha1_to_hex(ref->sha1));
+				}
 			}
-		}
 
-		if (show_unreachable && !(obj->flags & REACHABLE)) {
-			printf("unreachable %s %s\n",
-			       obj->type, sha1_to_hex(obj->sha1));
-			continue;
-		}
+			if (show_unreachable && !(obj->flags & REACHABLE)) {
+				printf("unreachable %s %s\n",
+				       obj->type, sha1_to_hex(obj->sha1));
+				continue;
+			}
 
-		if (!obj->used) {
-			printf("dangling %s %s\n", obj->type, 
-			       sha1_to_hex(obj->sha1));
+			if (!obj->used) {
+				printf("dangling %s %s\n", obj->type, 
+				       sha1_to_hex(obj->sha1));
+			}
 		}
 	}
 }
diff --git a/name-rev.c b/name-rev.c
index bbadb91..9a8d086 100644
--- a/name-rev.c
+++ b/name-rev.c
@@ -230,11 +230,12 @@ int main(int argc, char **argv)
 				fwrite(p_start, p - p_start, 1, stdout);
 		}
 	} else if (all) {
-		int i;
+		int i, j;
 
-		for (i = 0; i < nr_objs; i++)
-			printf("%s %s\n", sha1_to_hex(objs[i]->sha1),
-					get_rev_name(objs[i]));
+		for (i = 0; i < 256; i++)
+			for (j = 0; j < nr_objs[i]; j++)
+				printf("%s %s\n", sha1_to_hex(objs[i][j]->sha1),
+						get_rev_name(objs[i][j]));
 	} else
 		for ( ; revs; revs = revs->next)
 			printf("%s %s\n", revs->name, get_rev_name(revs->item));
diff --git a/object.c b/object.c
index 1577f74..fcd4d0c 100644
--- a/object.c
+++ b/object.c
@@ -5,19 +5,19 @@
 #include "commit.h"
 #include "tag.h"
 
-struct object **objs;
-int nr_objs;
-static int obj_allocs;
+struct object **objs[256];
+int nr_objs[256];
+static int obj_allocs[256];
 
 int track_object_refs = 1;
 
 static int find_object(const unsigned char *sha1)
 {
-	int first = 0, last = nr_objs;
+	int first = 0, last = nr_objs[*sha1];
 
         while (first < last) {
                 int next = (first + last) / 2;
-                struct object *obj = objs[next];
+                struct object *obj = objs[*sha1][next];
                 int cmp;
 
                 cmp = memcmp(sha1, obj->sha1, 20);
@@ -36,7 +36,7 @@ struct object *lookup_object(const unsig
 {
 	int pos = find_object(sha1);
 	if (pos >= 0)
-		return objs[pos];
+		return objs[*sha1][pos];
 	return NULL;
 }
 
@@ -54,17 +54,17 @@ void created_object(const unsigned char 
 		die("Inserting %s twice\n", sha1_to_hex(sha1));
 	pos = -pos-1;
 
-	if (obj_allocs == nr_objs) {
-		obj_allocs = alloc_nr(obj_allocs);
-		objs = xrealloc(objs, obj_allocs * sizeof(struct object *));
+	if (obj_allocs[*sha1] == nr_objs[*sha1]) {
+		obj_allocs[*sha1] = alloc_nr(obj_allocs[*sha1]);
+		objs[*sha1] = xrealloc(objs[*sha1], obj_allocs[*sha1] * sizeof(struct object *));
 	}
 
 	/* Insert it into the right place */
-	memmove(objs + pos + 1, objs + pos, (nr_objs - pos) * 
+	memmove(objs[*sha1] + pos + 1, objs[*sha1] + pos, (nr_objs[*sha1] - pos) * 
 		sizeof(struct object *));
 
-	objs[pos] = obj;
-	nr_objs++;
+	objs[*sha1][pos] = obj;
+	nr_objs[*sha1]++;
 }
 
 struct object_refs *alloc_object_refs(unsigned count)
diff --git a/object.h b/object.h
index 0e76182..dcd6ac4 100644
--- a/object.h
+++ b/object.h
@@ -23,8 +23,8 @@ struct object {
 };
 
 extern int track_object_refs;
-extern int nr_objs;
-extern struct object **objs;
+extern int nr_objs[256];
+extern struct object **objs[256];
 
 /** Internal only **/
 struct object *lookup_object(const unsigned char *sha1);
-- 
1.1.6.g68f8


-- 
Alexandre Julliard
julliard@winehq.org

^ permalink raw reply related	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2006-02-12 18:34 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-02-11 19:14 [PATCH] Optimization for git-rev-list --objects Alexandre Julliard
2006-02-12  1:57 ` [PATCH] Use a hashtable for objects instead of a sorted list Johannes Schindelin
2006-02-12  2:46   ` Junio C Hamano
2006-02-12  8:52     ` Alexandre Julliard
2006-02-12 12:11       ` Junio C Hamano
2006-02-12 14:31         ` Johannes Schindelin
2006-02-12 11:19     ` Florian Weimer
2006-02-12 12:08       ` ***DONTUSE*** " Junio C Hamano
2006-02-12 13:46         ` Junio C Hamano
2006-02-12 17:26           ` Johannes Schindelin
2006-02-12 18:34             ` Junio C Hamano
2006-02-12  2:19 ` [PATCH] Optimization for git-rev-list --objects Linus Torvalds
2006-02-12  2:39   ` Junio C Hamano
2006-02-12  4:11     ` [PATCH] binary-tree-based objects Junio C Hamano
2006-02-12  5:06       ` Linus Torvalds
2006-02-12  5:22         ` Linus Torvalds
2006-02-12  5:23           ` Linus Torvalds
2006-02-12  5:48             ` Junio C Hamano
2006-02-12  6:07               ` Junio C Hamano
2006-02-12  6:53                 ` Linus Torvalds
2006-02-12  7:05           ` Linus Torvalds

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).