git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Junio C Hamano <junkio@cox.net>
To: Linus Torvalds <torvalds@osdl.org>
Cc: git@vger.kernel.org
Subject: [PATCH] binary-tree-based objects.
Date: Sat, 11 Feb 2006 20:11:49 -0800	[thread overview]
Message-ID: <7vhd75fc6y.fsf_-_@assigned-by-dhcp.cox.net> (raw)
In-Reply-To: <7vslqpi9mg.fsf@assigned-by-dhcp.cox.net> (Junio C. Hamano's message of "Sat, 11 Feb 2006 18:39:03 -0800")

This implements Linus' idea to keep objects in a binary tree,
instead of using the linear array as we currently do.

Signed-off-by: Junio C Hamano <junkio@cox.net>

---

 * I haven't benched this seriously yet.  One datapoint:

	time git-rev-list --objects v2.6.15..linus | wc -l

   are 53sec vs 22sec improvement with the same output.

 fsck-objects.c |   36 +++++++++++++++++----------------
 name-rev.c     |   17 +++++++++++-----
 object.c       |   61 +++++++++++++++++++++-----------------------------------
 object.h       |    3 ++-
 4 files changed, 55 insertions(+), 62 deletions(-)

3c160f4d94cf16db5dc9c603e98ebacbe9ac4ca7
diff --git a/fsck-objects.c b/fsck-objects.c
index 9950be2..28a7c1b 100644
--- a/fsck-objects.c
+++ b/fsck-objects.c
@@ -56,23 +56,21 @@ static int objwarning(struct object *obj
 }
 
 
-static void check_connectivity(void)
+static void check_connectivity(struct object *obj)
 {
-	int i;
-
 	/* Look up all the requirements, warn about missing objects.. */
-	for (i = 0; i < nr_objs; i++) {
-		struct object *obj = objs[i];
-
-		if (!obj->parsed) {
-			if (!standalone && has_sha1_file(obj->sha1))
-				; /* it is in pack */
-			else
-				printf("missing %s %s\n",
-				       obj->type, sha1_to_hex(obj->sha1));
-			continue;
-		}
+ again:
+	if (!obj)
+		return;
 
+	if (!obj->parsed) {
+		if (!standalone && has_sha1_file(obj->sha1))
+			; /* it is in pack */
+		else
+			printf("missing %s %s\n",
+			       obj->type, sha1_to_hex(obj->sha1));
+	}
+	else {
 		if (obj->refs) {
 			const struct object_refs *refs = obj->refs;
 			unsigned j;
@@ -91,14 +89,16 @@ static void check_connectivity(void)
 		if (show_unreachable && !(obj->flags & REACHABLE)) {
 			printf("unreachable %s %s\n",
 			       obj->type, sha1_to_hex(obj->sha1));
-			continue;
 		}
-
-		if (!obj->used) {
+		else if (!obj->used) {
 			printf("dangling %s %s\n", obj->type, 
 			       sha1_to_hex(obj->sha1));
 		}
 	}
+	if (obj->left && obj->right)
+		check_connectivity(obj->left);
+	obj = obj->right ? obj->right : obj->left;
+	goto again;
 }
 
 /*
@@ -556,6 +556,6 @@ int main(int argc, char **argv)
 		}
 	}
 
-	check_connectivity();
+	check_connectivity(objs_root);
 	return 0;
 }
diff --git a/name-rev.c b/name-rev.c
index bbadb91..a4fecfb 100644
--- a/name-rev.c
+++ b/name-rev.c
@@ -120,6 +120,17 @@ static const char* get_rev_name(struct o
 	return buffer;
 }
 	
+void show_all_names(struct object *obj)
+{
+	while (obj) {
+		printf("%s %s\n", sha1_to_hex(obj->sha1), get_rev_name(obj));
+		if (obj->left && obj->right)
+			show_all_names(obj->left);
+		obj = obj->right ? obj->right : obj->left;
+	}
+}
+
+
 int main(int argc, char **argv)
 {
 	struct object_list *revs = NULL;
@@ -230,11 +241,7 @@ int main(int argc, char **argv)
 				fwrite(p_start, p - p_start, 1, stdout);
 		}
 	} else if (all) {
-		int i;
-
-		for (i = 0; i < nr_objs; i++)
-			printf("%s %s\n", sha1_to_hex(objs[i]->sha1),
-					get_rev_name(objs[i]));
+		show_all_names(objs_root);
 	} else
 		for ( ; revs; revs = revs->next)
 			printf("%s %s\n", revs->name, get_rev_name(revs->item));
diff --git a/object.c b/object.c
index 1577f74..a1b0729 100644
--- a/object.c
+++ b/object.c
@@ -5,65 +5,50 @@
 #include "commit.h"
 #include "tag.h"
 
-struct object **objs;
+struct object *objs_root;
 int nr_objs;
-static int obj_allocs;
 
 int track_object_refs = 1;
 
-static int find_object(const unsigned char *sha1)
+static struct object **lookup_object_position(const unsigned char *sha1)
 {
-	int first = 0, last = nr_objs;
+	struct object **p = &objs_root;
 
-        while (first < last) {
-                int next = (first + last) / 2;
-                struct object *obj = objs[next];
-                int cmp;
-
-                cmp = memcmp(sha1, obj->sha1, 20);
-                if (!cmp)
-                        return next;
-                if (cmp < 0) {
-                        last = next;
-                        continue;
-                }
-                first = next+1;
-        }
-        return -first-1;
+	for (;;) {
+		struct object *object = *p;
+		int sign;
+
+		if (!object)
+			break;
+		sign = memcmp(sha1, object->sha1, 20);
+		if (!sign)
+			break;
+		p = &object->left;
+		if (sign < 0)
+			continue;
+		p = &object->right;
+	}
+	return p;
 }
 
 struct object *lookup_object(const unsigned char *sha1)
 {
-	int pos = find_object(sha1);
-	if (pos >= 0)
-		return objs[pos];
-	return NULL;
+	return *lookup_object_position(sha1);
 }
 
 void created_object(const unsigned char *sha1, struct object *obj)
 {
-	int pos = find_object(sha1);
+	struct object **op = lookup_object_position(sha1);
 
 	obj->parsed = 0;
 	memcpy(obj->sha1, sha1, 20);
 	obj->type = NULL;
 	obj->refs = NULL;
 	obj->used = 0;
-
-	if (pos >= 0)
+	obj->left = obj->right = NULL;
+	if (*op)
 		die("Inserting %s twice\n", sha1_to_hex(sha1));
-	pos = -pos-1;
-
-	if (obj_allocs == nr_objs) {
-		obj_allocs = alloc_nr(obj_allocs);
-		objs = xrealloc(objs, obj_allocs * sizeof(struct object *));
-	}
-
-	/* Insert it into the right place */
-	memmove(objs + pos + 1, objs + pos, (nr_objs - pos) * 
-		sizeof(struct object *));
-
-	objs[pos] = obj;
+	*op = obj;
 	nr_objs++;
 }
 
diff --git a/object.h b/object.h
index 0e76182..32b276d 100644
--- a/object.h
+++ b/object.h
@@ -19,12 +19,13 @@ struct object {
 	unsigned char sha1[20];
 	const char *type;
 	struct object_refs *refs;
+	struct object *left, *right;
 	void *util;
 };
 
 extern int track_object_refs;
 extern int nr_objs;
-extern struct object **objs;
+extern struct object *objs_root;
 
 /** Internal only **/
 struct object *lookup_object(const unsigned char *sha1);
-- 
1.1.6.g69c5

  reply	other threads:[~2006-02-12  4:12 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-02-11 19:14 [PATCH] Optimization for git-rev-list --objects Alexandre Julliard
2006-02-12  1:57 ` [PATCH] Use a hashtable for objects instead of a sorted list Johannes Schindelin
2006-02-12  2:46   ` Junio C Hamano
2006-02-12  8:52     ` Alexandre Julliard
2006-02-12 12:11       ` Junio C Hamano
2006-02-12 14:31         ` Johannes Schindelin
2006-02-12 11:19     ` Florian Weimer
2006-02-12 12:08       ` ***DONTUSE*** " Junio C Hamano
2006-02-12 13:46         ` Junio C Hamano
2006-02-12 17:26           ` Johannes Schindelin
2006-02-12 18:34             ` Junio C Hamano
2006-02-12  2:19 ` [PATCH] Optimization for git-rev-list --objects Linus Torvalds
2006-02-12  2:39   ` Junio C Hamano
2006-02-12  4:11     ` Junio C Hamano [this message]
2006-02-12  5:06       ` [PATCH] binary-tree-based objects Linus Torvalds
2006-02-12  5:22         ` Linus Torvalds
2006-02-12  5:23           ` Linus Torvalds
2006-02-12  5:48             ` Junio C Hamano
2006-02-12  6:07               ` Junio C Hamano
2006-02-12  6:53                 ` Linus Torvalds
2006-02-12  7:05           ` Linus Torvalds

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7vhd75fc6y.fsf_-_@assigned-by-dhcp.cox.net \
    --to=junkio@cox.net \
    --cc=git@vger.kernel.org \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).