From: Alexandre Julliard <julliard@winehq.org>
To: git@vger.kernel.org
Subject: [PATCH] Optimization for git-rev-list --objects
Date: Sat, 11 Feb 2006 20:14:57 +0100 [thread overview]
Message-ID: <87slqpg11q.fsf@wine.dyndns.org> (raw)
When building a large list of objects, most of the time is spent in
created_object() inserting the objects in the sorted list. This patch
splits the list in 256 sublists to reduce the impact of the O(n^2)
list insertion.
On the WineHQ repository (220,000 objects), the patch reduces the time
needed for a 'git-rev-list --objects HEAD' from 2 minutes to 20 seconds.
Signed-off-by: Alexandre Julliard <julliard@winehq.org>
---
fsck-objects.c | 66 +++++++++++++++++++++++++++++---------------------------
name-rev.c | 9 ++++----
object.c | 24 ++++++++++----------
object.h | 4 ++-
4 files changed, 53 insertions(+), 50 deletions(-)
68f8fcb7f5883ecc44a80e822e0b78ee8efd9eb9
diff --git a/fsck-objects.c b/fsck-objects.c
index 9950be2..5bdb21e 100644
--- a/fsck-objects.c
+++ b/fsck-objects.c
@@ -58,45 +58,47 @@ static int objwarning(struct object *obj
static void check_connectivity(void)
{
- int i;
+ int i, j;
/* Look up all the requirements, warn about missing objects.. */
- for (i = 0; i < nr_objs; i++) {
- struct object *obj = objs[i];
+ for (i = 0; i < 256; i++) {
+ for (j = 0; j < nr_objs[i]; j++) {
+ struct object *obj = objs[i][j];
- if (!obj->parsed) {
- if (!standalone && has_sha1_file(obj->sha1))
- ; /* it is in pack */
- else
- printf("missing %s %s\n",
- obj->type, sha1_to_hex(obj->sha1));
- continue;
- }
+ if (!obj->parsed) {
+ if (!standalone && has_sha1_file(obj->sha1))
+ ; /* it is in pack */
+ else
+ printf("missing %s %s\n",
+ obj->type, sha1_to_hex(obj->sha1));
+ continue;
+ }
- if (obj->refs) {
- const struct object_refs *refs = obj->refs;
- unsigned j;
- for (j = 0; j < refs->count; j++) {
- struct object *ref = refs->ref[j];
- if (ref->parsed ||
- (!standalone && has_sha1_file(ref->sha1)))
- continue;
- printf("broken link from %7s %s\n",
- obj->type, sha1_to_hex(obj->sha1));
- printf(" to %7s %s\n",
- ref->type, sha1_to_hex(ref->sha1));
+ if (obj->refs) {
+ const struct object_refs *refs = obj->refs;
+ unsigned k;
+ for (k = 0; k < refs->count; k++) {
+ struct object *ref = refs->ref[k];
+ if (ref->parsed ||
+ (!standalone && has_sha1_file(ref->sha1)))
+ continue;
+ printf("broken link from %7s %s\n",
+ obj->type, sha1_to_hex(obj->sha1));
+ printf(" to %7s %s\n",
+ ref->type, sha1_to_hex(ref->sha1));
+ }
}
- }
- if (show_unreachable && !(obj->flags & REACHABLE)) {
- printf("unreachable %s %s\n",
- obj->type, sha1_to_hex(obj->sha1));
- continue;
- }
+ if (show_unreachable && !(obj->flags & REACHABLE)) {
+ printf("unreachable %s %s\n",
+ obj->type, sha1_to_hex(obj->sha1));
+ continue;
+ }
- if (!obj->used) {
- printf("dangling %s %s\n", obj->type,
- sha1_to_hex(obj->sha1));
+ if (!obj->used) {
+ printf("dangling %s %s\n", obj->type,
+ sha1_to_hex(obj->sha1));
+ }
}
}
}
diff --git a/name-rev.c b/name-rev.c
index bbadb91..9a8d086 100644
--- a/name-rev.c
+++ b/name-rev.c
@@ -230,11 +230,12 @@ int main(int argc, char **argv)
fwrite(p_start, p - p_start, 1, stdout);
}
} else if (all) {
- int i;
+ int i, j;
- for (i = 0; i < nr_objs; i++)
- printf("%s %s\n", sha1_to_hex(objs[i]->sha1),
- get_rev_name(objs[i]));
+ for (i = 0; i < 256; i++)
+ for (j = 0; j < nr_objs[i]; j++)
+ printf("%s %s\n", sha1_to_hex(objs[i][j]->sha1),
+ get_rev_name(objs[i][j]));
} else
for ( ; revs; revs = revs->next)
printf("%s %s\n", revs->name, get_rev_name(revs->item));
diff --git a/object.c b/object.c
index 1577f74..fcd4d0c 100644
--- a/object.c
+++ b/object.c
@@ -5,19 +5,19 @@
#include "commit.h"
#include "tag.h"
-struct object **objs;
-int nr_objs;
-static int obj_allocs;
+struct object **objs[256];
+int nr_objs[256];
+static int obj_allocs[256];
int track_object_refs = 1;
static int find_object(const unsigned char *sha1)
{
- int first = 0, last = nr_objs;
+ int first = 0, last = nr_objs[*sha1];
while (first < last) {
int next = (first + last) / 2;
- struct object *obj = objs[next];
+ struct object *obj = objs[*sha1][next];
int cmp;
cmp = memcmp(sha1, obj->sha1, 20);
@@ -36,7 +36,7 @@ struct object *lookup_object(const unsig
{
int pos = find_object(sha1);
if (pos >= 0)
- return objs[pos];
+ return objs[*sha1][pos];
return NULL;
}
@@ -54,17 +54,17 @@ void created_object(const unsigned char
die("Inserting %s twice\n", sha1_to_hex(sha1));
pos = -pos-1;
- if (obj_allocs == nr_objs) {
- obj_allocs = alloc_nr(obj_allocs);
- objs = xrealloc(objs, obj_allocs * sizeof(struct object *));
+ if (obj_allocs[*sha1] == nr_objs[*sha1]) {
+ obj_allocs[*sha1] = alloc_nr(obj_allocs[*sha1]);
+ objs[*sha1] = xrealloc(objs[*sha1], obj_allocs[*sha1] * sizeof(struct object *));
}
/* Insert it into the right place */
- memmove(objs + pos + 1, objs + pos, (nr_objs - pos) *
+ memmove(objs[*sha1] + pos + 1, objs[*sha1] + pos, (nr_objs[*sha1] - pos) *
sizeof(struct object *));
- objs[pos] = obj;
- nr_objs++;
+ objs[*sha1][pos] = obj;
+ nr_objs[*sha1]++;
}
struct object_refs *alloc_object_refs(unsigned count)
diff --git a/object.h b/object.h
index 0e76182..dcd6ac4 100644
--- a/object.h
+++ b/object.h
@@ -23,8 +23,8 @@ struct object {
};
extern int track_object_refs;
-extern int nr_objs;
-extern struct object **objs;
+extern int nr_objs[256];
+extern struct object **objs[256];
/** Internal only **/
struct object *lookup_object(const unsigned char *sha1);
--
1.1.6.g68f8
--
Alexandre Julliard
julliard@winehq.org
next reply other threads:[~2006-02-11 19:15 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-02-11 19:14 Alexandre Julliard [this message]
2006-02-12 1:57 ` [PATCH] Use a hashtable for objects instead of a sorted list Johannes Schindelin
2006-02-12 2:46 ` Junio C Hamano
2006-02-12 8:52 ` Alexandre Julliard
2006-02-12 12:11 ` Junio C Hamano
2006-02-12 14:31 ` Johannes Schindelin
2006-02-12 11:19 ` Florian Weimer
2006-02-12 12:08 ` ***DONTUSE*** " Junio C Hamano
2006-02-12 13:46 ` Junio C Hamano
2006-02-12 17:26 ` Johannes Schindelin
2006-02-12 18:34 ` Junio C Hamano
2006-02-12 2:19 ` [PATCH] Optimization for git-rev-list --objects Linus Torvalds
2006-02-12 2:39 ` Junio C Hamano
2006-02-12 4:11 ` [PATCH] binary-tree-based objects Junio C Hamano
2006-02-12 5:06 ` Linus Torvalds
2006-02-12 5:22 ` Linus Torvalds
2006-02-12 5:23 ` Linus Torvalds
2006-02-12 5:48 ` Junio C Hamano
2006-02-12 6:07 ` Junio C Hamano
2006-02-12 6:53 ` Linus Torvalds
2006-02-12 7:05 ` Linus Torvalds
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87slqpg11q.fsf@wine.dyndns.org \
--to=julliard@winehq.org \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).