From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH] rev-list: preallocate object hash table in --all --objects
Date: Fri, 29 Mar 2013 20:20:10 +0700 [thread overview]
Message-ID: <1364563210-28813-1-git-send-email-pclouds@gmail.com> (raw)
Every time the object hash table grows, all entries must be
rearranged. The last few resizes can be really expensive when the
table already contains a lot of entries.
When we do "--all --objects" we know in advance that we may need to
hash all objects. Just make the hash table big enough up front, so
there won't be any resizing later on. The hash table is still resized
a couple of times before preallocate_hash_table() is called, but
that's ok because there aren't many objects by that time (unless you
have tons of refs, most likely tags).
On linux-2.6.git:
before after
real 0m34.402s 0m33.288s
user 0m34.111s 0m32.863s
sys 0m0.205s 0m0.352s
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
object.c | 21 +++++++++++++++++++--
object.h | 2 ++
revision.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
3 files changed, 76 insertions(+), 6 deletions(-)
diff --git a/object.c b/object.c
index 20703f5..bcfd2c6 100644
--- a/object.c
+++ b/object.c
@@ -88,10 +88,10 @@ struct object *lookup_object(const unsigned char *sha1)
return obj;
}
-static void grow_object_hash(void)
+void grow_object_hash_to(unsigned long nr)
{
int i;
- int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
+ int new_hash_size = nr < 32 ? 32 : nr * 2;
struct object **new_hash;
new_hash = xcalloc(new_hash_size, sizeof(struct object *));
@@ -106,6 +106,11 @@ static void grow_object_hash(void)
obj_hash_size = new_hash_size;
}
+static void grow_object_hash(void)
+{
+ grow_object_hash_to(obj_hash_size);
+}
+
void *create_object(const unsigned char *sha1, int type, void *o)
{
struct object *obj = o;
@@ -307,3 +312,15 @@ void clear_object_flags(unsigned flags)
obj->flags &= ~flags;
}
}
+
+int has_object_flags(unsigned flags)
+{
+ int i;
+
+ for (i = 0; i < obj_hash_size; i++) {
+ struct object *obj = obj_hash[i];
+ if (obj && (obj->flags & flags))
+ return 1;
+ }
+ return 0;
+}
diff --git a/object.h b/object.h
index 97d384b..1e8fee8 100644
--- a/object.h
+++ b/object.h
@@ -52,6 +52,7 @@ extern struct object *get_indexed_object(unsigned int);
*/
struct object *lookup_object(const unsigned char *sha1);
+extern void grow_object_hash_to(unsigned long nr);
extern void *create_object(const unsigned char *sha1, int type, void *obj);
/*
@@ -87,6 +88,7 @@ void add_object_array(struct object *obj, const char *name, struct object_array
void add_object_array_with_mode(struct object *obj, const char *name, struct object_array *array, unsigned mode);
void object_array_remove_duplicates(struct object_array *);
+int has_object_flags(unsigned flags);
void clear_object_flags(unsigned flags);
#endif /* OBJECT_H */
diff --git a/revision.c b/revision.c
index 71e62d8..f9ea2d1 100644
--- a/revision.c
+++ b/revision.c
@@ -1665,8 +1665,9 @@ static int for_each_good_bisect_ref(const char *submodule, each_ref_fn fn, void
}
static int handle_revision_pseudo_opt(const char *submodule,
- struct rev_info *revs,
- int argc, const char **argv, int *flags)
+ struct rev_info *revs,
+ int argc, const char **argv,
+ int *flags, int *all)
{
const char *arg = argv[0];
const char *optarg;
@@ -1685,6 +1686,7 @@ static int handle_revision_pseudo_opt(const char *submodule,
if (!strcmp(arg, "--all")) {
handle_refs(submodule, revs, *flags, for_each_ref_submodule);
handle_refs(submodule, revs, *flags, head_ref_submodule);
+ *all = 1;
} else if (!strcmp(arg, "--branches")) {
handle_refs(submodule, revs, *flags, for_each_branch_ref_submodule);
} else if (!strcmp(arg, "--bisect")) {
@@ -1738,6 +1740,49 @@ static int handle_revision_pseudo_opt(const char *submodule,
return 1;
}
+static void preallocate_hash_table(void)
+{
+ unsigned long cnt = 0;
+ struct packed_git *p;
+ int i;
+
+ if (has_object_flags(UNINTERESTING))
+ /*
+ * nope this is not simply "--all --objects"
+ * not worth preallocation.
+ */
+ return;
+
+ for (i = 0; i < 256; i++) {
+ struct dirent *ent;
+ DIR *d = opendir(git_path("objects/%02x", i));
+ if (!d)
+ continue;
+ while ((ent = readdir(d)) != NULL)
+ /*
+ * We only worry about insufficient size which
+ * leads to expensive growths later on. A few
+ * extra slots in the hash table would not hurt.
+ */
+ cnt++;
+ closedir(d);
+ }
+
+ if (!packed_git)
+ prepare_packed_git();
+
+ for (p = packed_git; p; p = p->next) {
+ if (!p->pack_local)
+ /* this may lead to extra growths later */
+ continue;
+ if (open_pack_index(p))
+ continue;
+ cnt += p->num_objects;
+ }
+
+ grow_object_hash_to(cnt);
+}
+
/*
* Parse revision information, filling in the "rev_info" structure,
* and removing the used arguments from the argument list.
@@ -1750,6 +1795,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
int i, flags, left, seen_dashdash, read_from_stdin, got_rev_arg = 0, revarg_opt;
struct cmdline_pathspec prune_data;
const char *submodule = NULL;
+ int all = 0;
memset(&prune_data, 0, sizeof(prune_data));
if (opt)
@@ -1785,8 +1831,9 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
int opts;
opts = handle_revision_pseudo_opt(submodule,
- revs, argc - i, argv + i,
- &flags);
+ revs, argc - i,
+ argv + i,
+ &flags, &all);
if (opts > 0) {
i += opts - 1;
continue;
@@ -1856,6 +1903,10 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
get_pathspec(revs->prefix, prune_data.path));
}
+ if (all && revs->tag_objects &&
+ revs->tree_objects && revs->blob_objects)
+ preallocate_hash_table();
+
if (revs->def == NULL)
revs->def = opt ? opt->def : NULL;
if (opt && opt->tweak)
--
1.8.2.83.gc99314b
next reply other threads:[~2013-03-29 13:20 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-03-29 13:20 Nguyễn Thái Ngọc Duy [this message]
2013-03-29 15:12 ` [PATCH] rev-list: preallocate object hash table in --all --objects Jeff King
2013-03-29 15:29 ` Duy Nguyen
2013-03-29 20:32 ` Jeff King
2013-04-01 18:33 ` Jeff King
2013-03-29 16:04 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1364563210-28813-1-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).