From: Junio C Hamano <gitster@pobox.com>
To: git@vger.kernel.org
Subject: [PATCH 07/11] object: try naive cuckoo hashing
Date: Thu, 11 Aug 2011 10:53:12 -0700 [thread overview]
Message-ID: <1313085196-13249-8-git-send-email-gitster@pobox.com> (raw)
In-Reply-To: <1313085196-13249-1-git-send-email-gitster@pobox.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
../+v/65c71a61547aecd8a6830391fda31ed5d18e0529/git-pack-objects
Counting objects: 2139209, done.
32.04user 3.43system 0:35.58elapsed 99%CPU (0avgtext+0avgdata 8178672maxresident)k
0inputs+0outputs (0major+1206546minor)pagefaults 0swaps
Counting objects: 2139209, done.
33.41user 3.15system 0:36.67elapsed 99%CPU (0avgtext+0avgdata 8178688maxresident)k
0inputs+0outputs (0major+1206547minor)pagefaults 0swaps
Counting objects: 2139209, done.
32.24user 3.17system 0:35.51elapsed 99%CPU (0avgtext+0avgdata 8179968maxresident)k
0inputs+0outputs (0major+1206598minor)pagefaults 0swaps
---
object.c | 99 ++++++++++++++++++++++++++++++++++++++++++--------------------
1 files changed, 67 insertions(+), 32 deletions(-)
diff --git a/object.c b/object.c
index c2c0a7d..7624c48 100644
--- a/object.c
+++ b/object.c
@@ -50,33 +50,52 @@ static unsigned int hash_val(const unsigned char *sha1)
return hash;
}
-static void insert_obj_hash(struct object *obj, struct object **hash, unsigned int size)
-{
- unsigned int j = hash_val(obj->sha1) & (size-1);
- while (hash[j]) {
- j++;
- if (j >= size)
- j = 0;
- }
- hash[j] = obj;
-}
+#define H1(sha1) (hash_val(sha1) % obj_hash_size)
+#define H2(sha1) (hash_val((sha1) + sizeof(unsigned int)) % obj_hash_size)
struct object *lookup_object(const unsigned char *sha1)
{
- unsigned int i;
struct object *obj;
if (!obj_hash)
return NULL;
- i = hash_val(sha1) & (obj_hash_size-1);
- while ((obj = obj_hash[i]) != NULL) {
- if (!hashcmp(sha1, obj->sha1))
- break;
- i++;
- if (i == obj_hash_size)
- i = 0;
+ if ((obj = obj_hash[H1(sha1)]) && !hashcmp(sha1, obj->sha1))
+ return obj;
+ if ((obj = obj_hash[H2(sha1)]) && !hashcmp(sha1, obj->sha1))
+ return obj;
+ return NULL;
+}
+
+static void grow_object_hash(void); /* forward */
+
+/*
+ * A naive single-table cuckoo hashing implementation.
+ * Return NULL when "obj" is successfully inserted. Otherwise
+ * return a pointer to the object to be inserted (which may
+ * be different from the original obj). The caller is expected
+ * to grow the hash table and re-insert the returned object.
+ */
+static struct object *insert_obj_hash(struct object *obj)
+{
+ int loop;
+
+ for (loop = obj_hash_size / 2; 0 <= loop; loop--) {
+ struct object *tmp_obj;
+ unsigned int ix;
+
+ ix = H1(obj->sha1);
+ if (!obj_hash[ix]) {
+ obj_hash[ix] = obj;
+ return NULL;
+ }
+ ix = H2(obj->sha1);
+ tmp_obj = obj_hash[ix];
+ obj_hash[ix] = obj;
+ if (!tmp_obj)
+ return NULL;
+ obj = tmp_obj;
}
return obj;
}
@@ -89,25 +108,36 @@ static int next_size(int sz)
static void grow_object_hash(void)
{
- int i;
- int new_hash_size = next_size(obj_hash_size);
- struct object **new_hash;
-
- new_hash = xcalloc(new_hash_size, sizeof(struct object *));
- for (i = 0; i < obj_hash_size; i++) {
- struct object *obj = obj_hash[i];
- if (!obj)
+ struct object **current_hash;
+ int current_size;
+
+ current_hash = obj_hash;
+ current_size = obj_hash_size;
+ while (1) {
+ int i;
+ obj_hash_size = next_size(obj_hash_size);
+ obj_hash = xcalloc(obj_hash_size, sizeof(struct object *));
+
+ for (i = 0; i < current_size; i++) {
+ if (!current_hash[i])
+ continue;
+ if (insert_obj_hash(current_hash[i]))
+ break;
+ }
+ if (i < current_size) {
+ /* too small - grow and retry */
+ free(obj_hash);
continue;
- insert_obj_hash(obj, new_hash, new_hash_size);
+ }
+ free(current_hash);
+ return;
}
- free(obj_hash);
- obj_hash = new_hash;
- obj_hash_size = new_hash_size;
}
void *create_object(const unsigned char *sha1, int type, void *o)
{
struct object *obj = o;
+ struct object *to_insert;
obj->parsed = 0;
obj->used = 0;
@@ -117,8 +147,13 @@ void *create_object(const unsigned char *sha1, int type, void *o)
if (obj_hash_size - 1 <= nr_objs * 2)
grow_object_hash();
-
- insert_obj_hash(obj, obj_hash, obj_hash_size);
+ to_insert = obj;
+ while (1) {
+ to_insert = insert_obj_hash(to_insert);
+ if (!to_insert)
+ break;
+ grow_object_hash();
+ }
nr_objs++;
return obj;
}
--
1.7.6.433.g1421f
next prev parent reply other threads:[~2011-08-11 17:54 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-08-11 17:53 [PATCH 00/11] Micro-optimizing lookup_object() Junio C Hamano
2011-08-11 17:53 ` [PATCH 01/11] object.c: code movement for readability Junio C Hamano
2011-08-11 17:53 ` [PATCH 02/11] object.c: remove duplicated code for object hashing Junio C Hamano
2011-08-11 17:53 ` [PATCH 03/11] pack-objects --count-only Junio C Hamano
2011-08-11 17:53 ` [PATCH 04/11] object: next_size() helper for readability Junio C Hamano
2011-08-11 17:53 ` [PATCH 05/11] object hash: we know the table size is a power of two Junio C Hamano
2011-08-11 17:53 ` [PATCH 06/11] object: growing the hash-table more aggressively does not help much Junio C Hamano
2011-08-11 17:53 ` Junio C Hamano [this message]
2011-08-11 17:53 ` [PATCH 08/11] object: try 5-way cuckoo -- use all 20-bytes of SHA-1 Junio C Hamano
2011-08-11 17:53 ` [PATCH 09/11] object: try 4-way cuckoo Junio C Hamano
2011-08-11 17:53 ` [PATCH 10/11] object: try 3-way cuckoo Junio C Hamano
2011-08-11 17:53 ` [PATCH 11/11] object: try 2-way cuckoo again Junio C Hamano
2011-08-11 23:33 ` [FFT/PATCH 12/11] object.c: make object hash implementation more opaque Junio C Hamano
2011-08-12 15:59 ` git_checkattr() is inefficient when repeated [Re: [PATCH 00/11] Micro-optimizing lookup_object()] Thomas Rast
2011-08-15 23:19 ` Junio C Hamano
-- strict thread matches above, loose matches on Subject: below --
2011-08-13 10:22 [PATCH 07/11] object: try naive cuckoo hashing George Spelvin
2011-08-13 19:12 ` George Spelvin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1313085196-13249-8-git-send-email-gitster@pobox.com \
--to=gitster@pobox.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.