From: Junio C Hamano <gitster@pobox.com>
To: git@vger.kernel.org
Subject: [PATCH 07/11] object: try naive cuckoo hashing
Date: Thu, 11 Aug 2011 10:53:12 -0700 [thread overview]
Message-ID: <1313085196-13249-8-git-send-email-gitster@pobox.com> (raw)
In-Reply-To: <1313085196-13249-1-git-send-email-gitster@pobox.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
../+v/65c71a61547aecd8a6830391fda31ed5d18e0529/git-pack-objects
Counting objects: 2139209, done.
32.04user 3.43system 0:35.58elapsed 99%CPU (0avgtext+0avgdata 8178672maxresident)k
0inputs+0outputs (0major+1206546minor)pagefaults 0swaps
Counting objects: 2139209, done.
33.41user 3.15system 0:36.67elapsed 99%CPU (0avgtext+0avgdata 8178688maxresident)k
0inputs+0outputs (0major+1206547minor)pagefaults 0swaps
Counting objects: 2139209, done.
32.24user 3.17system 0:35.51elapsed 99%CPU (0avgtext+0avgdata 8179968maxresident)k
0inputs+0outputs (0major+1206598minor)pagefaults 0swaps
---
object.c | 99 ++++++++++++++++++++++++++++++++++++++++++--------------------
1 files changed, 67 insertions(+), 32 deletions(-)
diff --git a/object.c b/object.c
index c2c0a7d..7624c48 100644
--- a/object.c
+++ b/object.c
@@ -50,33 +50,52 @@ static unsigned int hash_val(const unsigned char *sha1)
return hash;
}
-static void insert_obj_hash(struct object *obj, struct object **hash, unsigned int size)
-{
- unsigned int j = hash_val(obj->sha1) & (size-1);
- while (hash[j]) {
- j++;
- if (j >= size)
- j = 0;
- }
- hash[j] = obj;
-}
+#define H1(sha1) (hash_val(sha1) % obj_hash_size)
+#define H2(sha1) (hash_val((sha1) + sizeof(unsigned int)) % obj_hash_size)
struct object *lookup_object(const unsigned char *sha1)
{
- unsigned int i;
struct object *obj;
if (!obj_hash)
return NULL;
- i = hash_val(sha1) & (obj_hash_size-1);
- while ((obj = obj_hash[i]) != NULL) {
- if (!hashcmp(sha1, obj->sha1))
- break;
- i++;
- if (i == obj_hash_size)
- i = 0;
+ if ((obj = obj_hash[H1(sha1)]) && !hashcmp(sha1, obj->sha1))
+ return obj;
+ if ((obj = obj_hash[H2(sha1)]) && !hashcmp(sha1, obj->sha1))
+ return obj;
+ return NULL;
+}
+
+static void grow_object_hash(void); /* forward */
+
+/*
+ * A naive single-table cuckoo hashing implementation.
+ * Return NULL when "obj" is successfully inserted. Otherwise
+ * return a pointer to the object to be inserted (which may
+ * be different from the original obj). The caller is expected
+ * to grow the hash table and re-insert the returned object.
+ */
+static struct object *insert_obj_hash(struct object *obj)
+{
+ int loop;
+
+ for (loop = obj_hash_size / 2; 0 <= loop; loop--) {
+ struct object *tmp_obj;
+ unsigned int ix;
+
+ ix = H1(obj->sha1);
+ if (!obj_hash[ix]) {
+ obj_hash[ix] = obj;
+ return NULL;
+ }
+ ix = H2(obj->sha1);
+ tmp_obj = obj_hash[ix];
+ obj_hash[ix] = obj;
+ if (!tmp_obj)
+ return NULL;
+ obj = tmp_obj;
}
return obj;
}
@@ -89,25 +108,36 @@ static int next_size(int sz)
static void grow_object_hash(void)
{
- int i;
- int new_hash_size = next_size(obj_hash_size);
- struct object **new_hash;
-
- new_hash = xcalloc(new_hash_size, sizeof(struct object *));
- for (i = 0; i < obj_hash_size; i++) {
- struct object *obj = obj_hash[i];
- if (!obj)
+ struct object **current_hash;
+ int current_size;
+
+ current_hash = obj_hash;
+ current_size = obj_hash_size;
+ while (1) {
+ int i;
+ obj_hash_size = next_size(obj_hash_size);
+ obj_hash = xcalloc(obj_hash_size, sizeof(struct object *));
+
+ for (i = 0; i < current_size; i++) {
+ if (!current_hash[i])
+ continue;
+ if (insert_obj_hash(current_hash[i]))
+ break;
+ }
+ if (i < current_size) {
+ /* too small - grow and retry */
+ free(obj_hash);
continue;
- insert_obj_hash(obj, new_hash, new_hash_size);
+ }
+ free(current_hash);
+ return;
}
- free(obj_hash);
- obj_hash = new_hash;
- obj_hash_size = new_hash_size;
}
void *create_object(const unsigned char *sha1, int type, void *o)
{
struct object *obj = o;
+ struct object *to_insert;
obj->parsed = 0;
obj->used = 0;
@@ -117,8 +147,13 @@ void *create_object(const unsigned char *sha1, int type, void *o)
if (obj_hash_size - 1 <= nr_objs * 2)
grow_object_hash();
-
- insert_obj_hash(obj, obj_hash, obj_hash_size);
+ to_insert = obj;
+ while (1) {
+ to_insert = insert_obj_hash(to_insert);
+ if (!to_insert)
+ break;
+ grow_object_hash();
+ }
nr_objs++;
return obj;
}
--
1.7.6.433.g1421f
next prev parent reply other threads:[~2011-08-11 17:54 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-08-11 17:53 [PATCH 00/11] Micro-optimizing lookup_object() Junio C Hamano
2011-08-11 17:53 ` [PATCH 01/11] object.c: code movement for readability Junio C Hamano
2011-08-11 17:53 ` [PATCH 02/11] object.c: remove duplicated code for object hashing Junio C Hamano
2011-08-11 17:53 ` [PATCH 03/11] pack-objects --count-only Junio C Hamano
2011-08-11 17:53 ` [PATCH 04/11] object: next_size() helper for readability Junio C Hamano
2011-08-11 17:53 ` [PATCH 05/11] object hash: we know the table size is a power of two Junio C Hamano
2011-08-11 17:53 ` [PATCH 06/11] object: growing the hash-table more aggressively does not help much Junio C Hamano
2011-08-11 17:53 ` Junio C Hamano [this message]
2011-08-11 17:53 ` [PATCH 08/11] object: try 5-way cuckoo -- use all 20-bytes of SHA-1 Junio C Hamano
2011-08-11 17:53 ` [PATCH 09/11] object: try 4-way cuckoo Junio C Hamano
2011-08-11 17:53 ` [PATCH 10/11] object: try 3-way cuckoo Junio C Hamano
2011-08-11 17:53 ` [PATCH 11/11] object: try 2-way cuckoo again Junio C Hamano
2011-08-11 23:33 ` [FFT/PATCH 12/11] object.c: make object hash implementation more opaque Junio C Hamano
2011-08-12 15:59 ` git_checkattr() is inefficient when repeated [Re: [PATCH 00/11] Micro-optimizing lookup_object()] Thomas Rast
2011-08-15 23:19 ` Junio C Hamano
-- strict thread matches above, loose matches on Subject: below --
2011-08-13 10:22 [PATCH 07/11] object: try naive cuckoo hashing George Spelvin
2011-08-13 19:12 ` George Spelvin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1313085196-13249-8-git-send-email-gitster@pobox.com \
--to=gitster@pobox.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).