From: Pete Zaitcev <zaitcev@redhat.com>
To: Jeff Garzik <jeff@garzik.org>
Cc: Pete Zaitcev <zaitcev@redhat.com>,
Project Hail List <hail-devel@vger.kernel.org>
Subject: [Patch 5/4] chunkd: switch objcache to hash
Date: Wed, 6 Jan 2010 22:16:05 -0700 [thread overview]
Message-ID: <20100106221605.50b6a51c@redhat.com> (raw)
In-Reply-To: <20091227165948.16edaa07@redhat.com>
On second thought, let's switch objcache from list to hash. This should
help the lookup times if there's a large number of outstanding I/Os.
This also adds the objcache_count for self-testing purpoes.
And a few comments.
Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>
---
include/objcache.h | 9 +++++---
server/objcache.c | 42 +++++++++++++++++++++++++----------------
test/objcache-unit.c | 8 ++++++-
3 files changed, 39 insertions(+), 20 deletions(-)
I think you mentioned that your scripts dealt with 5/4 patch numbering.
Just in case, setting references by replying to a previous patch mail.
commit 6238aa7321335b0a3aba9736ee1467777bdd6861
Author: Master <zaitcev@lembas.zaitcev.lan>
Date: Wed Jan 6 22:04:20 2010 -0700
Switch objcache from list to hash to make lookups faster.
diff --git a/include/objcache.h b/include/objcache.h
index fce1485..6e36e86 100644
--- a/include/objcache.h
+++ b/include/objcache.h
@@ -20,16 +20,14 @@
#define _CHUNKD_OBJCACHE_H_
#include <glib.h>
-#include <elist.h>
#include <stdbool.h>
struct objcache {
- struct list_head head;
GMutex *lock;
+ GHashTable *table;
};
struct objcache_entry {
- struct list_head link;
unsigned int hash;
unsigned int flags;
int ref;
@@ -60,6 +58,11 @@ extern bool objcache_test_dirty(struct objcache *cache,
extern void objcache_put(struct objcache *cache, struct objcache_entry *entry);
/*
+ * Count objects in the cache. Can be slow, and used only for debugging.
+ */
+extern int objcache_count(struct objcache *cache);
+
+/*
* Init a cache. Call once. May fail since it allocates a mutex.
*/
extern int objcache_init(struct objcache *cache);
diff --git a/server/objcache.c b/server/objcache.c
index 475ac23..e969f89 100644
--- a/server/objcache.c
+++ b/server/objcache.c
@@ -40,18 +40,6 @@ static unsigned int objcache_hash(const char *key, int klen)
return hash;
}
-static struct objcache_entry *objcache_lookup(struct objcache *cache,
- unsigned int hash)
-{
- struct objcache_entry *cep;
-
- list_for_each_entry(cep, &cache->head, link) {
- if (cep->hash == hash)
- return cep;
- }
- return NULL;
-}
-
static struct objcache_entry *objcache_insert(struct objcache *cache,
unsigned int hash)
{
@@ -63,10 +51,18 @@ static struct objcache_entry *objcache_insert(struct objcache *cache,
cep->hash = hash;
cep->flags = 0;
cep->ref = 1;
- list_add(&cep->link, &cache->head);
+ g_hash_table_insert(cache->table, &cep->hash, cep);
return cep;
}
+/*
+ * Observe the way we handle conflicts in the computed hash: we treat the
+ * keys with the same hash as same. It's acceptable in our application.
+ * At worst, an unrelated activity main in chunkd may spook self-check.
+ * This policy remains the same for list, tree, hash or any other implementing
+ * structure. If we use Glib's hash, it can have its own conflicts over
+ * a shared bucket indexed with our hash. We don't know anything about those.
+ */
struct objcache_entry *__objcache_get(struct objcache *cache,
const char *key, int klen,
unsigned int flag)
@@ -76,7 +72,7 @@ struct objcache_entry *__objcache_get(struct objcache *cache,
hash = objcache_hash(key, klen);
g_mutex_lock(cache->lock);
- cep = objcache_lookup(cache, hash);
+ cep = g_hash_table_lookup(cache->table, &hash);
if (cep) {
cep->ref++;
} else {
@@ -107,22 +103,36 @@ void objcache_put(struct objcache *cache, struct objcache_entry *cep)
}
--cep->ref;
if (!cep->ref) {
- list_del(&cep->link);
+ gboolean rcb;
+ rcb = g_hash_table_remove(cache->table, &cep->hash);
+ /*
+ * We are so super sure that this cannot happen that
+ * we use abort(), which is not welcome in daemons.
+ */
+ if (!rcb)
+ abort();
free(cep);
}
g_mutex_unlock(cache->lock);
}
+int objcache_count(struct objcache *cache)
+{
+ return g_hash_table_size(cache->table);
+}
+
int objcache_init(struct objcache *cache)
{
cache->lock = g_mutex_new();
if (!cache->lock)
return -1;
- INIT_LIST_HEAD(&cache->head);
+ /* We do not use g_str_hash becuse our keys may have nul bytes. */
+ cache->table = g_hash_table_new(g_int_hash, g_int_equal);
return 0;
}
void objcache_fini(struct objcache *cache)
{
g_mutex_free(cache->lock);
+ g_hash_table_destroy(cache->table);
}
diff --git a/test/objcache-unit.c b/test/objcache-unit.c
index f101445..874b57d 100644
--- a/test/objcache-unit.c
+++ b/test/objcache-unit.c
@@ -42,7 +42,10 @@ int main(int argc, char *argv[])
ep3 = objcache_get(&cache, k3, sizeof(k3));
OK(ep3 != NULL);
- OK(ep1->ref == 1); /* no collisions */
+ rc = objcache_count(&cache);
+ OK(rc == 3);
+
+ OK(ep1->ref == 1); /* no collisions, else improve hash */
objcache_put(&cache, ep1);
objcache_put(&cache, ep2);
@@ -53,6 +56,9 @@ int main(int argc, char *argv[])
OK(ep2->ref == 1); /* new */
objcache_put(&cache, ep2);
+ rc = objcache_count(&cache);
+ OK(rc == 0);
+
objcache_fini(&cache);
return 0;
}
prev parent reply other threads:[~2010-01-07 5:16 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-12-27 23:59 [Patch 3/4] chunkd: add objcache Pete Zaitcev
2010-01-07 5:16 ` Pete Zaitcev [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100106221605.50b6a51c@redhat.com \
--to=zaitcev@redhat.com \
--cc=hail-devel@vger.kernel.org \
--cc=jeff@garzik.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.