[PATCH 4/8] knfsd: repcache: split hash index

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 4/8] knfsd: repcache: split hash index
@ 2006-10-11 11:27 Greg Banks
  2006-10-16  2:00 ` Neil Brown
  0 siblings, 1 reply; 6+ messages in thread
From: Greg Banks @ 2006-10-11 11:27 UTC (permalink / raw)
  To: Neil Brown; +Cc: Linux NFS Mailing List

knfsd: Split the duplicate request cache hash into multiple buckets
on SMP platforms, to avoid lock contention and bouncing cachelines.

Each non-idempotent NFS call (such as WRITE calls) exercises the
repcache twice: a lookup before the call is dispatched and an insert
afterward.  On write-heavy workloads, the single repcache spinlock
is a contention point.

On a 4 cpu Altix A350, running at circa 80,000 nonidempotent
calls per second, this change reduced CPU usage in nfsd_cache_lookup()
from 27.1% to 12.8% (out of 400%).

The hash index itself is also split into multiple smaller allocations,
one per bucket.  This allows the total effective size of the index to
be larger than without running the risk of running into the kmalloc()
size limit (an earlier version of this patch set resized the hash
index and would run into that limit).

Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
---

 fs/nfsd/nfscache.c |  166 ++++++++++++++++++++++++++++++------------
 1 files changed, 120 insertions(+), 46 deletions(-)

Index: linux-git-20061009/fs/nfsd/nfscache.c
===================================================================
--- linux-git-20061009.orig/fs/nfsd/nfscache.c	2006-10-10 16:37:52.170497220 +1000
+++ linux-git-20061009/fs/nfsd/nfscache.c	2006-10-10 16:39:12.624126466 +1000
@@ -8,6 +8,9 @@
  * it does things a bit differently.
  *
  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ *
+ * SMP lock splitting by Greg Banks <gnb@melbourne.sgi.com>
+ *     Copyright (c) 2005-2006 Silicon Graphics, Inc.
  */
 
 #include <linux/kernel.h>
@@ -28,10 +31,43 @@
  * DEC Unix:	512-4096
  */
 #define CACHESIZE		1024
-#define HASHSIZE		64
+/* number of buckets used to manage LRU lists and cache locks (power of 2) */
+#ifdef CONFIG_SMP
+#define CACHE_NUM_BUCKETS	64
+#else
+#define CACHE_NUM_BUCKETS	1
+#endif
+/* largest possible number of entries in all LRU lists (power of 2) */
+#define CACHE_MAX_SIZE		(16*1024*CACHE_NUM_BUCKETS)
+/* largest possible number of entries in LRU per bucket */
+#define CACHE_BUCKET_MAX_SIZE	(CACHE_MAX_SIZE/CACHE_NUM_BUCKETS)
+/* log2 of largest desired hash chain length */
+#define MAX_CHAIN_ORDER		2
+/* size of the per-bucket hash table */
+#define HASHSIZE		((CACHE_MAX_SIZE>>MAX_CHAIN_ORDER)/CACHE_NUM_BUCKETS)
+
+/*
+ * locking for the reply cache:
+ * A cache entry is "single use" if c_state == RC_INPROG
+ * Otherwise, it when accessing _prev or _next, the lock for the
+ * appropriate bucket must be held.
+ *
+ * On uniprocessor systems, we have a single global lock and LRU
+ * list.  However on multiprocessor systems, to reduce contention
+ * on that spinlock under heavy nonidempotent load (think chmod -R)
+ * we use multiple buckets.  The lower few bits of the hash index
+ * are used to index the buckets.
+ */
+struct svc_cache_bucket
+{
+	spinlock_t lock;
+	struct list_head lru;
+	unsigned int size;
+	struct hlist_head *hash;
+} ____cacheline_aligned_in_smp;
 
-static struct hlist_head *	hash_list;
-static struct list_head 	lru_head;
+static struct svc_cache_bucket	cache_buckets[CACHE_NUM_BUCKETS];
+#define bucket_for_hash(hash)	(&cache_buckets[(hash) & (CACHE_NUM_BUCKETS-1)])
 static int			cache_disabled = 1;
 
 /*
@@ -45,31 +81,30 @@ request_hash(u32 xid)
 	u32 h = xid;
 	h ^= (xid >> 24);
 	h ^= ((xid & 0xff0000) >> 8);
-	return h & (HASHSIZE-1);
+	return h;
 }
 
 static int	nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
 
 /*
- * locking for the reply cache:
- * A cache entry is "single use" if c_state == RC_INPROG
- * Otherwise, it when accessing _prev or _next, the lock must be held.
+ * Initialise the reply cache data structures.
+ * Called without cache_lock, uses it internally.  Returns
+ * 0 on success, an error otherwise.
  */
-static DEFINE_SPINLOCK(cache_lock);
-
-void
-nfsd_cache_init(void)
+static void
+nfsd_cache_bucket_init(struct svc_cache_bucket *b, unsigned int nrecs)
 {
 	struct svc_cacherep	*rp;
-	int			i;
+	unsigned int		i;
+	LIST_HEAD(lru);
 
-	INIT_LIST_HEAD(&lru_head);
-	i = CACHESIZE;
+	/* allocate new entries without the lock, keep them on their own list */
+	i = nrecs;
 	while (i) {
 		rp = kmalloc(sizeof(*rp), GFP_KERNEL);
 		if (!rp)
 			break;
-		list_add(&rp->c_lru, &lru_head);
+		list_add(&rp->c_lru, &lru);
 		rp->c_state = RC_UNUSED;
 		rp->c_type = RC_NOCACHE;
 		INIT_HLIST_NODE(&rp->c_hash);
@@ -77,17 +112,41 @@ nfsd_cache_init(void)
 	}
 
 	if (i)
-		printk (KERN_ERR "nfsd: cannot allocate all %d cache entries, only got %d\n",
-			CACHESIZE, CACHESIZE-i);
+		printk (KERN_ERR "nfsd: cannot allocate all %u cache entries, only got %u\n",
+			nrecs, nrecs-i);
 
-	hash_list = kmalloc (HASHSIZE * sizeof(struct hlist_head), GFP_KERNEL);
-	if (!hash_list) {
-		nfsd_cache_shutdown();
-		printk (KERN_ERR "nfsd: cannot allocate %Zd bytes for hash list\n",
-			HASHSIZE * sizeof(struct hlist_head));
-		return;
+
+	/* add the new entries */
+	spin_lock(&b->lock);
+
+	b->size += (nrecs-i);
+	list_splice(&lru, &b->lru);
+
+	spin_unlock(&b->lock);
+}
+
+void
+nfsd_cache_init(void)
+{
+	unsigned int bucket;
+
+	for (bucket = 0 ; bucket < CACHE_NUM_BUCKETS ; bucket++)
+	{
+		struct svc_cache_bucket *b = &cache_buckets[bucket];
+
+		INIT_LIST_HEAD(&b->lru);
+		spin_lock_init(&b->lock);
+		nfsd_cache_bucket_init(b, CACHESIZE/CACHE_NUM_BUCKETS);
+
+		b->hash = kmalloc (HASHSIZE * sizeof(struct hlist_head), GFP_KERNEL);
+		if (!b->hash) {
+			nfsd_cache_shutdown();
+			printk (KERN_ERR "nfsd: cannot allocate %Zd bytes for hash index\n",
+				HASHSIZE * sizeof(struct hlist_head));
+			return;
+		}
+		memset(b->hash, 0, HASHSIZE * sizeof(struct hlist_head));
 	}
-	memset(hash_list, 0, HASHSIZE * sizeof(struct hlist_head));
 
 	cache_disabled = 0;
 }
@@ -96,28 +155,34 @@ void
 nfsd_cache_shutdown(void)
 {
 	struct svc_cacherep	*rp;
+	unsigned int bucket;
 
-	while (!list_empty(&lru_head)) {
-		rp = list_entry(lru_head.next, struct svc_cacherep, c_lru);
-		if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF)
-			kfree(rp->c_replvec.iov_base);
-		list_del(&rp->c_lru);
-		kfree(rp);
+	for (bucket = 0 ; bucket < CACHE_NUM_BUCKETS ; bucket++)
+	{
+		struct svc_cache_bucket *b = &cache_buckets[bucket];
+
+		while (!list_empty(&b->lru)) {
+			rp = list_entry(b->lru.next, struct svc_cacherep, c_lru);
+			if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF)
+				kfree(rp->c_replvec.iov_base);
+			list_del(&rp->c_lru);
+			kfree(rp);
+		}
+		kfree (b->hash);
+		b->hash = NULL;
 	}
 
 	cache_disabled = 1;
 
-	kfree (hash_list);
-	hash_list = NULL;
 }
 
 /*
  * Move cache entry to end of LRU list
  */
 static void
-lru_put_end(struct svc_cacherep *rp)
+lru_put_end(struct svc_cache_bucket *b, struct svc_cacherep *rp)
 {
-	list_move_tail(&rp->c_lru, &lru_head);
+	list_move_tail(&rp->c_lru, &b->lru);
 }
 
 /*
@@ -135,20 +200,26 @@ nfsd_cache_lookup(struct svc_rqst *rqstp
 				proto =  rqstp->rq_prot,
 				vers = rqstp->rq_vers,
 				proc = rqstp->rq_proc;
+	u32			h;
+	struct svc_cache_bucket *b;
 	unsigned long		age;
 	int			rtn;
 	int			safe = 0;
 
+	h = request_hash(xid);
+	b = bucket_for_hash(h);
+	h = (h / CACHE_NUM_BUCKETS) & (HASHSIZE-1);
+
 	rqstp->rq_cacherep = NULL;
 	if (cache_disabled || type == RC_NOCACHE) {
 		nfsdstats.rcnocache++;
 		return RC_DOIT;
 	}
 
-	spin_lock(&cache_lock);
+	spin_lock(&b->lock);
 	rtn = RC_DOIT;
 
-	rh = &hash_list[request_hash(xid)];
+	rh = &b->hash[h];
 	hlist_for_each_entry(rp, hn, rh, c_hash) {
 		if (rp->c_state != RC_UNUSED &&
 		    xid == rp->c_xid && proc == rp->c_proc &&
@@ -162,10 +233,10 @@ nfsd_cache_lookup(struct svc_rqst *rqstp
 	nfsdstats.rcmisses++;
 
 	/* This loop shouldn't take more than a few iterations normally */
-	list_for_each_entry(rp, &lru_head, c_lru) {
+	list_for_each_entry(rp, &b->lru, c_lru) {
 		if (rp->c_state != RC_INPROG)
 			break;
-		if (safe++ > CACHESIZE) {
+		if (safe++ > b->size) {
 			printk("nfsd: loop in repcache LRU list\n");
 			cache_disabled = 1;
 			goto out;
@@ -195,7 +266,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp
 
  	/* Move the cache entry from one hash list to another */
 	hlist_del_init(&rp->c_hash);
-	hlist_add_head(&rp->c_hash, hash_list + request_hash(rp->c_xid));
+	hlist_add_head(&rp->c_hash, b->hash + h);
 
 	/* release any buffer */
 	if (rp->c_type == RC_REPLBUFF) {
@@ -204,14 +275,14 @@ nfsd_cache_lookup(struct svc_rqst *rqstp
 	}
 	rp->c_type = RC_NOCACHE;
  out:
-	spin_unlock(&cache_lock);
+	spin_unlock(&b->lock);
 	return rtn;
 
 found_entry:
 	/* We found a matching entry which is either in progress or done. */
 	age = jiffies - rp->c_timestamp;
 	rp->c_timestamp = jiffies;
-	lru_put_end(rp);
+	lru_put_end(b, rp);
 
 	rtn = RC_DROPIT;
 	/* Request being processed or excessive rexmits */
@@ -267,10 +338,13 @@ nfsd_cache_update(struct svc_rqst *rqstp
 	struct svc_cacherep *rp;
 	struct kvec	*resv = &rqstp->rq_res.head[0], *cachv;
 	int		len;
+	struct svc_cache_bucket *b;
 
 	if (!(rp = rqstp->rq_cacherep) || cache_disabled)
 		return;
 
+	b = bucket_for_hash(request_hash(rp->c_xid));
+
 	len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
 	len >>= 2;
 
@@ -290,22 +364,22 @@ nfsd_cache_update(struct svc_rqst *rqstp
 		cachv = &rp->c_replvec;
 		cachv->iov_base = kmalloc(len << 2, GFP_KERNEL);
 		if (!cachv->iov_base) {
-			spin_lock(&cache_lock);
+			spin_lock(&b->lock);
 			rp->c_state = RC_UNUSED;
-			spin_unlock(&cache_lock);
+			spin_unlock(&b->lock);
 			return;
 		}
 		cachv->iov_len = len << 2;
 		memcpy(cachv->iov_base, statp, len << 2);
 		break;
 	}
-	spin_lock(&cache_lock);
-	lru_put_end(rp);
+	spin_lock(&b->lock);
+	lru_put_end(b, rp);
 	rp->c_secure = rqstp->rq_secure;
 	rp->c_type = cachetype;
 	rp->c_state = RC_DONE;
 	rp->c_timestamp = jiffies;
-	spin_unlock(&cache_lock);
+	spin_unlock(&b->lock);
 	return;
 }
 

Greg.
-- 
Greg Banks, R&D Software Engineer, SGI Australian Software Group.
I don't speak for SGI.



-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 4/8] knfsd: repcache: split hash index
  2006-10-11 11:27 [PATCH 4/8] knfsd: repcache: split hash index Greg Banks
@ 2006-10-16  2:00 ` Neil Brown
  2006-10-16  6:38   ` David Chinner
  2006-10-16  9:51   ` Greg Banks
  0 siblings, 2 replies; 6+ messages in thread
From: Neil Brown @ 2006-10-16  2:00 UTC (permalink / raw)
  To: Greg Banks; +Cc: Linux NFS Mailing List

On Wednesday October 11, gnb@melbourne.sgi.com wrote:
>   */
>  #define CACHESIZE		1024
> -#define HASHSIZE		64
> +/* number of buckets used to manage LRU lists and cache locks (power of 2) */
> +#ifdef CONFIG_SMP
> +#define CACHE_NUM_BUCKETS	64
> +#else
> +#define CACHE_NUM_BUCKETS	1
> +#endif
> +/* largest possible number of entries in all LRU lists (power of 2) */
> +#define CACHE_MAX_SIZE		(16*1024*CACHE_NUM_BUCKETS)
> +/* largest possible number of entries in LRU per bucket */
> +#define CACHE_BUCKET_MAX_SIZE	(CACHE_MAX_SIZE/CACHE_NUM_BUCKETS)
> +/* log2 of largest desired hash chain length */
> +#define MAX_CHAIN_ORDER		2
> +/* size of the per-bucket hash table */
> +#define HASHSIZE		((CACHE_MAX_SIZE>>MAX_CHAIN_ORDER)/CACHE_NUM_BUCKETS)

If I've done my sums right (there is always room for doubt), then HASHSIZE == 4096.

> +
> +		b->hash = kmalloc (HASHSIZE * sizeof(struct hlist_head), GFP_KERNEL);

So this kmalloc asks for 16K or 32K depending on pointer size.  On
most machines that would be an order 2 or 3 allocation which is more
likely to fail that order 0.
I would really like to see HASHSIZE limited to PAGE_SIZE, and if
needed, push CACHE_NUM_BUCKETS up ... that might make the
'cache_buckets' array bigger than a page, but we don't kmalloc that so
it shouldn't be a problem.

Hmmm.. but if we wanted to scale the hash table size based on memory,
we would want to kmalloc cache_buckets which might limit it's size...
Maybe we could try allocating it big - which might work on big
machines, and scale back the size on kmalloc failure??  That would
probably work.

So for now I would like to see this limit HASHSIZE to
PAGE_SIZE/sizeof(void*), and possibly make CACHE_NUM_BUCKETS bigger in
some circumstances.  Allocating cache_buckets based on memory size can
come later if it is needed.

Sound fair?

NeilBrown

-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 4/8] knfsd: repcache: split hash index
  2006-10-16  2:00 ` Neil Brown
@ 2006-10-16  6:38   ` David Chinner
  2006-10-16  9:51   ` Greg Banks
  1 sibling, 0 replies; 6+ messages in thread
From: David Chinner @ 2006-10-16  6:38 UTC (permalink / raw)
  To: Neil Brown; +Cc: Linux NFS Mailing List, Greg Banks

On Mon, Oct 16, 2006 at 12:00:08PM +1000, Neil Brown wrote:
> If I've done my sums right (there is always room for doubt), then HASHSIZE == 4096.
> 
> > +
> > +		b->hash = kmalloc (HASHSIZE * sizeof(struct hlist_head), GFP_KERNEL);
> 
> So this kmalloc asks for 16K or 32K depending on pointer size.  On
> most machines that would be an order 2 or 3 allocation which is more
> likely to fail that order 0.
>
> I would really like to see HASHSIZE limited to PAGE_SIZE, and if
> needed, push CACHE_NUM_BUCKETS up ... that might make the
> 'cache_buckets' array bigger than a page, but we don't kmalloc that so
> it shouldn't be a problem.
> 
> Hmmm.. but if we wanted to scale the hash table size based on memory,
> we would want to kmalloc cache_buckets which might limit it's size...
> Maybe we could try allocating it big - which might work on big
> machines, and scale back the size on kmalloc failure??  That would
> probably work.

Yes, that works.  e.g. XFS uses kmem_zalloc_greedy() allocating
large hashes at mount time....

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group

-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 4/8] knfsd: repcache: split hash index
  2006-10-16  2:00 ` Neil Brown
  2006-10-16  6:38   ` David Chinner
@ 2006-10-16  9:51   ` Greg Banks
  2006-10-16 10:23     ` Neil Brown
  1 sibling, 1 reply; 6+ messages in thread
From: Greg Banks @ 2006-10-16  9:51 UTC (permalink / raw)
  To: Neil Brown; +Cc: Linux NFS Mailing List

On Mon, Oct 16, 2006 at 12:00:08PM +1000, Neil Brown wrote:
> On Wednesday October 11, gnb@melbourne.sgi.com wrote:
> >   */
> >  #define CACHESIZE		1024
> > -#define HASHSIZE		64
> > +/* number of buckets used to manage LRU lists and cache locks (power of 2) */
> > +#ifdef CONFIG_SMP
> > +#define CACHE_NUM_BUCKETS	64
> > +#else
> > +#define CACHE_NUM_BUCKETS	1
> > +#endif
> > +/* largest possible number of entries in all LRU lists (power of 2) */
> > +#define CACHE_MAX_SIZE		(16*1024*CACHE_NUM_BUCKETS)
> > +/* largest possible number of entries in LRU per bucket */
> > +#define CACHE_BUCKET_MAX_SIZE	(CACHE_MAX_SIZE/CACHE_NUM_BUCKETS)
> > +/* log2 of largest desired hash chain length */
> > +#define MAX_CHAIN_ORDER		2
> > +/* size of the per-bucket hash table */
> > +#define HASHSIZE		((CACHE_MAX_SIZE>>MAX_CHAIN_ORDER)/CACHE_NUM_BUCKETS)
> 
> If I've done my sums right (there is always room for doubt), then HASHSIZE == 4096.

Correct.

> > +
> > +		b->hash = kmalloc (HASHSIZE * sizeof(struct hlist_head), GFP_KERNEL);
> 
> So this kmalloc asks for 16K or 32K depending on pointer size.  On
> most machines that would be an order 2 or 3 allocation which is more
> likely to fail that order 0.

This has run without allocation failures on 2 classes of machines:

* Altix (ia64), PAGE_SIZE=16K sizeof(void*)=8 => order=1
* Altix XE (x86_64), PAGE_SIZE=4K, sizeof(void*)=8 => order=3

Of course, under normal conditions these allocations happen when
the NFS server is started at boot and thus we have the best chance
of them succeeding.

But I take your point, the nfsd buffer saga teaches us that there's
value in strictly limiting the order of these allocations.


> I would really like to see HASHSIZE limited to PAGE_SIZE, and if
> needed, push CACHE_NUM_BUCKETS up ... that might make the
> 'cache_buckets' array bigger than a page, but we don't kmalloc that so
> it shouldn't be a problem.

Sounds reasonable.

> Hmmm.. but if we wanted to scale the hash table size based on memory,
> we would want to kmalloc cache_buckets which might limit it's size...

Let's look at the maths.  If we were to limit cache_buckets[] to a
single page, I calculate that would give us 186 entries on ia64, 46
on x86_64, and 68 on i386 (fewer if various spinlock-related config
options are enabled).  That's too low on x86_64 but fine on the other
platforms.  With a single order-1 allocation we could cover most bases.

Alternatively, we could allocate the buckets separately and make
cache_buckets[] an array of pointers to buckets.  Then we could do a
single (say) 128*sizeof(svc_cache_bucket*) allocation plus (say) 128 *
sizeof(svc_cache_bucket) allocations, all of which would be order 0.
Now we've effectively got a 3-level fat tree keyed on hash value.

The more I think about it, the more I like this idea.

> So for now I would like to see this limit HASHSIZE to
> PAGE_SIZE/sizeof(void*), and possibly make CACHE_NUM_BUCKETS bigger in
> some circumstances.  Allocating cache_buckets based on memory size can
> come later if it is needed.
> 
> Sound fair?

Yep.  I'll work up a new version of the patch with both the above ideas.

Greg.
-- 
Greg Banks, R&D Software Engineer, SGI Australian Software Group.
I don't speak for SGI.

-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 4/8] knfsd: repcache: split hash index
  2006-10-16  9:51   ` Greg Banks
@ 2006-10-16 10:23     ` Neil Brown
  2006-10-16 11:06       ` Greg Banks
  0 siblings, 1 reply; 6+ messages in thread
From: Neil Brown @ 2006-10-16 10:23 UTC (permalink / raw)
  To: Greg Banks; +Cc: Linux NFS Mailing List

On Monday October 16, gnb@sgi.com wrote:
> 
> Let's look at the maths.  If we were to limit cache_buckets[] to a
> single page, I calculate that would give us 186 entries on ia64, 46
> on x86_64, and 68 on i386 (fewer if various spinlock-related config
> options are enabled).  That's too low on x86_64 but fine on the other
> platforms.  With a single order-1 allocation we could cover most
> bases.

I had thought that ____cacheline_aligned_in_smp would have padded it
more making few entries per page, but that probably isn't important.

> 
> Alternatively, we could allocate the buckets separately and make
> cache_buckets[] an array of pointers to buckets.  Then we could do a
> single (say) 128*sizeof(svc_cache_bucket*) allocation plus (say) 128 *
> sizeof(svc_cache_bucket) allocations, all of which would be order 0.
> Now we've effectively got a 3-level fat tree keyed on hash value.
> 
> The more I think about it, the more I like this idea.
> 

Me too.

NeilBrown

-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 4/8] knfsd: repcache: split hash index
  2006-10-16 10:23     ` Neil Brown
@ 2006-10-16 11:06       ` Greg Banks
  0 siblings, 0 replies; 6+ messages in thread
From: Greg Banks @ 2006-10-16 11:06 UTC (permalink / raw)
  To: Neil Brown; +Cc: Linux NFS Mailing List

On Mon, Oct 16, 2006 at 08:23:30PM +1000, Neil Brown wrote:
> On Monday October 16, gnb@sgi.com wrote:
> > 
> > Let's look at the maths.  If we were to limit cache_buckets[] to a
> > single page, I calculate that would give us 186 entries on ia64, 46
> > on x86_64, and 68 on i386 (fewer if various spinlock-related config
> > options are enabled).  That's too low on x86_64 but fine on the other
> > platforms.  With a single order-1 allocation we could cover most
> > bases.
> 
> I had thought that ____cacheline_aligned_in_smp would have padded it
> more making few entries per page, but that probably isn't important.

Ooops, you're right.  That makes the numbers 128 on ia64, 42 on
x86_64, and 64 on i386.

> > Alternatively, we could allocate the buckets separately and make
> > cache_buckets[] an array of pointers to buckets.  Then we could do a
> > single (say) 128*sizeof(svc_cache_bucket*) allocation plus (say) 128 *
> > sizeof(svc_cache_bucket) allocations, all of which would be order 0.
> > Now we've effectively got a 3-level fat tree keyed on hash value.
> > 
> > The more I think about it, the more I like this idea.
> > 
> 
> Me too.

Righto, new patches probably tomorrow.

Greg.
-- 
Greg Banks, R&D Software Engineer, SGI Australian Software Group.
I don't speak for SGI.

-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2006-10-16 11:06 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-10-11 11:27 [PATCH 4/8] knfsd: repcache: split hash index Greg Banks
2006-10-16  2:00 ` Neil Brown
2006-10-16  6:38   ` David Chinner
2006-10-16  9:51   ` Greg Banks
2006-10-16 10:23     ` Neil Brown
2006-10-16 11:06       ` Greg Banks

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.