[patch 18/29] knfsd: dynamically expand the reply cache

linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Greg Banks <gnb@sgi.com>
To: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Linux NFS ML <linux-nfs@vger.kernel.org>
Subject: [patch 18/29] knfsd: dynamically expand the reply cache
Date: Wed, 01 Apr 2009 07:28:18 +1100	[thread overview]
Message-ID: <20090331202944.386752000@sgi.com> (raw)
In-Reply-To: 20090331202800.739621000@sgi.com

Allow the reply cache to expand under nonidempotent NFS call load.
The current fixed limit on reply cache entries is actually so small
as to make the reply cache utterly ineffectual (see the comment in
nfscache.c for details).

This is a simpler version of an older, more complicated patch which
dynamically expanded the hash index using lazy rehashing.  Here we
instead allocate the hash index up front, sized for the maximum size
of the reply cache rather than its initial size, and never resize it.

Signed-off-by: Greg Banks <gnb@sgi.com>
---

 fs/nfsd/nfscache.c |   76 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 66 insertions(+), 10 deletions(-)

Index: bfields/fs/nfsd/nfscache.c
===================================================================
--- bfields.orig/fs/nfsd/nfscache.c
+++ bfields/fs/nfsd/nfscache.c
@@ -9,7 +9,7 @@
  *
  * Copyright (C) 1995, 1996 Olaf Kirch <okir-pn4DOG8n3UYbFoVRYvo4fw@public.gmane.org>
  *
- * SMP lock splitting by Greg Banks <gnb@sgi.com>
+ * Dynamic expansion and SMP lock splitting by Greg Banks <gnb@sgi.com>
  *     Copyright (c) 2005-2009 Silicon Graphics, Inc.
  */
 
@@ -24,11 +24,21 @@
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/cache.h>
 
-/* Size of reply cache. Common values are:
+/* Initial size of reply cache. Common values are:
  * 4.3BSD:	128
  * 4.4BSD:	256
  * Solaris2:	1024
  * DEC Unix:	512-4096
+ *
+ * All these values reflect network packet rates and NFS loads common
+ * sometime around 1990, and are utterly inadequate for modern NFS
+ * servers.  To be at all effective the reply cache needs to hold all
+ * NFS calls seen by the server for at least a client RPC timeout period
+ * (typically 1.1 seconds), and to handle weird IP routing issues should
+ * really hold 120 seconds of traffic.  A modern NFS server can be
+ * fielding upwards of 10,000 calls per second, which means the default
+ * cache size of 1024 holds about 102 milliseconds of traffic, i.e. the
+ * default size is three orders of magnitude too small.
  */
 /* number of buckets used to manage LRU lists and cache locks (power of 2) */
 #ifdef CONFIG_SMP
@@ -36,14 +46,22 @@
 #else
 #define CACHE_NUM_BUCKETS	1
 #endif
-/* number of entries in all LRU lists (power of 2) */
+/* initial number of entries in all LRU lists (power of 2) */
 #define CACHE_SIZE		(1024)
+/* largest possible number of entries in all LRU lists (power of 2) */
+#define CACHE_MAX_SIZE		(16*1024*CACHE_NUM_BUCKETS)
 /* largest possible number of entries in LRU per bucket */
-#define CACHE_BUCKET_MAX_SIZE	(CACHE_SIZE/CACHE_NUM_BUCKETS)
+#define CACHE_BUCKET_MAX_SIZE	(CACHE_MAX_SIZE/CACHE_NUM_BUCKETS)
+/* number of entries each bucket will expand by */
+#define CACHE_BUCKET_INCREMENT	(1024/CACHE_NUM_BUCKETS)
 /* log2 of largest desired hash chain length */
 #define MAX_CHAIN_ORDER		2
 /* initial and maximum size of the per-bucket hash table */
-#define HASHSIZE		((CACHE_SIZE>>MAX_CHAIN_ORDER)/CACHE_NUM_BUCKETS)
+#define HASHSIZE		((CACHE_MAX_SIZE>>MAX_CHAIN_ORDER)/CACHE_NUM_BUCKETS)
+/* the cache attempts to expand if an entry younger than this is evicted */
+#define CACHE_THRESH_AGE	(11 * HZ / 10)  /* in jiffies */
+/* minimum interval between expansions of a bucket, in jiffies */
+#define CACHE_RATE_JIFFIES	(HZ/2)
 
 /*
  * locking for the reply cache:
@@ -63,6 +81,9 @@ struct svc_cache_bucket
 	struct list_head lru;
 	unsigned int size;
 	struct hlist_head *hash;
+	/* expand rate limiting state: jiffies timestamp and call count */
+	unsigned long last;
+	unsigned long nhits;
 } ____cacheline_aligned_in_smp;
 
 static struct svc_cache_bucket	cache_buckets[CACHE_NUM_BUCKETS];
@@ -90,18 +111,18 @@ static inline u32 request_hash(u32 xid, 
 static int	nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
 
 /*
- * Initialise the reply cache data structures.
+ * Expand (or initialise) the reply cache data structures.
  * Called without cache_lock, uses it internally.  Returns
  * 0 on success, an error otherwise.
  */
-static int nfsd_cache_bucket_init(struct svc_cache_bucket *b, unsigned int num)
+static int nfsd_cache_bucket_expand(struct svc_cache_bucket *b, unsigned int increment)
 {
 	struct svc_cacherep *rp;
 	unsigned int i;
 	LIST_HEAD(lru);
 
 	/* allocate new entries without the lock, keep them on their own list */
-	i = num;
+	i = increment;
 	while (i) {
 		rp = kmalloc(sizeof(*rp), GFP_KERNEL);
 		if (!rp)
@@ -116,7 +137,7 @@ static int nfsd_cache_bucket_init(struct
 	/* add the new entries */
 	spin_lock(&b->lock);
 
-	b->size = num;
+	b->size += increment;
 	list_splice(&lru, &b->lru);
 
 	spin_unlock(&b->lock);
@@ -142,7 +163,7 @@ int nfsd_reply_cache_init(void)
 
 		INIT_LIST_HEAD(&b->lru);
 		spin_lock_init(&b->lock);
-		if (nfsd_cache_bucket_init(b, CACHE_SIZE/CACHE_NUM_BUCKETS))
+		if (nfsd_cache_bucket_expand(b, CACHE_SIZE/CACHE_NUM_BUCKETS))
 			goto out_nomem;
 		b->hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
 		if (!b->hash)
@@ -189,6 +210,26 @@ static inline void lru_put_end(struct sv
 }
 
 /*
+ * Rate-limit expansion of bucket b.  Returns 1 iff the cache is to be
+ * expanded now, 0 otherwise.  Called with the bucket lock held.
+ */
+static int nfsd_cache_expand_ratelimit(struct svc_cache_bucket *b)
+{
+	unsigned long now = jiffies;
+
+	b->nhits++;			/* counted on every call, granted or not */
+	if (b->last == 0) {		/* last is still zero: start the clock */
+		b->last = now;
+	} else if ((now - b->last) > CACHE_RATE_JIFFIES &&
+		   b->nhits > (b->size >> 4)) {	/* more than size/16 calls */
+		b->nhits = 0;		/* grant: restart count and interval */
+		b->last = now;
+		return 1;
+	}
+	return 0;
+}
+
+/*
  * Try to find an entry matching the current call in the cache. When none
  * is found, we grab the oldest unlocked entry off the LRU list.
  * Note that no operation within the loop may sleep.
@@ -207,6 +248,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp
 	struct svc_cache_bucket *b;
 	unsigned long		age;
 	int			rtn;
+	int			expand = 0;
 
 	rqstp->rq_cacherep = NULL;
 	if (cache_disabled || type == RC_NOCACHE) {
@@ -259,6 +301,18 @@ nfsd_cache_lookup(struct svc_rqst *rqstp
 		goto out;
 	}
 
+	if (rp->c_state != RC_UNUSED) {
+		/* reusing an existing cache entry */
+		age = jiffies - rp->c_timestamp;
+		if (age < CACHE_THRESH_AGE &&
+		    b->size < CACHE_BUCKET_MAX_SIZE &&
+		    nfsd_cache_expand_ratelimit(b)) {
+			expand = CACHE_BUCKET_INCREMENT;
+			if (b->size + expand > CACHE_BUCKET_MAX_SIZE)
+				expand = CACHE_BUCKET_MAX_SIZE - b->size;
+		}
+	}
+
 	rqstp->rq_cacherep = rp;
 	rp->c_state = RC_INPROG;
 	rp->c_xid = xid;
@@ -280,6 +334,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp
 	rp->c_type = RC_NOCACHE;
  out:
 	spin_unlock(&b->lock);
+	if (expand)
+		nfsd_cache_bucket_expand(b, expand);
 	return rtn;
 
 found_entry:

--
Greg

next prev parent reply	other threads:[~2009-03-31 21:02 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-31 20:28 [patch 00/29] SGI enhancedNFS patches Greg Banks
2009-03-31 20:28 ` [patch 01/29] knfsd: Add infrastructure for measuring RPC service times Greg Banks
2009-04-25  2:13   ` J. Bruce Fields
2009-04-25  2:14     ` J. Bruce Fields
2009-04-25  2:52     ` Greg Banks
2009-03-31 20:28 ` [patch 02/29] knfsd: Add stats table infrastructure Greg Banks
2009-04-25  3:56   ` J. Bruce Fields
2009-04-26  4:12     ` Greg Banks
2009-03-31 20:28 ` [patch 03/29] knfsd: add userspace controls for stats tables Greg Banks
2009-04-25 21:57   ` J. Bruce Fields
2009-04-25 22:03     ` J. Bruce Fields
2009-04-27 16:06       ` Chuck Lever
2009-04-27 23:22         ` J. Bruce Fields
2009-04-28 15:37           ` Chuck Lever
2009-04-28 15:57             ` J. Bruce Fields
2009-04-28 16:03               ` Chuck Lever
2009-04-28 16:26                 ` J. Bruce Fields
2009-04-29  1:45               ` Greg Banks
     [not found]         ` <ac442c870904271827w6041a67ew82fe36a843beeac3@mail.gmail.com>
     [not found]           ` <ac442c870904271827w6041a67ew82fe36a843beeac3-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-04-28  1:31             ` Greg Banks
2009-04-26  4:14     ` Greg Banks
2009-03-31 20:28 ` [patch 04/29] knfsd: Add stats updating API Greg Banks
2009-03-31 20:28 ` [patch 05/29] knfsd: Infrastructure for providing stats to userspace Greg Banks
2009-04-01  0:28   ` J. Bruce Fields
2009-04-01  3:43     ` Greg Banks
2009-03-31 20:28 ` [patch 06/29] knfsd: Gather per-export stats Greg Banks
2009-03-31 20:28 ` [patch 07/29] knfsd: Prefetch the per-export stats entry Greg Banks
2009-03-31 20:28 ` [patch 08/29] knfsd: Gather per-client stats Greg Banks
2009-03-31 20:28 ` [patch 09/29] knfsd: Cache per-client stats entry on TCP transports Greg Banks
2009-03-31 20:28 ` [patch 10/29] knfsd: Update per-client & per-export stats from NFSv3 Greg Banks
2009-03-31 20:28 ` [patch 11/29] knfsd: Update per-client & per-export stats from NFSv2 Greg Banks
2009-03-31 20:28 ` [patch 12/29] knfsd: Update per-client & per-export stats from NFSv4 Greg Banks
2009-03-31 20:28 ` [patch 13/29] knfsd: reply cache cleanups Greg Banks
2009-05-12 19:54   ` J. Bruce Fields
2009-03-31 20:28 ` [patch 14/29] knfsd: better hashing in the reply cache Greg Banks
2009-05-08 22:01   ` J. Bruce Fields
2009-03-31 20:28 ` [patch 15/29] knfsd: fix reply cache memory corruption Greg Banks
2009-05-12 19:55   ` J. Bruce Fields
2009-03-31 20:28 ` [patch 16/29] knfsd: use client IPv4 address in reply cache hash Greg Banks
2009-05-11 21:48   ` J. Bruce Fields
2009-03-31 20:28 ` [patch 17/29] knfsd: make the reply cache SMP-friendly Greg Banks
2009-03-31 20:28 ` Greg Banks [this message]
2009-05-26 18:57   ` [patch 18/29] knfsd: dynamically expand the reply cache J. Bruce Fields
2009-05-26 19:04     ` J. Bruce Fields
2009-05-26 21:24     ` Rob Gardner
2009-05-26 21:52       ` J. Bruce Fields
2009-05-27  0:28       ` Greg Banks
2009-03-31 20:28 ` [patch 19/29] knfsd: faster probing in " Greg Banks
2009-03-31 20:28 ` [patch 20/29] knfsd: add extended reply cache stats Greg Banks
2009-03-31 20:28 ` [patch 21/29] knfsd: remove unreported filehandle stats counters Greg Banks
2009-05-12 20:00   ` J. Bruce Fields
2009-03-31 20:28 ` [patch 22/29] knfsd: make svc_authenticate() scale Greg Banks
2009-05-12 21:24   ` J. Bruce Fields
2009-03-31 20:28 ` [patch 23/29] knfsd: introduce SVC_INC_STAT Greg Banks
2009-03-31 20:28 ` [patch 24/29] knfsd: remove the program field from struct svc_stat Greg Banks
2009-03-31 20:28 ` [patch 25/29] knfsd: allocate svc_serv.sv_stats dynamically Greg Banks
2009-03-31 20:28 ` [patch 26/29] knfsd: make svc_serv.sv_stats per-CPU Greg Banks
2009-03-31 20:28 ` [patch 27/29] knfsd: move hot procedure count field out of svc_procedure Greg Banks
2009-03-31 20:28 ` [patch 28/29] knfsd: introduce NFSD_INC_STAT() Greg Banks
2009-03-31 20:28 ` [patch 29/29] knfsd: make nfsdstats per-CPU Greg Banks
2009-04-01  0:23 ` [patch 00/29] SGI enhancedNFS patches J. Bruce Fields
2009-04-01  3:32   ` Greg Banks
     [not found]     ` <ac442c870903312032t34630c6dvdbb644cb510f8079-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-04-01  6:34       ` Jeff Garzik
2009-04-01  6:41         ` Greg Banks

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090331202944.386752000@sgi.com \
    --to=gnb@sgi.com \
    --cc=bfields@fieldses.org \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).