public inbox for linux-nfs@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH 0/3] nlm:Fix bug nlm cann't process retransmited request correctly
@ 2009-10-27  3:22 Mi Jinlong
  2009-10-27  3:27 ` [RFC][PATCH 1/3] sunrpc:add an universal DRC to sunrpc Mi Jinlong
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Mi Jinlong @ 2009-10-27  3:22 UTC (permalink / raw)
  To: Trond.Myklebust; +Cc: NFSv3 list, bfields, mingo

 I found a bug while testing NLM. The case is as follows:

           client                          server
             |                                |
      step1  |           open file            |
      open   |------------------------------->|
             |              ok                |
             |<-------------------------------|
             |                                | step2
             |                             -> | <-  service nfslock stop
             |                                |
             | WL1: write lock request{0, 0}  |
      step3  |------------------------------->|
      fcntl  |                                |
             |                                |
             |    WL1_re: WL1 retransmit      |
             |------------------------------->|
             |                                |
             |       WL1.reply   ENOLCK       |
             |<-------------------------------|
             |                                |
             |                                | step4
             |                             -> | <- service nfslock start
             |                                |
             | WL2: write lock request{0, 0}  |
      step5  |------------------------------->|
      fcntl  |                                |
             |        WL1_re.reply OK         |
             |<-------------------------------|
             |       WL2.reply EBLOCKD        |
             |<-------------------------------|


   The client cannot acquire the write lock any more after step4.

 Reason:
     The server replies ENOLCK for WL1 because the nfslock service is stopped, but it
   cannot distinguish a retransmitted request from a new request, so it replies OK for
   WL1_re after the nfslock service starts. However, the client's fcntl call has already
   returned when it received WL1.reply, and WL2 does not get an svid equal to that of
   WL1_re, so the server replies EBLOCKD for it.

 Suggestion:

   I suggest adding a DRC (duplicate request cache) for NLM.
   The following three patches do this:

   1. add a universal DRC to sunrpc, to be used by the protocols
      that are built on sunrpc
      [0001-Add-an-universal-DRC-to-sunrpc.patch]

   2. add DRC to NLM using sunrpc's universal DRC
      [0002-Add-DRC-to-NLM-using-sunrpc-s-DRC.patch]

   3. modify the nfsd's DRC to use sunrpc's universal DRC
      [0003-Modify-the-nfsd-s-DRC-to-use-sunrpc-s-universal-DRC.patch]

--

 arch/parisc/kernel/sys_parisc32.c |    2 +-
 arch/s390/kernel/compat_linux.c   |    2 +-
 arch/sparc/kernel/sys_sparc32.c   |    2 +-
 fs/lockd/svc.c                    |   66 ++++++++
 fs/lockd/svc4proc.c               |   52 +++---
 fs/nfsd/Makefile                  |    2 +-
 fs/nfsd/nfs2acl.c                 |    2 +-
 fs/nfsd/nfs3acl.c                 |    2 +-
 fs/nfsd/nfs3proc.c                |    2 +-
 fs/nfsd/nfs4proc.c                |    2 +-
 fs/nfsd/nfs4state.c               |    2 +-
 fs/nfsd/nfscache.c                |  330 -------------------------------------
 fs/nfsd/nfsctl.c                  |   10 +-
 fs/nfsd/nfsproc.c                 |    2 +-
 fs/nfsd/nfssvc.c                  |   13 +-
 fs/nfsd/stats.c                   |    9 +-
 include/linux/lockd/lockd.h       |    6 +
 include/linux/nfsd/cache.h        |   62 -------
 include/linux/nfsd/stats.h        |    3 -
 include/linux/sunrpc/drc.h        |   97 +++++++++++
 net/sunrpc/Makefile               |    2 +-
 net/sunrpc/drc.c                  |  326 ++++++++++++++++++++++++++++++++++++
 22 files changed, 553 insertions(+), 443 deletions(-)

-- 
Regards
Mi Jinlong


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [RFC][PATCH 1/3] sunrpc:add an universal DRC to sunrpc
  2009-10-27  3:22 [RFC][PATCH 0/3] nlm:Fix bug nlm cann't process retransmited request correctly Mi Jinlong
@ 2009-10-27  3:27 ` Mi Jinlong
  2009-10-27  3:30 ` [RFC][PATCH 2/3] nlm:add DRC to NLM using sunrpc's universal DRC Mi Jinlong
  2009-10-27  3:31 ` [RFC][PATCH 3/3] nfsd:modify the nfsd's DRC to use " Mi Jinlong
  2 siblings, 0 replies; 4+ messages in thread
From: Mi Jinlong @ 2009-10-27  3:27 UTC (permalink / raw)
  To: Trond.Myklebust; +Cc: NFSv3 list, bfields, mingo

Add a universal DRC to sunrpc; it will be used by the protocols
that are built on sunrpc.

Signed-off-by: Mi Jinlong <mijinlong@cn.fujitsu.com>
---
 include/linux/sunrpc/drc.h |   97 +++++++++++++
 net/sunrpc/Makefile        |    2 +-
 net/sunrpc/drc.c           |  326 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 424 insertions(+), 1 deletions(-)
 create mode 100644 include/linux/sunrpc/drc.h
 create mode 100644 net/sunrpc/drc.c

diff --git a/include/linux/sunrpc/drc.h b/include/linux/sunrpc/drc.h
new file mode 100644
index 0000000..b581a4d
--- /dev/null
+++ b/include/linux/sunrpc/drc.h
@@ -0,0 +1,97 @@
+/*
+ * include/linux/sunrpc/drc.h
+ *
+ * Request reply cache. This was heavily inspired by the
+ * implementation in 4.3BSD/4.4BSD.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir-pn4DOG8n3UYbFoVRYvo4fw@public.gmane.org>
+ */
+
+#ifndef DRC_H
+#define DRC_H
+
+#include <linux/in.h>
+#include <linux/uio.h>
+#include <linux/spinlock.h>
+
+/*
+ * Representation of a reply cache entry.
+ */
+struct svc_cacherep {
+	struct hlist_node	c_hash;
+	struct list_head	c_lru;
+
+	unsigned char		c_state,	/* unused, inprog, done */
+				c_type,		/* status, buffer */
+				c_secure : 1;	/* req came from port < 1024 */
+	struct sockaddr_in	c_addr;
+	__be32			c_xid;
+	u32			c_prot;
+	u32			c_proc;
+	u32			c_vers;
+	unsigned long		c_timestamp;
+	union {
+		struct kvec	u_vec;
+		__be32		u_status;
+	}			c_u;
+};
+
+#define c_replvec		c_u.u_vec
+#define c_replstat		c_u.u_status
+
+struct drc_cache {
+	struct hlist_head 	*cache_hash;
+	struct list_head	lru_head;
+	int 			cache_disabled;
+
+	/* Record the cache */
+	unsigned int		rchits;		/* repcache hits */
+	unsigned int		rcmisses;	/* repcache misses */
+	unsigned int		rcnocache;	/* uncached reqs */
+
+	/*
+	 * locking for the reply cache:
+	 * A cache entry is "single use" if c_state == RC_INPROG
+	 * Otherwise, it when accessing _prev or _next, the lock must be held.
+	 */
+	spinlock_t		cache_lock;
+};
+
+/* cache entry states */
+enum {
+	RC_UNUSED,
+	RC_INPROG,
+	RC_DONE
+};
+
+/* return values */
+enum {
+	RC_DROPIT,
+	RC_REPLY,
+	RC_DOIT,
+	RC_INTR
+};
+
+/*
+ * Cache types.
+ * We may want to add more types one day, e.g. for diropres and
+ * attrstat replies. Using cache entries with fixed length instead
+ * of buffer pointers may be more efficient.
+ */
+enum {
+	RC_NOCACHE,
+	RC_REPLSTAT,
+	RC_REPLBUFF,
+};
+
+/*
+ * If requests are retransmitted within this interval, they're dropped.
+ */
+#define RC_DELAY		(HZ/5)
+
+int	drc_reply_cache_init(struct drc_cache *);
+void	drc_reply_cache_shutdown(struct drc_cache *);
+int	drc_cache_lookup(struct svc_rqst *, int, struct drc_cache *);
+void	drc_cache_update(struct svc_rqst *, int, __be32 *, struct drc_cache *);
+
+#endif /* DRC_H */
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 9d2fca5..b3e20e4 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
 	    svc.o svcsock.o svcauth.o svcauth_unix.o \
 	    addr.o rpcb_clnt.o timer.o xdr.o \
 	    sunrpc_syms.o cache.o rpc_pipe.o \
-	    svc_xprt.o
+	    svc_xprt.o drc.o
 sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
 sunrpc-$(CONFIG_PROC_FS) += stats.o
 sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/drc.c b/net/sunrpc/drc.c
new file mode 100644
index 0000000..5987e5d
--- /dev/null
+++ b/net/sunrpc/drc.c
@@ -0,0 +1,326 @@
+/*
+ * net/sunrpc/drc.c
+ *
+ * Request reply cache. This is currently a global cache, but this may
+ * change in the future and be a per-client cache.
+ *
+ * This code is heavily inspired by the 44BSD implementation, although
+ * it does things a bit differently.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir-pn4DOG8n3UYbFoVRYvo4fw@public.gmane.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/list.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/drc.h>
+
+/* Size of reply cache. Common values are:
+ * 4.3BSD:	128
+ * 4.4BSD:	256
+ * Solaris2:	1024
+ * DEC Unix:	512-4096
+ */
+#define CACHESIZE		1024
+#define HASHSIZE		64
+
+/*
+ * Calculate the hash index from an XID.
+ */
+static inline u32 request_hash(u32 xid)
+{
+	u32 h = xid;
+	h ^= (xid >> 24);
+	return h & (HASHSIZE-1);
+}
+
+static int	drc_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
+
+int drc_reply_cache_init(struct drc_cache *dc)
+{
+	struct svc_cacherep	*rp;
+	int			i;
+
+	dc->cache_disabled = 1;
+	dc->cache_lock = __SPIN_LOCK_UNLOCKED(dc->cache_lock);
+
+	INIT_LIST_HEAD(&dc->lru_head);
+	i = CACHESIZE;
+	while (i) {
+		rp = kmalloc(sizeof(*rp), GFP_KERNEL);
+		if (!rp)
+			goto out_nomem;
+		list_add(&rp->c_lru, &dc->lru_head);
+		rp->c_state = RC_UNUSED;
+		rp->c_type = RC_NOCACHE;
+		INIT_HLIST_NODE(&rp->c_hash);
+		i--;
+	}
+
+	dc->cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
+	if (!dc->cache_hash)
+		goto out_nomem;
+
+	dc->cache_disabled = 0;
+	return 0;
+out_nomem:
+	printk(KERN_ERR "drc: failed to allocate reply cache\n");
+	drc_reply_cache_shutdown(dc);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(drc_reply_cache_init);
+
+void drc_reply_cache_shutdown(struct drc_cache *dc)
+{
+	struct svc_cacherep	*rp;
+
+	while (!list_empty(&dc->lru_head)) {
+		rp = list_entry(dc->lru_head.next, struct svc_cacherep, c_lru);
+		if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF)
+			kfree(rp->c_replvec.iov_base);
+		list_del(&rp->c_lru);
+		kfree(rp);
+	}
+
+	dc->cache_disabled = 1;
+
+	kfree (dc->cache_hash);
+	dc->cache_hash = NULL;
+}
+EXPORT_SYMBOL_GPL(drc_reply_cache_shutdown);
+
+/*
+ * Move cache entry to end of LRU list
+ */
+static void
+lru_put_end(struct svc_cacherep *rp, struct drc_cache *dc)
+{
+	list_move_tail(&rp->c_lru, &dc->lru_head);
+}
+
+/*
+ * Move a cache entry from one hash list to another
+ */
+static void
+hash_refile(struct svc_cacherep *rp, struct drc_cache *dc)
+{
+	hlist_del_init(&rp->c_hash);
+	hlist_add_head(&rp->c_hash, dc->cache_hash + request_hash(rp->c_xid));
+}
+
+/*
+ * Try to find an entry matching the current call in the cache. When none
+ * is found, we grab the oldest unlocked entry off the LRU list.
+ * Note that no operation within the loop may sleep.
+ */
+int
+drc_cache_lookup(struct svc_rqst *rqstp, int type, struct drc_cache *dc)
+{
+	struct hlist_node	*hn;
+	struct hlist_head 	*rh;
+	struct svc_cacherep	*rp;
+	__be32			xid = rqstp->rq_xid;
+	u32			proto =  rqstp->rq_prot,
+				vers = rqstp->rq_vers,
+				proc = rqstp->rq_proc;
+	unsigned long		age;
+	int rtn;
+
+	rqstp->rq_cacherep = NULL;
+	if (dc->cache_disabled || type == RC_NOCACHE) {
+		dc->rcnocache++;
+		return RC_DOIT;
+	}
+
+	spin_lock(&dc->cache_lock);
+	rtn = RC_DOIT;
+
+	rh = &dc->cache_hash[request_hash(xid)];
+	hlist_for_each_entry(rp, hn, rh, c_hash) {
+		if (rp->c_state != RC_UNUSED &&
+		    xid == rp->c_xid && proc == rp->c_proc &&
+		    proto == rp->c_prot && vers == rp->c_vers &&
+		    time_before(jiffies, rp->c_timestamp + 120*HZ) &&
+		    memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) {
+			dc->rchits++;
+			goto found_entry;
+		}
+	}
+	dc->rcmisses++;
+
+	/* This loop shouldn't take more than a few iterations normally */
+	{
+	int	safe = 0;
+	list_for_each_entry(rp, &dc->lru_head, c_lru) {
+		if (rp->c_state != RC_INPROG)
+			break;
+		if (safe++ > CACHESIZE) {
+			printk("drc: loop in repcache LRU list\n");
+			dc->cache_disabled = 1;
+			goto out;
+		}
+	}
+	}
+
+	/* All entries on the LRU are in-progress. This should not happen */
+	if (&rp->c_lru == &dc->lru_head) {
+		static int	complaints;
+
+		printk(KERN_WARNING "drc: all repcache entries locked!\n");
+		if (++complaints > 5) {
+			printk(KERN_WARNING "drc: disabling repcache.\n");
+			dc->cache_disabled = 1;
+		}
+		goto out;
+	}
+
+	rqstp->rq_cacherep = rp;
+	rp->c_state = RC_INPROG;
+	rp->c_xid = xid;
+	rp->c_proc = proc;
+	memcpy(&rp->c_addr, svc_addr_in(rqstp), sizeof(rp->c_addr));
+	rp->c_prot = proto;
+	rp->c_vers = vers;
+	rp->c_timestamp = jiffies;
+
+	hash_refile(rp, dc);
+
+	/* release any buffer */
+	if (rp->c_type == RC_REPLBUFF) {
+		kfree(rp->c_replvec.iov_base);
+		rp->c_replvec.iov_base = NULL;
+	}
+	rp->c_type = RC_NOCACHE;
+ out:
+	spin_unlock(&dc->cache_lock);
+	return rtn;
+
+found_entry:
+	/* We found a matching entry which is either in progress or done. */
+	age = jiffies - rp->c_timestamp;
+	rp->c_timestamp = jiffies;
+	lru_put_end(rp, dc);
+
+	rtn = RC_DROPIT;
+	/* Request being processed or excessive rexmits */
+	if (rp->c_state == RC_INPROG || age < RC_DELAY)
+		goto out;
+
+	/* From the hall of fame of impractical attacks:
+	 * Is this a user who tries to snoop on the cache? */
+	rtn = RC_DOIT;
+	if (!rqstp->rq_secure && rp->c_secure)
+		goto out;
+
+	/* Compose RPC reply header */
+	switch (rp->c_type) {
+	case RC_NOCACHE:
+		break;
+	case RC_REPLSTAT:
+		svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat);
+		rtn = RC_REPLY;
+		break;
+	case RC_REPLBUFF:
+		if (!drc_cache_append(rqstp, &rp->c_replvec))
+			goto out;	/* should not happen */
+		rtn = RC_REPLY;
+		break;
+	default:
+		printk(KERN_WARNING "drc: bad repcache type %d\n", rp->c_type);
+		rp->c_state = RC_UNUSED;
+	}
+
+	goto out;
+}
+EXPORT_SYMBOL_GPL(drc_cache_lookup);
+
+/*
+ * Update a cache entry. This is called from XXX_dispatch when
+ * the procedure has been executed and the complete reply is in
+ * rqstp->rq_res.
+ *
+ * We're copying around data here rather than swapping buffers because
+ * the toplevel loop requires max-sized buffers, which would be a waste
+ * of memory for a cache with a max reply size of 100 bytes (diropokres).
+ *
+ * If we should start to use different types of cache entries tailored
+ * specifically for attrstat and fh's, we may save even more space.
+ *
+ * Also note that a cachetype of RC_NOCACHE can legally be passed when
+ * drc failed to encode a reply that otherwise would have been cached.
+ * In this case, drc_cache_update is called with statp == NULL.
+ */
+void
+drc_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp,
+		  struct drc_cache *dc)
+{
+	struct svc_cacherep *rp;
+	struct kvec	*resv = &rqstp->rq_res.head[0], *cachv;
+	int		len;
+
+	if (!(rp = rqstp->rq_cacherep) || dc->cache_disabled)
+		return;
+
+	len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
+	len >>= 2;
+
+	/* Don't cache excessive amounts of data and XDR failures */
+	if (!statp || len > (256 >> 2)) {
+		rp->c_state = RC_UNUSED;
+		return;
+	}
+
+	switch (cachetype) {
+	case RC_REPLSTAT:
+		if (len != 1)
+			printk("drc: RC_REPLSTAT/reply len %d!\n",len);
+		rp->c_replstat = *statp;
+		break;
+	case RC_REPLBUFF:
+		cachv = &rp->c_replvec;
+		cachv->iov_base = kmalloc(len << 2, GFP_KERNEL);
+		if (!cachv->iov_base) {
+			spin_lock(&dc->cache_lock);
+			rp->c_state = RC_UNUSED;
+			spin_unlock(&dc->cache_lock);
+			return;
+		}
+		cachv->iov_len = len << 2;
+		memcpy(cachv->iov_base, statp, len << 2);
+		break;
+	}
+	spin_lock(&dc->cache_lock);
+	lru_put_end(rp, dc);
+	rp->c_secure = rqstp->rq_secure;
+	rp->c_type = cachetype;
+	rp->c_state = RC_DONE;
+	rp->c_timestamp = jiffies;
+	spin_unlock(&dc->cache_lock);
+	return;
+}
+EXPORT_SYMBOL_GPL(drc_cache_update);
+
+/*
+ * Copy cached reply to current reply buffer. Should always fit.
+ * FIXME as reply is in a page, we should just attach the page, and
+ * keep a refcount....
+ */
+static int
+drc_cache_append(struct svc_rqst *rqstp, struct kvec *data)
+{
+	struct kvec	*vec = &rqstp->rq_res.head[0];
+
+	if (vec->iov_len + data->iov_len > PAGE_SIZE) {
+		printk(KERN_WARNING "drc: cached reply too large (%Zd).\n",
+				data->iov_len);
+		return 0;
+	}
+	memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len);
+	vec->iov_len += data->iov_len;
+	return 1;
+}
-- 
1.6.2




^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [RFC][PATCH 2/3] nlm:add DRC to NLM using sunrpc's universal DRC
  2009-10-27  3:22 [RFC][PATCH 0/3] nlm:Fix bug nlm cann't process retransmited request correctly Mi Jinlong
  2009-10-27  3:27 ` [RFC][PATCH 1/3] sunrpc:add an universal DRC to sunrpc Mi Jinlong
@ 2009-10-27  3:30 ` Mi Jinlong
  2009-10-27  3:31 ` [RFC][PATCH 3/3] nfsd:modify the nfsd's DRC to use " Mi Jinlong
  2 siblings, 0 replies; 4+ messages in thread
From: Mi Jinlong @ 2009-10-27  3:30 UTC (permalink / raw)
  To: Trond.Myklebust; +Cc: NFSv3 list, bfields, mingo

Add DRC to NLM using sunrpc's DRC.
Add nlm_dispatch function for NLM.

Signed-off-by: Mi Jinlong <mijinlong@cn.fujitsu.com>
---
 fs/lockd/svc.c              |   66 +++++++++++++++++++++++++++++++++++++++++++
 fs/lockd/svc4proc.c         |   52 +++++++++++++++++----------------
 include/linux/lockd/lockd.h |    6 ++++
 3 files changed, 99 insertions(+), 25 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 1a54ae1..66d97ce 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -32,6 +32,7 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/drc.h>
 #include <linux/sunrpc/svcsock.h>
 #include <net/ip.h>
 #include <linux/lockd/lockd.h>
@@ -76,6 +77,9 @@ static const int		nlm_port_min = 0, nlm_port_max = 65535;
 static struct ctl_table_header * nlm_sysctl_table;
 #endif
 
+/* DRC for NLM */
+struct drc_cache nlm_dc;
+
 static unsigned long get_lockd_grace_period(void)
 {
 	/* Note: nlm_timeout should always be nonzero */
@@ -363,6 +367,63 @@ out:
 }
 EXPORT_SYMBOL_GPL(lockd_down);
 
+/*
+ * DRC for NLM
+ */
+int
+nlm_dispatch(struct svc_rqst *rqstp, u32 *statp)
+{
+	struct svc_procedure *proc;
+	kxdrproc_t	   xdr;
+
+	dprintk("nlm_dispatch: vers %d proc %d\n",
+			rqstp->rq_vers, rqstp->rq_proc);
+	proc = rqstp->rq_procinfo;
+
+	/* Check whether we have this call in the cache. */
+	switch (drc_cache_lookup(rqstp, proc->pc_cachetype, &nlm_dc)) {
+	case RC_INTR:
+	case RC_DROPIT:
+		return 0;
+	case RC_REPLY:
+		return 1;
+	case RC_DOIT:;
+		/* do it */
+	}
+
+	/* Decode arguments */
+	xdr = proc->pc_decode;
+	if (xdr && !xdr(rqstp, (u32*)rqstp->rq_arg.head[0].iov_base,
+			rqstp->rq_argp)) {
+		dprintk("nlm: failed to decode arguments!\n");
+		drc_cache_update(rqstp, RC_NOCACHE, NULL, &nlm_dc);
+		*statp = rpc_garbage_args;
+		return 1;
+	}
+
+	*statp = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+
+	if (*statp == rpc_drop_reply) {
+		dprintk("nlm: Dropping request due to malloc failure!\n");
+		drc_cache_update(rqstp, RC_NOCACHE, NULL, &nlm_dc);
+		return 0;
+	}
+
+	if (*statp ==rpc_success && (xdr = proc->pc_encode) && !xdr(rqstp,
+	    (rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len),
+	     rqstp->rq_resp)) {
+	     dprintk("nlm: failed to encode result!\n");
+	     drc_cache_update(rqstp, RC_NOCACHE, NULL, &nlm_dc);
+	     *statp = rpc_system_err;
+	     return 1;
+	}
+
+	/* Store reply in cache */
+	drc_cache_update(rqstp, proc->pc_cachetype, statp + 1, &nlm_dc);
+	return 1;
+
+}
+
 #ifdef CONFIG_SYSCTL
 
 /*
@@ -525,6 +586,7 @@ module_param(nlm_max_connections, uint, 0644);
 
 static int __init init_nlm(void)
 {
+	drc_reply_cache_init(&nlm_dc);
 #ifdef CONFIG_SYSCTL
 	nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root);
 	return nlm_sysctl_table ? 0 : -ENOMEM;
@@ -536,6 +598,7 @@ static int __init init_nlm(void)
 static void __exit exit_nlm(void)
 {
 	/* FIXME: delete all NLM clients */
+	drc_reply_cache_shutdown(&nlm_dc);
 	nlm_shutdown_hosts();
 #ifdef CONFIG_SYSCTL
 	unregister_sysctl_table(nlm_sysctl_table);
@@ -552,12 +615,14 @@ static struct svc_version	nlmsvc_version1 = {
 		.vs_vers	= 1,
 		.vs_nproc	= 17,
 		.vs_proc	= nlmsvc_procedures,
+		.vs_dispatch	= nlm_dispatch,
 		.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 static struct svc_version	nlmsvc_version3 = {
 		.vs_vers	= 3,
 		.vs_nproc	= 24,
 		.vs_proc	= nlmsvc_procedures,
+		.vs_dispatch    = nlm_dispatch,
 		.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #ifdef CONFIG_LOCKD_V4
@@ -565,6 +630,7 @@ static struct svc_version	nlmsvc_version4 = {
 		.vs_vers	= 4,
 		.vs_nproc	= 24,
 		.vs_proc	= nlmsvc_procedures4,
+		.vs_dispatch    = nlm_dispatch,
 		.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #endif
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index bd173a6..e78cb89 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -13,6 +13,7 @@
 #include <linux/smp_lock.h>
 #include <linux/in.h>
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/drc.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/nfsd/nfsd.h>
 #include <linux/lockd/lockd.h>
@@ -468,7 +469,7 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 
 struct nlm_void			{ int dummy; };
 
-#define PROC(name, xargt, xrest, argt, rest, respsize)	\
+#define PROC(name, xargt, xrest, argt, rest, respsize, cache)	\
  { .pc_func	= (svc_procfunc) nlm4svc_proc_##name,	\
    .pc_decode	= (kxdrproc_t) nlm4svc_decode_##xargt,	\
    .pc_encode	= (kxdrproc_t) nlm4svc_encode_##xrest,	\
@@ -476,36 +477,37 @@ struct nlm_void			{ int dummy; };
    .pc_argsize	= sizeof(struct nlm_##argt),		\
    .pc_ressize	= sizeof(struct nlm_##rest),		\
    .pc_xdrressize = respsize,				\
+   .pc_cachetype = cache, 				\
  }
 #define	Ck	(1+XDR_QUADLEN(NLM_MAXCOOKIELEN))	/* cookie */
 #define	No	(1+1024/4)				/* netobj */
 #define	St	1					/* status */
 #define	Rg	4					/* range (offset + length) */
 struct svc_procedure		nlmsvc_procedures4[] = {
-  PROC(null,		void,		void,		void,	void, 1),
-  PROC(test,		testargs,	testres,	args,	res, Ck+St+2+No+Rg),
-  PROC(lock,		lockargs,	res,		args,	res, Ck+St),
-  PROC(cancel,		cancargs,	res,		args,	res, Ck+St),
-  PROC(unlock,		unlockargs,	res,		args,	res, Ck+St),
-  PROC(granted,		testargs,	res,		args,	res, Ck+St),
-  PROC(test_msg,	testargs,	norep,		args,	void, 1),
-  PROC(lock_msg,	lockargs,	norep,		args,	void, 1),
-  PROC(cancel_msg,	cancargs,	norep,		args,	void, 1),
-  PROC(unlock_msg,	unlockargs,	norep,		args,	void, 1),
-  PROC(granted_msg,	testargs,	norep,		args,	void, 1),
-  PROC(test_res,	testres,	norep,		res,	void, 1),
-  PROC(lock_res,	lockres,	norep,		res,	void, 1),
-  PROC(cancel_res,	cancelres,	norep,		res,	void, 1),
-  PROC(unlock_res,	unlockres,	norep,		res,	void, 1),
-  PROC(granted_res,	res,		norep,		res,	void, 1),
+  PROC(null,		void,		void,		void,	void, 1, RC_NOCACHE),
+  PROC(test,		testargs,	testres,	args,	res, Ck+St+2+No+Rg, RC_NOCACHE),
+  PROC(lock,		lockargs,	res,		args,	res, Ck+St, RC_REPLSTAT),
+  PROC(cancel,		cancargs,	res,		args,	res, Ck+St, RC_NOCACHE),
+  PROC(unlock,		unlockargs,	res,		args,	res, Ck+St, RC_NOCACHE),
+  PROC(granted,		testargs,	res,		args,	res, Ck+St, RC_NOCACHE),
+  PROC(test_msg,	testargs,	norep,		args,	void, 1, RC_NOCACHE),
+  PROC(lock_msg,	lockargs,	norep,		args,	void, 1, RC_NOCACHE),
+  PROC(cancel_msg,	cancargs,	norep,		args,	void, 1, RC_NOCACHE),
+  PROC(unlock_msg,	unlockargs,	norep,		args,	void, 1, RC_NOCACHE),
+  PROC(granted_msg,	testargs,	norep,		args,	void, 1, RC_NOCACHE),
+  PROC(test_res,	testres,	norep,		res,	void, 1, RC_NOCACHE),
+  PROC(lock_res,	lockres,	norep,		res,	void, 1, RC_NOCACHE),
+  PROC(cancel_res,	cancelres,	norep,		res,	void, 1, RC_NOCACHE),
+  PROC(unlock_res,	unlockres,	norep,		res,	void, 1, RC_NOCACHE),
+  PROC(granted_res,	res,		norep,		res,	void, 1, RC_NOCACHE),
   /* statd callback */
-  PROC(sm_notify,	reboot,		void,		reboot,	void, 1),
-  PROC(none,		void,		void,		void,	void, 0),
-  PROC(none,		void,		void,		void,	void, 0),
-  PROC(none,		void,		void,		void,	void, 0),
-  PROC(share,		shareargs,	shareres,	args,	res, Ck+St+1),
-  PROC(unshare,		shareargs,	shareres,	args,	res, Ck+St+1),
-  PROC(nm_lock,		lockargs,	res,		args,	res, Ck+St),
-  PROC(free_all,	notify,		void,		args,	void, 1),
+  PROC(sm_notify,	reboot,		void,		reboot,	void, 1, RC_NOCACHE),
+  PROC(none,		void,		void,		void,	void, 0, RC_NOCACHE),
+  PROC(none,		void,		void,		void,	void, 0, RC_NOCACHE),
+  PROC(none,		void,		void,		void,	void, 0, RC_NOCACHE),
+  PROC(share,		shareargs,	shareres,	args,	res, Ck+St+1, RC_NOCACHE),
+  PROC(unshare,		shareargs,	shareres,	args,	res, Ck+St+1, RC_NOCACHE),
+  PROC(nm_lock,		lockargs,	res,		args,	res, Ck+St, RC_NOCACHE),
+  PROC(free_all,	notify,		void,		args,	void, 1, RC_NOCACHE),
 
 };
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index a34dea4..7bfea5c 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -198,6 +198,12 @@ extern int			nsm_use_hostnames;
 extern u32			nsm_local_state;
 
 /*
+ * DRC for NLM
+ */
+
+int nlm_dispatch(struct svc_rqst *rqstp, u32 *stap);
+
+/*
  * Lockd client functions
  */
 struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
-- 
1.6.2



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [RFC][PATCH 3/3] nfsd:modify the nfsd's DRC to use sunrpc's universal DRC
  2009-10-27  3:22 [RFC][PATCH 0/3] nlm:Fix bug nlm cann't process retransmited request correctly Mi Jinlong
  2009-10-27  3:27 ` [RFC][PATCH 1/3] sunrpc:add an universal DRC to sunrpc Mi Jinlong
  2009-10-27  3:30 ` [RFC][PATCH 2/3] nlm:add DRC to NLM using sunrpc's universal DRC Mi Jinlong
@ 2009-10-27  3:31 ` Mi Jinlong
  2 siblings, 0 replies; 4+ messages in thread
From: Mi Jinlong @ 2009-10-27  3:31 UTC (permalink / raw)
  To: Trond.Myklebust; +Cc: NFSv3 list, bfields, mingo

Modify the nfsd's DRC to use sunrpc's universal DRC

Signed-off-by: Mi Jinlong <mijinlong@cn.fujitsu.com>
---
 arch/parisc/kernel/sys_parisc32.c |    2 +-
 arch/s390/kernel/compat_linux.c   |    2 +-
 arch/sparc/kernel/sys_sparc32.c   |    2 +-
 fs/nfsd/Makefile                  |    2 +-
 fs/nfsd/nfs2acl.c                 |    2 +-
 fs/nfsd/nfs3acl.c                 |    2 +-
 fs/nfsd/nfs3proc.c                |    2 +-
 fs/nfsd/nfs4proc.c                |    2 +-
 fs/nfsd/nfs4state.c               |    2 +-
 fs/nfsd/nfscache.c                |  330 -------------------------------------
 fs/nfsd/nfsctl.c                  |   10 +-
 fs/nfsd/nfsproc.c                 |    2 +-
 fs/nfsd/nfssvc.c                  |   13 +-
 fs/nfsd/stats.c                   |    9 +-
 include/linux/nfsd/cache.h        |   62 -------
 include/linux/nfsd/stats.h        |    3 -
 16 files changed, 30 insertions(+), 417 deletions(-)
 delete mode 100644 fs/nfsd/nfscache.c

diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c
index 561388b..3c7498e 100644
--- a/arch/parisc/kernel/sys_parisc32.c
+++ b/arch/parisc/kernel/sys_parisc32.c
@@ -29,8 +29,8 @@
 #include <linux/nfs_fs.h>
 #include <linux/ncp_fs.h>
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/drc.h>
 #include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
 #include <linux/nfsd/xdr.h>
 #include <linux/nfsd/syscall.h>
 #include <linux/poll.h>
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 0debcec..d441e48 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -35,8 +35,8 @@
 #include <linux/quota.h>
 #include <linux/module.h>
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/drc.h>
 #include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
 #include <linux/nfsd/xdr.h>
 #include <linux/nfsd/syscall.h>
 #include <linux/poll.h>
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index 04e28b2..dca7244 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -27,8 +27,8 @@
 #include <linux/quota.h>
 #include <linux/module.h>
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/drc.h>
 #include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
 #include <linux/nfsd/xdr.h>
 #include <linux/nfsd/syscall.h>
 #include <linux/poll.h>
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 9b118ee..fdddb23 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_NFSD)	+= nfsd.o
 
 nfsd-y 			:= nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
-			   export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+			   export.o auth.o lockd.o nfsxdr.o stats.o
 nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs3xdr.o
 nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 4e3219e..06c7293 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -7,9 +7,9 @@
  */
 
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/drc.h>
 #include <linux/nfs.h>
 #include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
 #include <linux/nfsd/xdr.h>
 #include <linux/nfsd/xdr3.h>
 #include <linux/posix_acl.h>
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 9981dbb..f6527d3 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -7,9 +7,9 @@
  */
 
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/drc.h>
 #include <linux/nfs3.h>
 #include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
 #include <linux/nfsd/xdr3.h>
 #include <linux/posix_acl.h>
 #include <linux/nfsacl.h>
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index a713c41..932a343 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -21,8 +21,8 @@
 #include <linux/magic.h>
 
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/drc.h>
 #include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
 #include <linux/nfsd/xdr3.h>
 #include <linux/nfs3.h>
 
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bebc0c2..5ad2212 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -41,8 +41,8 @@
 #include <linux/file.h>
 
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/drc.h>
 #include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
 #include <linux/nfs4.h>
 #include <linux/nfsd/state.h>
 #include <linux/nfsd/xdr4.h>
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2153f9b..efde9c8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -39,8 +39,8 @@
 #include <linux/slab.h>
 
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/drc.h>
 #include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
 #include <linux/file.h>
 #include <linux/mount.h>
 #include <linux/workqueue.h>
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
deleted file mode 100644
index 4638635..0000000
--- a/fs/nfsd/nfscache.c
+++ /dev/null
@@ -1,330 +0,0 @@
-/*
- * linux/fs/nfsd/nfscache.c
- *
- * Request reply cache. This is currently a global cache, but this may
- * change in the future and be a per-client cache.
- *
- * This code is heavily inspired by the 44BSD implementation, although
- * it does things a bit differently.
- *
- * Copyright (C) 1995, 1996 Olaf Kirch <okir-pn4DOG8n3UYbFoVRYvo4fw@public.gmane.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/time.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/spinlock.h>
-#include <linux/list.h>
-
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-
-/* Size of reply cache. Common values are:
- * 4.3BSD:	128
- * 4.4BSD:	256
- * Solaris2:	1024
- * DEC Unix:	512-4096
- */
-#define CACHESIZE		1024
-#define HASHSIZE		64
-
-static struct hlist_head *	cache_hash;
-static struct list_head 	lru_head;
-static int			cache_disabled = 1;
-
-/*
- * Calculate the hash index from an XID.
- */
-static inline u32 request_hash(u32 xid)
-{
-	u32 h = xid;
-	h ^= (xid >> 24);
-	return h & (HASHSIZE-1);
-}
-
-static int	nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
-
-/*
- * locking for the reply cache:
- * A cache entry is "single use" if c_state == RC_INPROG
- * Otherwise, it when accessing _prev or _next, the lock must be held.
- */
-static DEFINE_SPINLOCK(cache_lock);
-
-int nfsd_reply_cache_init(void)
-{
-	struct svc_cacherep	*rp;
-	int			i;
-
-	INIT_LIST_HEAD(&lru_head);
-	i = CACHESIZE;
-	while (i) {
-		rp = kmalloc(sizeof(*rp), GFP_KERNEL);
-		if (!rp)
-			goto out_nomem;
-		list_add(&rp->c_lru, &lru_head);
-		rp->c_state = RC_UNUSED;
-		rp->c_type = RC_NOCACHE;
-		INIT_HLIST_NODE(&rp->c_hash);
-		i--;
-	}
-
-	cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
-	if (!cache_hash)
-		goto out_nomem;
-
-	cache_disabled = 0;
-	return 0;
-out_nomem:
-	printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
-	nfsd_reply_cache_shutdown();
-	return -ENOMEM;
-}
-
-void nfsd_reply_cache_shutdown(void)
-{
-	struct svc_cacherep	*rp;
-
-	while (!list_empty(&lru_head)) {
-		rp = list_entry(lru_head.next, struct svc_cacherep, c_lru);
-		if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF)
-			kfree(rp->c_replvec.iov_base);
-		list_del(&rp->c_lru);
-		kfree(rp);
-	}
-
-	cache_disabled = 1;
-
-	kfree (cache_hash);
-	cache_hash = NULL;
-}
-
-/*
- * Move cache entry to end of LRU list
- */
-static void
-lru_put_end(struct svc_cacherep *rp)
-{
-	list_move_tail(&rp->c_lru, &lru_head);
-}
-
-/*
- * Move a cache entry from one hash list to another
- */
-static void
-hash_refile(struct svc_cacherep *rp)
-{
-	hlist_del_init(&rp->c_hash);
-	hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid));
-}
-
-/*
- * Try to find an entry matching the current call in the cache. When none
- * is found, we grab the oldest unlocked entry off the LRU list.
- * Note that no operation within the loop may sleep.
- */
-int
-nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
-{
-	struct hlist_node	*hn;
-	struct hlist_head 	*rh;
-	struct svc_cacherep	*rp;
-	__be32			xid = rqstp->rq_xid;
-	u32			proto =  rqstp->rq_prot,
-				vers = rqstp->rq_vers,
-				proc = rqstp->rq_proc;
-	unsigned long		age;
-	int rtn;
-
-	rqstp->rq_cacherep = NULL;
-	if (cache_disabled || type == RC_NOCACHE) {
-		nfsdstats.rcnocache++;
-		return RC_DOIT;
-	}
-
-	spin_lock(&cache_lock);
-	rtn = RC_DOIT;
-
-	rh = &cache_hash[request_hash(xid)];
-	hlist_for_each_entry(rp, hn, rh, c_hash) {
-		if (rp->c_state != RC_UNUSED &&
-		    xid == rp->c_xid && proc == rp->c_proc &&
-		    proto == rp->c_prot && vers == rp->c_vers &&
-		    time_before(jiffies, rp->c_timestamp + 120*HZ) &&
-		    memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) {
-			nfsdstats.rchits++;
-			goto found_entry;
-		}
-	}
-	nfsdstats.rcmisses++;
-
-	/* This loop shouldn't take more than a few iterations normally */
-	{
-	int	safe = 0;
-	list_for_each_entry(rp, &lru_head, c_lru) {
-		if (rp->c_state != RC_INPROG)
-			break;
-		if (safe++ > CACHESIZE) {
-			printk("nfsd: loop in repcache LRU list\n");
-			cache_disabled = 1;
-			goto out;
-		}
-	}
-	}
-
-	/* All entries on the LRU are in-progress. This should not happen */
-	if (&rp->c_lru == &lru_head) {
-		static int	complaints;
-
-		printk(KERN_WARNING "nfsd: all repcache entries locked!\n");
-		if (++complaints > 5) {
-			printk(KERN_WARNING "nfsd: disabling repcache.\n");
-			cache_disabled = 1;
-		}
-		goto out;
-	}
-
-	rqstp->rq_cacherep = rp;
-	rp->c_state = RC_INPROG;
-	rp->c_xid = xid;
-	rp->c_proc = proc;
-	memcpy(&rp->c_addr, svc_addr_in(rqstp), sizeof(rp->c_addr));
-	rp->c_prot = proto;
-	rp->c_vers = vers;
-	rp->c_timestamp = jiffies;
-
-	hash_refile(rp);
-
-	/* release any buffer */
-	if (rp->c_type == RC_REPLBUFF) {
-		kfree(rp->c_replvec.iov_base);
-		rp->c_replvec.iov_base = NULL;
-	}
-	rp->c_type = RC_NOCACHE;
- out:
-	spin_unlock(&cache_lock);
-	return rtn;
-
-found_entry:
-	/* We found a matching entry which is either in progress or done. */
-	age = jiffies - rp->c_timestamp;
-	rp->c_timestamp = jiffies;
-	lru_put_end(rp);
-
-	rtn = RC_DROPIT;
-	/* Request being processed or excessive rexmits */
-	if (rp->c_state == RC_INPROG || age < RC_DELAY)
-		goto out;
-
-	/* From the hall of fame of impractical attacks:
-	 * Is this a user who tries to snoop on the cache? */
-	rtn = RC_DOIT;
-	if (!rqstp->rq_secure && rp->c_secure)
-		goto out;
-
-	/* Compose RPC reply header */
-	switch (rp->c_type) {
-	case RC_NOCACHE:
-		break;
-	case RC_REPLSTAT:
-		svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat);
-		rtn = RC_REPLY;
-		break;
-	case RC_REPLBUFF:
-		if (!nfsd_cache_append(rqstp, &rp->c_replvec))
-			goto out;	/* should not happen */
-		rtn = RC_REPLY;
-		break;
-	default:
-		printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
-		rp->c_state = RC_UNUSED;
-	}
-
-	goto out;
-}
-
-/*
- * Update a cache entry. This is called from nfsd_dispatch when
- * the procedure has been executed and the complete reply is in
- * rqstp->rq_res.
- *
- * We're copying around data here rather than swapping buffers because
- * the toplevel loop requires max-sized buffers, which would be a waste
- * of memory for a cache with a max reply size of 100 bytes (diropokres).
- *
- * If we should start to use different types of cache entries tailored
- * specifically for attrstat and fh's, we may save even more space.
- *
- * Also note that a cachetype of RC_NOCACHE can legally be passed when
- * nfsd failed to encode a reply that otherwise would have been cached.
- * In this case, nfsd_cache_update is called with statp == NULL.
- */
-void
-nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
-{
-	struct svc_cacherep *rp;
-	struct kvec	*resv = &rqstp->rq_res.head[0], *cachv;
-	int		len;
-
-	if (!(rp = rqstp->rq_cacherep) || cache_disabled)
-		return;
-
-	len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
-	len >>= 2;
-
-	/* Don't cache excessive amounts of data and XDR failures */
-	if (!statp || len > (256 >> 2)) {
-		rp->c_state = RC_UNUSED;
-		return;
-	}
-
-	switch (cachetype) {
-	case RC_REPLSTAT:
-		if (len != 1)
-			printk("nfsd: RC_REPLSTAT/reply len %d!\n",len);
-		rp->c_replstat = *statp;
-		break;
-	case RC_REPLBUFF:
-		cachv = &rp->c_replvec;
-		cachv->iov_base = kmalloc(len << 2, GFP_KERNEL);
-		if (!cachv->iov_base) {
-			spin_lock(&cache_lock);
-			rp->c_state = RC_UNUSED;
-			spin_unlock(&cache_lock);
-			return;
-		}
-		cachv->iov_len = len << 2;
-		memcpy(cachv->iov_base, statp, len << 2);
-		break;
-	}
-	spin_lock(&cache_lock);
-	lru_put_end(rp);
-	rp->c_secure = rqstp->rq_secure;
-	rp->c_type = cachetype;
-	rp->c_state = RC_DONE;
-	rp->c_timestamp = jiffies;
-	spin_unlock(&cache_lock);
-	return;
-}
-
-/*
- * Copy cached reply to current reply buffer. Should always fit.
- * FIXME as reply is in a page, we should just attach the page, and
- * keep a refcount....
- */
-static int
-nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
-{
-	struct kvec	*vec = &rqstp->rq_res.head[0];
-
-	if (vec->iov_len + data->iov_len > PAGE_SIZE) {
-		printk(KERN_WARNING "nfsd: cached reply too large (%Zd).\n",
-				data->iov_len);
-		return 0;
-	}
-	memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len);
-	vec->iov_len += data->iov_len;
-	return 1;
-}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5c01fc1..100bb9f 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -31,9 +31,9 @@
 #include <linux/nfsd_idmap.h>
 #include <linux/lockd/bind.h>
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/drc.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
 #include <linux/nfsd/xdr.h>
 #include <linux/nfsd/syscall.h>
 #include <linux/lockd/lockd.h>
@@ -42,6 +42,8 @@
 #include <asm/uaccess.h>
 #include <net/ipv6.h>
 
+struct drc_cache nfsd_dc;
+
 /*
  *	We have a single directory with 9 nodes in it.
  */
@@ -1392,7 +1394,7 @@ static int __init init_nfsd(void)
 	if (retval)
 		return retval;
 	nfsd_stat_init();	/* Statistics */
-	retval = nfsd_reply_cache_init();
+	retval = drc_reply_cache_init(&nfsd_dc);
 	if (retval)
 		goto out_free_stat;
 	retval = nfsd_export_init();
@@ -1418,7 +1420,7 @@ out_free_lockd:
 	nfsd_lockd_shutdown();
 	nfsd_export_shutdown();
 out_free_cache:
-	nfsd_reply_cache_shutdown();
+	drc_reply_cache_shutdown(&nfsd_dc);
 out_free_stat:
 	nfsd_stat_shutdown();
 	nfsd4_free_slabs();
@@ -1428,7 +1430,7 @@ out_free_stat:
 static void __exit exit_nfsd(void)
 {
 	nfsd_export_shutdown();
-	nfsd_reply_cache_shutdown();
+	drc_reply_cache_shutdown(&nfsd_dc);
 	remove_proc_entry("fs/nfs/exports", NULL);
 	remove_proc_entry("fs/nfs", NULL);
 	nfsd_stat_shutdown();
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0eb9c82..4be2996 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -21,8 +21,8 @@
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/drc.h>
 #include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
 #include <linux/nfsd/xdr.h>
 
 typedef struct svc_rqst	svc_rqst;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 67ea83e..6206018 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -26,6 +26,7 @@
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/drc.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/cache.h>
 #include <linux/nfsd/nfsd.h>
@@ -42,6 +43,8 @@ extern struct svc_program	nfsd_program;
 static int			nfsd(void *vrqstp);
 struct timeval			nfssvc_boot;
 
+extern struct drc_cache nfsd_dc;
+
 /*
  * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
  * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
@@ -554,7 +557,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	proc = rqstp->rq_procinfo;
 
 	/* Check whether we have this call in the cache. */
-	switch (nfsd_cache_lookup(rqstp, proc->pc_cachetype)) {
+	switch (drc_cache_lookup(rqstp, proc->pc_cachetype, &nfsd_dc)) {
 	case RC_INTR:
 	case RC_DROPIT:
 		return 0;
@@ -569,7 +572,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	if (xdr && !xdr(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base,
 			rqstp->rq_argp)) {
 		dprintk("nfsd: failed to decode arguments!\n");
-		nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+		drc_cache_update(rqstp, RC_NOCACHE, NULL, &nfsd_dc);
 		*statp = rpc_garbage_args;
 		return 1;
 	}
@@ -586,7 +589,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
 	if (nfserr == nfserr_dropit) {
 		dprintk("nfsd: Dropping request; may be revisited later\n");
-		nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+		drc_cache_update(rqstp, RC_NOCACHE, NULL, &nfsd_dc);
 		return 0;
 	}
 
@@ -602,14 +605,14 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 				rqstp->rq_resp)) {
 			/* Failed to encode result. Release cache entry */
 			dprintk("nfsd: failed to encode result!\n");
-			nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+			drc_cache_update(rqstp, RC_NOCACHE, NULL, &nfsd_dc);
 			*statp = rpc_system_err;
 			return 1;
 		}
 	}
 
 	/* Store reply in cache. */
-	nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
+	drc_cache_update(rqstp, proc->pc_cachetype, statp + 1, &nfsd_dc);
 	return 1;
 }
 
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 71944cd..c671a1e 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -31,10 +31,13 @@
 #include <linux/module.h>
 
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/drc.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/stats.h>
 
+extern struct drc_cache nfsd_dc;
+
 struct nfsd_stats	nfsdstats;
 struct svc_stat		nfsd_svcstats = {
 	.program	= &nfsd_program,
@@ -45,9 +48,9 @@ static int nfsd_proc_show(struct seq_file *seq, void *v)
 	int i;
 
 	seq_printf(seq, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n",
-		      nfsdstats.rchits,
-		      nfsdstats.rcmisses,
-		      nfsdstats.rcnocache,
+		      nfsd_dc.rchits,
+		      nfsd_dc.rcmisses,
+		      nfsd_dc.rcnocache,
 		      nfsdstats.fh_stale,
 		      nfsdstats.fh_lookup,
 		      nfsdstats.fh_anon,
diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h
index 3a3f589..a8cbcde 100644
--- a/include/linux/nfsd/cache.h
+++ b/include/linux/nfsd/cache.h
@@ -13,68 +13,6 @@
 #include <linux/in.h>
 #include <linux/uio.h>
 
-/*
- * Representation of a reply cache entry.
- */
-struct svc_cacherep {
-	struct hlist_node	c_hash;
-	struct list_head	c_lru;
-
-	unsigned char		c_state,	/* unused, inprog, done */
-				c_type,		/* status, buffer */
-				c_secure : 1;	/* req came from port < 1024 */
-	struct sockaddr_in	c_addr;
-	__be32			c_xid;
-	u32			c_prot;
-	u32			c_proc;
-	u32			c_vers;
-	unsigned long		c_timestamp;
-	union {
-		struct kvec	u_vec;
-		__be32		u_status;
-	}			c_u;
-};
-
-#define c_replvec		c_u.u_vec
-#define c_replstat		c_u.u_status
-
-/* cache entry states */
-enum {
-	RC_UNUSED,
-	RC_INPROG,
-	RC_DONE
-};
-
-/* return values */
-enum {
-	RC_DROPIT,
-	RC_REPLY,
-	RC_DOIT,
-	RC_INTR
-};
-
-/*
- * Cache types.
- * We may want to add more types one day, e.g. for diropres and
- * attrstat replies. Using cache entries with fixed length instead
- * of buffer pointers may be more efficient.
- */
-enum {
-	RC_NOCACHE,
-	RC_REPLSTAT,
-	RC_REPLBUFF,
-};
-
-/*
- * If requests are retransmitted within this interval, they're dropped.
- */
-#define RC_DELAY		(HZ/5)
-
-int	nfsd_reply_cache_init(void);
-void	nfsd_reply_cache_shutdown(void);
-int	nfsd_cache_lookup(struct svc_rqst *, int);
-void	nfsd_cache_update(struct svc_rqst *, int, __be32 *);
-
 #ifdef CONFIG_NFSD_V4
 void	nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp);
 #else  /* CONFIG_NFSD_V4 */
diff --git a/include/linux/nfsd/stats.h b/include/linux/nfsd/stats.h
index 2693ef6..7c5bf0e 100644
--- a/include/linux/nfsd/stats.h
+++ b/include/linux/nfsd/stats.h
@@ -17,9 +17,6 @@
 #ifdef __KERNEL__
 
 struct nfsd_stats {
-	unsigned int	rchits;		/* repcache hits */
-	unsigned int	rcmisses;	/* repcache hits */
-	unsigned int	rcnocache;	/* uncached reqs */
 	unsigned int	fh_stale;	/* FH stale error */
 	unsigned int	fh_lookup;	/* dentry cached */
 	unsigned int	fh_anon;	/* anon file dentry returned */
-- 
1.6.2



^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2009-10-27  3:30 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-10-27  3:22 [RFC][PATCH 0/3] nlm:Fix bug nlm cann't process retransmited request correctly Mi Jinlong
2009-10-27  3:27 ` [RFC][PATCH 1/3] sunrpc:add an universal DRC to sunrpc Mi Jinlong
2009-10-27  3:30 ` [RFC][PATCH 2/3] nlm:add DRC to NLM using sunrpc's universal DRC Mi Jinlong
2009-10-27  3:31 ` [RFC][PATCH 3/3] nfsd:modify the nfsd's DRC to use " Mi Jinlong

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.