Linux NFS development
 help / color / mirror / Atom feed
* [RFC,PATCH 00/38] SVC Transport Switch
@ 2007-11-29 22:39 Tom Tucker
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
  0 siblings, 1 reply; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:39 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs

The following series implements a pluggable transport switch for
RPC servers. This patchset is a rollup of the original
plus incremental patches.

In addition to the incremental change rollup:

- The address management logic in svc_xprt was cleaned up
  based on feedback from Chuck Lever.

- A race was fixed whereby UDP RPC from different clients in parallel
  could corrupt each other's addresses.

- Changes were made to the sysctl implementation to comply 
  with the new 2.6.24 requirements regarding sysctl ids.

The following testing was done:

- Connectathon on V3 and V4 on TCP, UDP and RDMA mounts

- Kernel build on V3 RDMA mount.

This patchset is against the 2.6.24-rc3 kernel tree.

-- 
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>

^ permalink raw reply	[flat|nested] 62+ messages in thread

* [RFC,PATCH 01/38] svc: Add an svc transport class
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
@ 2007-11-29 22:39   ` Tom Tucker
  2007-11-29 22:39   ` [RFC,PATCH 02/38] svc: Make svc_sock the tcp/udp transport Tom Tucker
                     ` (36 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:39 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


The transport class (svc_xprt_class) represents a type of transport, e.g. 
udp, tcp, rdma.  A transport class has a unique name and a set of transport 
operations kept in the svc_xprt_ops structure.

A transport class can be dynamically registered and unregistered. The 
svc_xprt_class represents the module that implements the transport
type and keeps reference counts on the module to avoid unloading while
there are active users.

The endpoint (svc_xprt) is a generic, transport independent endpoint that can 
be used to send and receive data for an RPC service. It inherits its 
operations from the transport class. 

A transport driver module registers and unregisters itself with svc sunrpc
by calling svc_reg_xprt_class and svc_unreg_xprt_class, respectively. 

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/debug.h    |    1 
 include/linux/sunrpc/svc_xprt.h |   31 +++++++++++++
 net/sunrpc/Makefile             |    3 +
 net/sunrpc/svc_xprt.c           |   95 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 129 insertions(+), 1 deletions(-)

diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index 3347c72..1456a0b 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -21,6 +21,7 @@
 #define RPCDBG_SCHED		0x0040
 #define RPCDBG_TRANS		0x0080
 #define RPCDBG_SVCSOCK		0x0100
+#define RPCDBG_SVCXPRT		0x0100
 #define RPCDBG_SVCDSP		0x0200
 #define RPCDBG_MISC		0x0400
 #define RPCDBG_CACHE		0x0800
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
new file mode 100644
index 0000000..a8b1da8
--- /dev/null
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -0,0 +1,31 @@
+/*
+ * linux/include/linux/sunrpc/svc_xprt.h
+ *
+ * RPC server transport I/O
+ */
+
+#ifndef SUNRPC_SVC_XPRT_H
+#define SUNRPC_SVC_XPRT_H
+
+#include <linux/sunrpc/svc.h>
+
+struct svc_xprt_ops {
+};
+
+struct svc_xprt_class {
+	const char		*xcl_name;
+	struct module		*xcl_owner;
+	struct svc_xprt_ops	*xcl_ops;
+	struct list_head	xcl_list;
+};
+
+struct svc_xprt {
+	struct svc_xprt_class	*xpt_class;
+	struct svc_xprt_ops	*xpt_ops;
+};
+
+int	svc_reg_xprt_class(struct svc_xprt_class *);
+int	svc_unreg_xprt_class(struct svc_xprt_class *);
+void	svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *);
+
+#endif /* SUNRPC_SVC_XPRT_H */
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 5c69a72..92e1dbe 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -11,6 +11,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
 	    auth.o auth_null.o auth_unix.o \
 	    svc.o svcsock.o svcauth.o svcauth_unix.o \
 	    rpcb_clnt.o timer.o xdr.o \
-	    sunrpc_syms.o cache.o rpc_pipe.o
+	    sunrpc_syms.o cache.o rpc_pipe.o \
+	    svc_xprt.o
 sunrpc-$(CONFIG_PROC_FS) += stats.o
 sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
new file mode 100644
index 0000000..92ea85b
--- /dev/null
+++ b/net/sunrpc/svc_xprt.c
@@ -0,0 +1,95 @@
+/*
+ * linux/net/sunrpc/svc_xprt.c
+ *
+ * Author: Tom Tucker <tom@opengridcomputing.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/file.h>
+#include <linux/freezer.h>
+#include <net/sock.h>
+#include <net/checksum.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/tcp_states.h>
+#include <linux/uaccess.h>
+#include <asm/ioctls.h>
+
+#include <linux/sunrpc/types.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/svc_xprt.h>
+
+#define RPCDBG_FACILITY	RPCDBG_SVCXPRT
+
+/* List of registered transport classes */
+static DEFINE_SPINLOCK(svc_xprt_class_lock);
+static LIST_HEAD(svc_xprt_class_list);
+
+int svc_reg_xprt_class(struct svc_xprt_class *xcl)
+{
+	struct svc_xprt_class *cl;
+	int res = -EEXIST;
+
+	dprintk("svc: Adding svc transport class '%s'\n",
+		xcl->xcl_name);
+
+	INIT_LIST_HEAD(&xcl->xcl_list);
+	spin_lock(&svc_xprt_class_lock);
+	list_for_each_entry(cl, &svc_xprt_class_list, xcl_list) {
+		if (xcl == cl)
+			goto out;
+	}
+	list_add_tail(&xcl->xcl_list, &svc_xprt_class_list);
+	res = 0;
+out:
+	spin_unlock(&svc_xprt_class_lock);
+	return res;
+}
+EXPORT_SYMBOL_GPL(svc_reg_xprt_class);
+
+int svc_unreg_xprt_class(struct svc_xprt_class *xcl)
+{
+	struct svc_xprt_class *cl;
+	int res = 0;
+
+	dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name);
+
+	spin_lock(&svc_xprt_class_lock);
+	list_for_each_entry(cl, &svc_xprt_class_list, xcl_list) {
+		if (xcl == cl) {
+			list_del_init(&cl->xcl_list);
+			goto out;
+		}
+	}
+	res = -ENOENT;
+ out:
+	spin_unlock(&svc_xprt_class_lock);
+	return res;
+}
+EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
+
+/*
+ * Called by transport drivers to initialize the transport independent
+ * portion of the transport instance.
+ */
+void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt)
+{
+	memset(xprt, 0, sizeof(*xprt));
+	xprt->xpt_class = xcl;
+	xprt->xpt_ops = xcl->xcl_ops;
+}
+EXPORT_SYMBOL_GPL(svc_xprt_init);

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 02/38] svc: Make svc_sock the tcp/udp transport
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
  2007-11-29 22:39   ` [RFC,PATCH 01/38] svc: Add an svc transport class Tom Tucker
@ 2007-11-29 22:39   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 03/38] svc: Change the svc_sock in the rqstp structure to a transport Tom Tucker
                     ` (35 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:39 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


Make TCP and UDP svc_sock transports, and register them
with the svc transport core. 

A transport type (svc_sock) has an svc_xprt as its first member, 
and calls svc_xprt_init to initialize this field.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/debug.h   |    1 -
 include/linux/sunrpc/svcsock.h |    4 ++++
 net/sunrpc/sunrpc_syms.c       |    4 +++-
 net/sunrpc/svcsock.c           |   33 ++++++++++++++++++++++++++++++++-
 4 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index 1456a0b..3482a38 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -20,7 +20,6 @@
 #define RPCDBG_BIND		0x0020
 #define RPCDBG_SCHED		0x0040
 #define RPCDBG_TRANS		0x0080
-#define RPCDBG_SVCSOCK		0x0100
 #define RPCDBG_SVCXPRT		0x0100
 #define RPCDBG_SVCDSP		0x0200
 #define RPCDBG_MISC		0x0400
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index a53e0fa..1878cbe 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -10,11 +10,13 @@
 #define SUNRPC_SVCSOCK_H
 
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svc_xprt.h>
 
 /*
  * RPC server socket.
  */
 struct svc_sock {
+	struct svc_xprt		sk_xprt;
 	struct list_head	sk_ready;	/* list of ready sockets */
 	struct list_head	sk_list;	/* list of all sockets */
 	struct socket *		sk_sock;	/* berkeley socket layer */
@@ -78,6 +80,8 @@ int		svc_addsock(struct svc_serv *serv,
 			    int fd,
 			    char *name_return,
 			    int *proto);
+void		svc_init_xprt_sock(void);
+void		svc_cleanup_xprt_sock(void);
 
 /*
  * svc_makesock socket characteristics
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 33d89e8..79ea05f 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -151,7 +151,8 @@ init_sunrpc(void)
 #endif
 	cache_register(&ip_map_cache);
 	cache_register(&unix_gid_cache);
-	init_socket_xprt();
+	svc_init_xprt_sock();	/* svc sock transport */
+	init_socket_xprt();	/* clnt sock transport */
 	rpcauth_init_module();
 out:
 	return err;
@@ -162,6 +163,7 @@ cleanup_sunrpc(void)
 {
 	rpcauth_remove_module();
 	cleanup_socket_xprt();
+	svc_cleanup_xprt_sock();
 	unregister_rpc_pipefs();
 	rpc_destroy_mempool();
 	if (cache_unregister(&ip_map_cache))
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index c75bffe..4755467 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -75,7 +75,7 @@
  *
  */
 
-#define RPCDBG_FACILITY	RPCDBG_SVCSOCK
+#define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
 
 static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
@@ -900,12 +900,21 @@ svc_udp_sendto(struct svc_rqst *rqstp)
 	return error;
 }
 
+static struct svc_xprt_ops svc_udp_ops = {
+};
+
+static struct svc_xprt_class svc_udp_class = {
+	.xcl_name = "udp",
+	.xcl_ops = &svc_udp_ops,
+};
+
 static void
 svc_udp_init(struct svc_sock *svsk)
 {
 	int one = 1;
 	mm_segment_t oldfs;
 
+	svc_xprt_init(&svc_udp_class, &svsk->sk_xprt);
 	svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
 	svsk->sk_sk->sk_write_space = svc_write_space;
 	svsk->sk_recvfrom = svc_udp_recvfrom;
@@ -1344,12 +1353,33 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
 	return sent;
 }
 
+static struct svc_xprt_ops svc_tcp_ops = {
+};
+
+static struct svc_xprt_class svc_tcp_class = {
+	.xcl_name = "tcp",
+	.xcl_ops = &svc_tcp_ops,
+};
+
+void svc_init_xprt_sock(void)
+{
+	svc_reg_xprt_class(&svc_tcp_class);
+	svc_reg_xprt_class(&svc_udp_class);
+}
+
+void svc_cleanup_xprt_sock(void)
+{
+	svc_unreg_xprt_class(&svc_tcp_class);
+	svc_unreg_xprt_class(&svc_udp_class);
+}
+
 static void
 svc_tcp_init(struct svc_sock *svsk)
 {
 	struct sock	*sk = svsk->sk_sk;
 	struct tcp_sock *tp = tcp_sk(sk);
 
+	svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt);
 	svsk->sk_recvfrom = svc_tcp_recvfrom;
 	svsk->sk_sendto = svc_tcp_sendto;
 
@@ -1965,3 +1995,4 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
 	spin_unlock(&svsk->sk_lock);
 	return dr;
 }
+

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 03/38] svc: Change the svc_sock in the rqstp structure to a transport
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
  2007-11-29 22:39   ` [RFC,PATCH 01/38] svc: Add an svc transport class Tom Tucker
  2007-11-29 22:39   ` [RFC,PATCH 02/38] svc: Make svc_sock the tcp/udp transport Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 04/38] svc: Add a max payload value to the transport Tom Tucker
                     ` (34 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


The rqstp structure contains a pointer to the transport for the 
RPC request. This functionally trivial patch adds an unnamed union 
with pointers to both svc_sock and svc_xprt. Ultimately the 
union will be removed and only the rq_xprt field will remain. This 
allows incrementally extracting transport independent interfaces without 
one gigundo patch.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc.h |    5 ++++-
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 8531a70..37f7448 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -204,7 +204,10 @@ union svc_addr_u {
 struct svc_rqst {
 	struct list_head	rq_list;	/* idle list */
 	struct list_head	rq_all;		/* all threads list */
-	struct svc_sock *	rq_sock;	/* socket */
+	union {
+		struct svc_xprt *	rq_xprt;	/* transport ptr */
+		struct svc_sock *	rq_sock; 	/* socket ptr */
+	};
 	struct sockaddr_storage	rq_addr;	/* peer address */
 	size_t			rq_addrlen;
 

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 04/38] svc: Add a max payload value to the transport
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (2 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 03/38] svc: Change the svc_sock in the rqstp structure to a transport Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
       [not found]     ` <20071129224002.14563.96227.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
  2007-11-29 22:40   ` [RFC,PATCH 05/38] svc: Move sk_sendto and sk_recvfrom to svc_xprt_class Tom Tucker
                     ` (33 subsequent siblings)
  37 siblings, 1 reply; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


The svc_max_payload function currently looks at the socket type
to determine the max payload. Add a max payload value to svc_xprt_class
so it can be returned directly. 

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    1 +
 net/sunrpc/svc.c                |    4 +---
 net/sunrpc/svcsock.c            |    2 ++
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index a8b1da8..b4ce054 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -17,6 +17,7 @@ struct svc_xprt_class {
 	struct module		*xcl_owner;
 	struct svc_xprt_ops	*xcl_ops;
 	struct list_head	xcl_list;
+	u32			xcl_max_payload;
 };
 
 struct svc_xprt {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index a4a6bf7..ce59044 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1054,10 +1054,8 @@ err_bad:
  */
 u32 svc_max_payload(const struct svc_rqst *rqstp)
 {
-	int max = RPCSVC_MAXPAYLOAD_TCP;
+	int max = rqstp->rq_xprt->xpt_class->xcl_max_payload;
 
-	if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM)
-		max = RPCSVC_MAXPAYLOAD_UDP;
 	if (rqstp->rq_server->sv_max_payload < max)
 		max = rqstp->rq_server->sv_max_payload;
 	return max;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 4755467..ca9b8d8 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -906,6 +906,7 @@ static struct svc_xprt_ops svc_udp_ops = {
 static struct svc_xprt_class svc_udp_class = {
 	.xcl_name = "udp",
 	.xcl_ops = &svc_udp_ops,
+	.xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
 };
 
 static void
@@ -1359,6 +1360,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
 static struct svc_xprt_class svc_tcp_class = {
 	.xcl_name = "tcp",
 	.xcl_ops = &svc_tcp_ops,
+	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
 };
 
 void svc_init_xprt_sock(void)

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 05/38] svc: Move sk_sendto and sk_recvfrom to svc_xprt_class
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (3 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 04/38] svc: Add a max payload value to the transport Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 06/38] svc: Add transport specific xpo_release function Tom Tucker
                     ` (32 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


The sk_sendto and sk_recvfrom are function pointers that allow svc_sock
to be used for both UDP and TCP. Move these function pointers to the 
svc_xprt_ops structure.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    2 ++
 include/linux/sunrpc/svcsock.h  |    3 ---
 net/sunrpc/svcsock.c            |   12 ++++++------
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index b4ce054..81daa39 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -10,6 +10,8 @@
 #include <linux/sunrpc/svc.h>
 
 struct svc_xprt_ops {
+	int		(*xpo_recvfrom)(struct svc_rqst *);
+	int		(*xpo_sendto)(struct svc_rqst *);
 };
 
 struct svc_xprt_class {
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 1878cbe..08e78d0 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -45,9 +45,6 @@ struct svc_sock {
 						 * be revisted */
 	struct mutex		sk_mutex;	/* to serialize sending data */
 
-	int			(*sk_recvfrom)(struct svc_rqst *rqstp);
-	int			(*sk_sendto)(struct svc_rqst *rqstp);
-
 	/* We keep the old state_change and data_ready CB's here */
 	void			(*sk_ostate)(struct sock *);
 	void			(*sk_odata)(struct sock *, int bytes);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index ca9b8d8..9c06b15 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -901,6 +901,8 @@ svc_udp_sendto(struct svc_rqst *rqstp)
 }
 
 static struct svc_xprt_ops svc_udp_ops = {
+	.xpo_recvfrom = svc_udp_recvfrom,
+	.xpo_sendto = svc_udp_sendto,
 };
 
 static struct svc_xprt_class svc_udp_class = {
@@ -918,8 +920,6 @@ svc_udp_init(struct svc_sock *svsk)
 	svc_xprt_init(&svc_udp_class, &svsk->sk_xprt);
 	svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
 	svsk->sk_sk->sk_write_space = svc_write_space;
-	svsk->sk_recvfrom = svc_udp_recvfrom;
-	svsk->sk_sendto = svc_udp_sendto;
 
 	/* initialise setting must have enough space to
 	 * receive and respond to one request.
@@ -1355,6 +1355,8 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
 }
 
 static struct svc_xprt_ops svc_tcp_ops = {
+	.xpo_recvfrom = svc_tcp_recvfrom,
+	.xpo_sendto = svc_tcp_sendto,
 };
 
 static struct svc_xprt_class svc_tcp_class = {
@@ -1382,8 +1384,6 @@ svc_tcp_init(struct svc_sock *svsk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt);
-	svsk->sk_recvfrom = svc_tcp_recvfrom;
-	svsk->sk_sendto = svc_tcp_sendto;
 
 	if (sk->sk_state == TCP_LISTEN) {
 		dprintk("setting up TCP socket for listening\n");
@@ -1531,7 +1531,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 
 	dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
 		 rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
-	len = svsk->sk_recvfrom(rqstp);
+	len = svsk->sk_xprt.xpt_ops->xpo_recvfrom(rqstp);
 	dprintk("svc: got len=%d\n", len);
 
 	/* No data, incomplete (TCP) read, or accept() */
@@ -1591,7 +1591,7 @@ svc_send(struct svc_rqst *rqstp)
 	if (test_bit(SK_DEAD, &svsk->sk_flags))
 		len = -ENOTCONN;
 	else
-		len = svsk->sk_sendto(rqstp);
+		len = svsk->sk_xprt.xpt_ops->xpo_sendto(rqstp);
 	mutex_unlock(&svsk->sk_mutex);
 	svc_sock_release(rqstp);
 

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 06/38] svc: Add transport specific xpo_release function
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (4 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 05/38] svc: Move sk_sendto and sk_recvfrom to svc_xprt_class Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 07/38] svc: Add per-transport delete functions Tom Tucker
                     ` (31 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


The svc_sock_release function releases pages allocated to a thread. For
UDP, this also returns the receive skb to the stack. For RDMA it will 
post a receive WR and bump the client credit count. 

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc.h      |    2 +-
 include/linux/sunrpc/svc_xprt.h |    1 +
 net/sunrpc/svcsock.c            |   17 +++++++++--------
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 37f7448..cfb2652 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -217,7 +217,7 @@ struct svc_rqst {
 	struct auth_ops *	rq_authop;	/* authentication flavour */
 	u32			rq_flavor;	/* pseudoflavor */
 	struct svc_cred		rq_cred;	/* auth info */
-	struct sk_buff *	rq_skbuff;	/* fast recv inet buffer */
+	void *			rq_xprt_ctxt;	/* transport specific context ptr */
 	struct svc_deferred_req*rq_deferred;	/* deferred request we are replaying */
 
 	struct xdr_buf		rq_arg;
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 81daa39..e3bd7b1 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -12,6 +12,7 @@
 struct svc_xprt_ops {
 	int		(*xpo_recvfrom)(struct svc_rqst *);
 	int		(*xpo_sendto)(struct svc_rqst *);
+	void		(*xpo_release_rqst)(struct svc_rqst *);
 };
 
 struct svc_xprt_class {
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 9c06b15..b24c084 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -185,14 +185,13 @@ svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
 /*
  * Release an skbuff after use
  */
-static inline void
-svc_release_skb(struct svc_rqst *rqstp)
+static void svc_release_skb(struct svc_rqst *rqstp)
 {
-	struct sk_buff *skb = rqstp->rq_skbuff;
+	struct sk_buff *skb = rqstp->rq_xprt_ctxt;
 	struct svc_deferred_req *dr = rqstp->rq_deferred;
 
 	if (skb) {
-		rqstp->rq_skbuff = NULL;
+		rqstp->rq_xprt_ctxt = NULL;
 
 		dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
 		skb_free_datagram(rqstp->rq_sock->sk_sk, skb);
@@ -395,7 +394,7 @@ svc_sock_release(struct svc_rqst *rqstp)
 {
 	struct svc_sock	*svsk = rqstp->rq_sock;
 
-	svc_release_skb(rqstp);
+	rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
 
 	svc_free_res_pages(rqstp);
 	rqstp->rq_res.page_len = 0;
@@ -867,7 +866,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 			skb_free_datagram(svsk->sk_sk, skb);
 			return 0;
 		}
-		rqstp->rq_skbuff = skb;
+		rqstp->rq_xprt_ctxt = skb;
 	}
 
 	rqstp->rq_arg.page_base = 0;
@@ -903,6 +902,7 @@ svc_udp_sendto(struct svc_rqst *rqstp)
 static struct svc_xprt_ops svc_udp_ops = {
 	.xpo_recvfrom = svc_udp_recvfrom,
 	.xpo_sendto = svc_udp_sendto,
+	.xpo_release_rqst = svc_release_skb,
 };
 
 static struct svc_xprt_class svc_udp_class = {
@@ -1291,7 +1291,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
 	}
 
-	rqstp->rq_skbuff      = NULL;
+	rqstp->rq_xprt_ctxt   = NULL;
 	rqstp->rq_prot	      = IPPROTO_TCP;
 
 	/* Reset TCP read info */
@@ -1357,6 +1357,7 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
 static struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_recvfrom = svc_tcp_recvfrom,
 	.xpo_sendto = svc_tcp_sendto,
+	.xpo_release_rqst = svc_release_skb,
 };
 
 static struct svc_xprt_class svc_tcp_class = {
@@ -1578,7 +1579,7 @@ svc_send(struct svc_rqst *rqstp)
 	}
 
 	/* release the receive skb before sending the reply */
-	svc_release_skb(rqstp);
+	rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
 
 	/* calculate over-all length */
 	xb = & rqstp->rq_res;

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 07/38] svc: Add per-transport delete functions
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (5 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 06/38] svc: Add transport specific xpo_release function Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 08/38] svc: Add xpo_prep_reply_hdr Tom Tucker
                     ` (30 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


Add transport specific xpo_detach and xpo_free functions. The xpo_detach
function causes the transport to stop delivering data-ready events 
and enqueuing the transport for I/O.

The xpo_free function frees all resources associated with the particular 
transport instance.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    2 +
 net/sunrpc/svcsock.c            |   56 ++++++++++++++++++++++++++++++---------
 2 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index e3bd7b1..5d7b2a6 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -13,6 +13,8 @@ struct svc_xprt_ops {
 	int		(*xpo_recvfrom)(struct svc_rqst *);
 	int		(*xpo_sendto)(struct svc_rqst *);
 	void		(*xpo_release_rqst)(struct svc_rqst *);
+	void		(*xpo_detach)(struct svc_xprt *);
+	void		(*xpo_free)(struct svc_xprt *);
 };
 
 struct svc_xprt_class {
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b24c084..478fa33 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -85,6 +85,8 @@ static void		svc_udp_data_ready(struct sock *, int);
 static int		svc_udp_recvfrom(struct svc_rqst *);
 static int		svc_udp_sendto(struct svc_rqst *);
 static void		svc_close_socket(struct svc_sock *svsk);
+static void		svc_sock_detach(struct svc_xprt *);
+static void		svc_sock_free(struct svc_xprt *);
 
 static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
@@ -376,16 +378,8 @@ static inline void
 svc_sock_put(struct svc_sock *svsk)
 {
 	if (atomic_dec_and_test(&svsk->sk_inuse)) {
-		BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags));
-
-		dprintk("svc: releasing dead socket\n");
-		if (svsk->sk_sock->file)
-			sockfd_put(svsk->sk_sock);
-		else
-			sock_release(svsk->sk_sock);
-		if (svsk->sk_info_authunix != NULL)
-			svcauth_unix_info_release(svsk->sk_info_authunix);
-		kfree(svsk);
+		BUG_ON(!test_bit(SK_DEAD, &svsk->sk_flags));
+		svsk->sk_xprt.xpt_ops->xpo_free(&svsk->sk_xprt);
 	}
 }
 
@@ -903,6 +897,8 @@ static struct svc_xprt_ops svc_udp_ops = {
 	.xpo_recvfrom = svc_udp_recvfrom,
 	.xpo_sendto = svc_udp_sendto,
 	.xpo_release_rqst = svc_release_skb,
+	.xpo_detach = svc_sock_detach,
+	.xpo_free = svc_sock_free,
 };
 
 static struct svc_xprt_class svc_udp_class = {
@@ -1358,6 +1354,8 @@ static struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_recvfrom = svc_tcp_recvfrom,
 	.xpo_sendto = svc_tcp_sendto,
 	.xpo_release_rqst = svc_release_skb,
+	.xpo_detach = svc_sock_detach,
+	.xpo_free = svc_sock_free,
 };
 
 static struct svc_xprt_class svc_tcp_class = {
@@ -1815,6 +1813,40 @@ bummer:
 }
 
 /*
+ * Detach the svc_sock from the socket so that no
+ * more callbacks occur.
+ */
+static void svc_sock_detach(struct svc_xprt *xprt)
+{
+	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+	struct sock *sk = svsk->sk_sk;
+
+	dprintk("svc: svc_sock_detach(%p)\n", svsk);
+
+	/* put back the old socket callbacks */
+	sk->sk_state_change = svsk->sk_ostate;
+	sk->sk_data_ready = svsk->sk_odata;
+	sk->sk_write_space = svsk->sk_owspace;
+}
+
+/*
+ * Free the svc_sock's socket resources and the svc_sock itself.
+ */
+static void svc_sock_free(struct svc_xprt *xprt)
+{
+	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+	dprintk("svc: svc_sock_free(%p)\n", svsk);
+
+	if (svsk->sk_info_authunix != NULL)
+		svcauth_unix_info_release(svsk->sk_info_authunix);
+	if (svsk->sk_sock->file)
+		sockfd_put(svsk->sk_sock);
+	else
+		sock_release(svsk->sk_sock);
+	kfree(svsk);
+}
+
+/*
  * Remove a dead socket
  */
 static void
@@ -1828,9 +1860,7 @@ svc_delete_socket(struct svc_sock *svsk)
 	serv = svsk->sk_server;
 	sk = svsk->sk_sk;
 
-	sk->sk_state_change = svsk->sk_ostate;
-	sk->sk_data_ready = svsk->sk_odata;
-	sk->sk_write_space = svsk->sk_owspace;
+	svsk->sk_xprt.xpt_ops->xpo_detach(&svsk->sk_xprt);
 
 	spin_lock_bh(&serv->sv_lock);
 

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 08/38] svc: Add xpo_prep_reply_hdr
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (6 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 07/38] svc: Add per-transport delete functions Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 09/38] svc: Add a transport function that checks for write space Tom Tucker
                     ` (29 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


Some transports add fields to the RPC header for replies, e.g. the TCP
record length. This function is called when preparing the reply header
to allow each transport to add whatever fields it requires.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    1 +
 net/sunrpc/svc.c                |    6 +++---
 net/sunrpc/svcsock.c            |   17 +++++++++++++++++
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 5d7b2a6..8501115 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -11,6 +11,7 @@
 
 struct svc_xprt_ops {
 	int		(*xpo_recvfrom)(struct svc_rqst *);
+	void		(*xpo_prep_reply_hdr)(struct svc_rqst *);
 	int		(*xpo_sendto)(struct svc_rqst *);
 	void		(*xpo_release_rqst)(struct svc_rqst *);
 	void		(*xpo_detach)(struct svc_xprt *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index ce59044..4e084a2 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -839,9 +839,9 @@ svc_process(struct svc_rqst *rqstp)
 	rqstp->rq_res.tail[0].iov_len = 0;
 	/* Will be turned off only in gss privacy case: */
 	rqstp->rq_splice_ok = 1;
-	/* tcp needs a space for the record length... */
-	if (rqstp->rq_prot == IPPROTO_TCP)
-		svc_putnl(resv, 0);
+
+	/* Setup reply header */
+	rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
 
 	rqstp->rq_xid = svc_getu32(argv);
 	svc_putu32(resv, rqstp->rq_xid);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 478fa33..510ad45 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -893,12 +893,17 @@ svc_udp_sendto(struct svc_rqst *rqstp)
 	return error;
 }
 
+static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp)
+{
+}
+
 static struct svc_xprt_ops svc_udp_ops = {
 	.xpo_recvfrom = svc_udp_recvfrom,
 	.xpo_sendto = svc_udp_sendto,
 	.xpo_release_rqst = svc_release_skb,
 	.xpo_detach = svc_sock_detach,
 	.xpo_free = svc_sock_free,
+	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
 };
 
 static struct svc_xprt_class svc_udp_class = {
@@ -1350,12 +1355,24 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
 	return sent;
 }
 
+/*
+ * Setup response header. TCP has a 4B record length field.
+ */
+static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
+{
+	struct kvec *resv = &rqstp->rq_res.head[0];
+
+	/* tcp needs a space for the record length... */
+	svc_putnl(resv, 0);
+}
+
 static struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_recvfrom = svc_tcp_recvfrom,
 	.xpo_sendto = svc_tcp_sendto,
 	.xpo_release_rqst = svc_release_skb,
 	.xpo_detach = svc_sock_detach,
 	.xpo_free = svc_sock_free,
+	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
 };
 
 static struct svc_xprt_class svc_tcp_class = {

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 09/38] svc: Add a transport function that checks for write space
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (7 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 08/38] svc: Add xpo_prep_reply_hdr Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
       [not found]     ` <20071129224012.14563.23130.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
  2007-11-29 22:40   ` [RFC,PATCH 10/38] svc: Move close processing to a single place Tom Tucker
                     ` (28 subsequent siblings)
  37 siblings, 1 reply; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


In order to avoid blocking a service thread, the receive side checks
to see if there is sufficient write space to reply to the request.
Each transport has a different mechanism for determining if there is
enough write space to reply.

The code that checked for write space was coupled with code that 
checked for CLOSE and CONN. These checks have been broken out into 
separate statements to make the code easier to read.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    1 +
 net/sunrpc/svcsock.c            |   60 +++++++++++++++++++++++++++++++++------
 2 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 8501115..3adc8f3 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -10,6 +10,7 @@
 #include <linux/sunrpc/svc.h>
 
 struct svc_xprt_ops {
+	int		(*xpo_has_wspace)(struct svc_xprt *);
 	int		(*xpo_recvfrom)(struct svc_rqst *);
 	void		(*xpo_prep_reply_hdr)(struct svc_rqst *);
 	int		(*xpo_sendto)(struct svc_rqst *);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 510ad45..b796244 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -269,22 +269,24 @@ svc_sock_enqueue(struct svc_sock *svsk)
 	BUG_ON(svsk->sk_pool != NULL);
 	svsk->sk_pool = pool;
 
-	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
-	if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
-	     > svc_sock_wspace(svsk))
-	    && !test_bit(SK_CLOSE, &svsk->sk_flags)
-	    && !test_bit(SK_CONN, &svsk->sk_flags)) {
+	/* Handle pending connection */
+	if (test_bit(SK_CONN, &svsk->sk_flags))
+		goto process;
+
+	/* Handle close in-progress */
+	if (test_bit(SK_CLOSE, &svsk->sk_flags))
+		goto process;
+
+	/* Check if we have space to reply to a request */
+	if (!svsk->sk_xprt.xpt_ops->xpo_has_wspace(&svsk->sk_xprt)) {
 		/* Don't enqueue while not enough space for reply */
-		dprintk("svc: socket %p  no space, %d*2 > %ld, not enqueued\n",
-			svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg,
-			svc_sock_wspace(svsk));
+		dprintk("svc: no write space, socket %p  not enqueued\n", svsk);
 		svsk->sk_pool = NULL;
 		clear_bit(SK_BUSY, &svsk->sk_flags);
 		goto out_unlock;
 	}
-	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
-
 
+ process:
 	if (!list_empty(&pool->sp_threads)) {
 		rqstp = list_entry(pool->sp_threads.next,
 				   struct svc_rqst,
@@ -897,6 +899,24 @@ static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp)
 {
 }
 
+static int svc_udp_has_wspace(struct svc_xprt *xprt)
+{
+	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+	struct svc_serv	*serv = svsk->sk_server;
+	int required;
+
+	/*
+	 * Set the SOCK_NOSPACE flag before checking the available
+	 * sock space.
+	 */
+	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+	required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
+	if (required*2 > sock_wspace(svsk->sk_sk))
+		return 0;
+	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+	return 1;
+}
+
 static struct svc_xprt_ops svc_udp_ops = {
 	.xpo_recvfrom = svc_udp_recvfrom,
 	.xpo_sendto = svc_udp_sendto,
@@ -904,6 +924,7 @@ static struct svc_xprt_ops svc_udp_ops = {
 	.xpo_detach = svc_sock_detach,
 	.xpo_free = svc_sock_free,
 	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
+	.xpo_has_wspace = svc_udp_has_wspace,
 };
 
 static struct svc_xprt_class svc_udp_class = {
@@ -1366,6 +1387,24 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
 	svc_putnl(resv, 0);
 }
 
+static int svc_tcp_has_wspace(struct svc_xprt *xprt)
+{
+	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+	struct svc_serv	*serv = svsk->sk_server;
+	int required;
+
+	/*
+	 * Set the SOCK_NOSPACE flag before checking the available
+	 * sock space.
+	 */
+	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+	required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
+	if (required*2 > sk_stream_wspace(svsk->sk_sk))
+		return 0;
+	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+	return 1;
+}
+
 static struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_recvfrom = svc_tcp_recvfrom,
 	.xpo_sendto = svc_tcp_sendto,
@@ -1373,6 +1412,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_detach = svc_sock_detach,
 	.xpo_free = svc_sock_free,
 	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
+	.xpo_has_wspace = svc_tcp_has_wspace,
 };
 
 static struct svc_xprt_class svc_tcp_class = {

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 10/38] svc: Move close processing to a single place
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (8 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 09/38] svc: Add a transport function that checks for write space Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 11/38] svc: Add xpo_accept transport function Tom Tucker
                     ` (27 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


Close handling was duplicated in the UDP and TCP recvfrom 
methods. This code has been moved to the transport independent
svc_recv function.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 net/sunrpc/svcsock.c |   24 ++++++++++--------------
 1 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b796244..38ecdd1 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -792,11 +792,6 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 		return svc_deferred_recv(rqstp);
 	}
 
-	if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
-		svc_delete_socket(svsk);
-		return 0;
-	}
-
 	clear_bit(SK_DATA, &svsk->sk_flags);
 	skb = NULL;
 	err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
@@ -1197,11 +1192,6 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		return svc_deferred_recv(rqstp);
 	}
 
-	if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
-		svc_delete_socket(svsk);
-		return 0;
-	}
-
 	if (svsk->sk_sk->sk_state == TCP_LISTEN) {
 		svc_tcp_accept(svsk);
 		svc_sock_received(svsk);
@@ -1585,10 +1575,16 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 	}
 	spin_unlock_bh(&pool->sp_lock);
 
-	dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
-		 rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
-	len = svsk->sk_xprt.xpt_ops->xpo_recvfrom(rqstp);
-	dprintk("svc: got len=%d\n", len);
+	len = 0;
+	if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
+		dprintk("svc_recv: found SK_CLOSE\n");
+		svc_delete_socket(svsk);
+	} else {
+		dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
+			rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
+		len = svsk->sk_xprt.xpt_ops->xpo_recvfrom(rqstp);
+		dprintk("svc: got len=%d\n", len);
+	}
 
 	/* No data, incomplete (TCP) read, or accept() */
 	if (len == 0 || len == -EAGAIN) {

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 11/38] svc: Add xpo_accept transport function
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (9 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 10/38] svc: Move close processing to a single place Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
       [not found]     ` <20071129224016.14563.67547.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
  2007-11-29 22:40   ` [RFC,PATCH 12/38] svc: Add a generic transport svc_create_xprt function Tom Tucker
                     ` (26 subsequent siblings)
  37 siblings, 1 reply; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


Previously, the accept logic looked into the socket state to determine
whether to call accept or recv when data-ready was indicated on an endpoint. 
Since some transports don't use sockets, this logic was changed to use a flag 
bit (SK_LISTENER) to identify listening endpoints. A transport function 
(xpo_accept) was added to allow each transport to define its own accept 
processing. A transport's initialization logic is responsible for setting the 
SK_LISTENER bit. I didn't see any way to do this in transport independent 
logic since the passive side of a UDP connection doesn't listen and
always recv's.

In the svc_recv function, if the SK_LISTENER bit is set, the transport
xpo_accept function is called to handle accept processing.

Note that all functions are defined even if they don't make sense
for a given transport. For example, accept doesn't mean anything for
UDP. The function is defined anyway and bug checks if called. The
UDP transport should never set the SK_LISTENER bit.

The code that poaches connections when the connection
limit is hit was moved to a subroutine to make the accept logic path
easier to follow. Since this is in the new connection path, it should 
not be a performance issue.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    1 
 include/linux/sunrpc/svcsock.h  |    1 
 net/sunrpc/svcsock.c            |  127 +++++++++++++++++++++------------------
 3 files changed, 72 insertions(+), 57 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 3adc8f3..1527ff1 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -10,6 +10,7 @@
 #include <linux/sunrpc/svc.h>
 
 struct svc_xprt_ops {
+	struct svc_xprt	*(*xpo_accept)(struct svc_xprt *);
 	int		(*xpo_has_wspace)(struct svc_xprt *);
 	int		(*xpo_recvfrom)(struct svc_rqst *);
 	void		(*xpo_prep_reply_hdr)(struct svc_rqst *);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 08e78d0..9882ce0 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -36,6 +36,7 @@ struct svc_sock {
 #define	SK_DEFERRED	8			/* request on sk_deferred */
 #define	SK_OLD		9			/* used for temp socket aging mark+sweep */
 #define	SK_DETACHED	10			/* detached from tempsocks list */
+#define SK_LISTENER	11			/* listening endpoint */
 
 	atomic_t    	    	sk_reserved;	/* space on outq that is reserved */
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 38ecdd1..661162b 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -912,6 +912,12 @@ static int svc_udp_has_wspace(struct svc_xprt *xprt)
 	return 1;
 }
 
+static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
+{
+	BUG();
+	return NULL;
+}
+
 static struct svc_xprt_ops svc_udp_ops = {
 	.xpo_recvfrom = svc_udp_recvfrom,
 	.xpo_sendto = svc_udp_sendto,
@@ -920,6 +926,7 @@ static struct svc_xprt_ops svc_udp_ops = {
 	.xpo_free = svc_sock_free,
 	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
 	.xpo_has_wspace = svc_udp_has_wspace,
+	.xpo_accept = svc_udp_accept,
 };
 
 static struct svc_xprt_class svc_udp_class = {
@@ -1044,9 +1051,9 @@ static inline int svc_port_is_privileged(struct sockaddr *sin)
 /*
  * Accept a TCP connection
  */
-static void
-svc_tcp_accept(struct svc_sock *svsk)
+static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 {
+	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
 	struct sockaddr_storage addr;
 	struct sockaddr	*sin = (struct sockaddr *) &addr;
 	struct svc_serv	*serv = svsk->sk_server;
@@ -1058,7 +1065,7 @@ svc_tcp_accept(struct svc_sock *svsk)
 
 	dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
 	if (!sock)
-		return;
+		return NULL;
 
 	clear_bit(SK_CONN, &svsk->sk_flags);
 	err = kernel_accept(sock, &newsock, O_NONBLOCK);
@@ -1069,7 +1076,7 @@ svc_tcp_accept(struct svc_sock *svsk)
 		else if (err != -EAGAIN && net_ratelimit())
 			printk(KERN_WARNING "%s: accept failed (err %d)!\n",
 				   serv->sv_name, -err);
-		return;
+		return NULL;
 	}
 
 	set_bit(SK_CONN, &svsk->sk_flags);
@@ -1115,59 +1122,14 @@ svc_tcp_accept(struct svc_sock *svsk)
 
 	svc_sock_received(newsvsk);
 
-	/* make sure that we don't have too many active connections.
-	 * If we have, something must be dropped.
-	 *
-	 * There's no point in trying to do random drop here for
-	 * DoS prevention. The NFS clients does 1 reconnect in 15
-	 * seconds. An attacker can easily beat that.
-	 *
-	 * The only somewhat efficient mechanism would be if drop
-	 * old connections from the same IP first. But right now
-	 * we don't even record the client IP in svc_sock.
-	 */
-	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
-		struct svc_sock *svsk = NULL;
-		spin_lock_bh(&serv->sv_lock);
-		if (!list_empty(&serv->sv_tempsocks)) {
-			if (net_ratelimit()) {
-				/* Try to help the admin */
-				printk(KERN_NOTICE "%s: too many open TCP "
-					"sockets, consider increasing the "
-					"number of nfsd threads\n",
-						   serv->sv_name);
-				printk(KERN_NOTICE
-				       "%s: last TCP connect from %s\n",
-				       serv->sv_name, __svc_print_addr(sin,
-							buf, sizeof(buf)));
-			}
-			/*
-			 * Always select the oldest socket. It's not fair,
-			 * but so is life
-			 */
-			svsk = list_entry(serv->sv_tempsocks.prev,
-					  struct svc_sock,
-					  sk_list);
-			set_bit(SK_CLOSE, &svsk->sk_flags);
-			atomic_inc(&svsk->sk_inuse);
-		}
-		spin_unlock_bh(&serv->sv_lock);
-
-		if (svsk) {
-			svc_sock_enqueue(svsk);
-			svc_sock_put(svsk);
-		}
-
-	}
-
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpconn++;
 
-	return;
+	return &newsvsk->sk_xprt;
 
 failed:
 	sock_release(newsock);
-	return;
+	return NULL;
 }
 
 /*
@@ -1192,12 +1154,6 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		return svc_deferred_recv(rqstp);
 	}
 
-	if (svsk->sk_sk->sk_state == TCP_LISTEN) {
-		svc_tcp_accept(svsk);
-		svc_sock_received(svsk);
-		return 0;
-	}
-
 	if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
 		/* sndbuf needs to have room for one request
 		 * per thread, otherwise we can stall even when the
@@ -1403,6 +1359,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_free = svc_sock_free,
 	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
 	.xpo_has_wspace = svc_tcp_has_wspace,
+	.xpo_accept = svc_tcp_accept,
 };
 
 static struct svc_xprt_class svc_tcp_class = {
@@ -1433,6 +1390,7 @@ svc_tcp_init(struct svc_sock *svsk)
 
 	if (sk->sk_state == TCP_LISTEN) {
 		dprintk("setting up TCP socket for listening\n");
+		set_bit(SK_LISTENER, &svsk->sk_flags);
 		sk->sk_data_ready = svc_tcp_listen_data_ready;
 		set_bit(SK_CONN, &svsk->sk_flags);
 	} else {
@@ -1484,6 +1442,55 @@ svc_sock_update_bufs(struct svc_serv *serv)
 	spin_unlock_bh(&serv->sv_lock);
 }
 
+static void
+svc_check_conn_limits(struct svc_serv *serv)
+{
+	char	buf[RPC_MAX_ADDRBUFLEN];
+
+	/* make sure that we don't have too many active connections.
+	 * If we have, something must be dropped.
+	 *
+	 * There's no point in trying to do random drop here for
+	 * DoS prevention. The NFS clients does 1 reconnect in 15
+	 * seconds. An attacker can easily beat that.
+	 *
+	 * The only somewhat efficient mechanism would be if drop
+	 * old connections from the same IP first. But right now
+	 * we don't even record the client IP in svc_sock.
+	 */
+	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
+		struct svc_sock *svsk = NULL;
+		spin_lock_bh(&serv->sv_lock);
+		if (!list_empty(&serv->sv_tempsocks)) {
+			if (net_ratelimit()) {
+				/* Try to help the admin */
+				printk(KERN_NOTICE "%s: too many open TCP "
+					"sockets, consider increasing the "
+					"number of nfsd threads\n",
+						   serv->sv_name);
+				printk(KERN_NOTICE
+				       "%s: last TCP connect from %s\n",
+				       serv->sv_name, buf);
+			}
+			/*
+			 * Always select the oldest socket. It's not fair,
+			 * but so is life
+			 */
+			svsk = list_entry(serv->sv_tempsocks.prev,
+					  struct svc_sock,
+					  sk_list);
+			set_bit(SK_CLOSE, &svsk->sk_flags);
+			atomic_inc(&svsk->sk_inuse);
+		}
+		spin_unlock_bh(&serv->sv_lock);
+
+		if (svsk) {
+			svc_sock_enqueue(svsk);
+			svc_sock_put(svsk);
+		}
+	}
+}
+
 /*
  * Receive the next request on any socket.  This code is carefully
  * organised not to touch any cachelines in the shared svc_serv
@@ -1579,6 +1586,12 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 	if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
 		dprintk("svc_recv: found SK_CLOSE\n");
 		svc_delete_socket(svsk);
+	} else if (test_bit(SK_LISTENER, &svsk->sk_flags)) {
+		struct svc_xprt *newxpt;
+		newxpt = svsk->sk_xprt.xpt_ops->xpo_accept(&svsk->sk_xprt);
+		if (newxpt)
+			svc_check_conn_limits(svsk->sk_server);
+		svc_sock_received(svsk);
 	} else {
 		dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
 			rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 12/38] svc: Add a generic transport svc_create_xprt function
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (10 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 11/38] svc: Add xpo_accept transport function Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 13/38] svc: Change services to use new svc_create_xprt service Tom Tucker
                     ` (25 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


The svc_create_xprt function is a transport independent version
of the svc_makesock function. 

Since transport instance creation contains transport dependent and 
independent components, add an xpo_create transport function. The 
transport implementation of this function allocates the memory for the 
endpoint, implements the transport dependent initialization logic, and
calls svc_xprt_init to initialize the transport independent field (svc_xprt) 
in its data structure.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    4 +++
 net/sunrpc/svc_xprt.c           |   37 ++++++++++++++++++++++++++
 net/sunrpc/svcsock.c            |   56 +++++++++++++++++++++++++++++----------
 3 files changed, 82 insertions(+), 15 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 1527ff1..3f4a1df 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -10,6 +10,9 @@
 #include <linux/sunrpc/svc.h>
 
 struct svc_xprt_ops {
+	struct svc_xprt	*(*xpo_create)(struct svc_serv *,
+				       struct sockaddr *, int,
+				       int);
 	struct svc_xprt	*(*xpo_accept)(struct svc_xprt *);
 	int		(*xpo_has_wspace)(struct svc_xprt *);
 	int		(*xpo_recvfrom)(struct svc_rqst *);
@@ -36,5 +39,6 @@ struct svc_xprt {
 int	svc_reg_xprt_class(struct svc_xprt_class *);
 int	svc_unreg_xprt_class(struct svc_xprt_class *);
 void	svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *);
+int	svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
 
 #endif /* SUNRPC_SVC_XPRT_H */
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 92ea85b..9136da4 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -93,3 +93,40 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt)
 	xprt->xpt_ops = xcl->xcl_ops;
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
+
+int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
+		    int flags)
+{
+	struct svc_xprt_class *xcl;
+	int ret = -ENOENT;
+	struct sockaddr_in sin = {
+		.sin_family		= AF_INET,
+		.sin_addr.s_addr	= INADDR_ANY,
+		.sin_port		= htons(port),
+	};
+	dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
+	spin_lock(&svc_xprt_class_lock);
+	list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
+		if (strcmp(xprt_name, xcl->xcl_name) == 0) {
+			spin_unlock(&svc_xprt_class_lock);
+			if (try_module_get(xcl->xcl_owner)) {
+				struct svc_xprt *newxprt;
+				ret = 0;
+				newxprt = xcl->xcl_ops->xpo_create
+					(serv,
+					 (struct sockaddr *)&sin, sizeof(sin),
+					 flags);
+				if (IS_ERR(newxprt)) {
+					module_put(xcl->xcl_owner);
+					ret = PTR_ERR(newxprt);
+				}
+			}
+			goto out;
+		}
+	}
+	spin_unlock(&svc_xprt_class_lock);
+	dprintk("svc: transport %s not found\n", xprt_name);
+ out:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(svc_create_xprt);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 661162b..0bfffbc 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -91,6 +91,8 @@ static void		svc_sock_free(struct svc_xprt *);
 static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
+static struct svc_xprt *
+svc_create_socket(struct svc_serv *, int, struct sockaddr *, int, int);
 
 /* apparently the "standard" is that clients close
  * idle connections after 5 minutes, servers after
@@ -381,6 +383,7 @@ svc_sock_put(struct svc_sock *svsk)
 {
 	if (atomic_dec_and_test(&svsk->sk_inuse)) {
 		BUG_ON(!test_bit(SK_DEAD, &svsk->sk_flags));
+		module_put(svsk->sk_xprt.xpt_class->xcl_owner);
 		svsk->sk_xprt.xpt_ops->xpo_free(&svsk->sk_xprt);
 	}
 }
@@ -918,7 +921,14 @@ static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
 	return NULL;
 }
 
+static struct svc_xprt *
+svc_udp_create(struct svc_serv *serv, struct sockaddr *sa, int salen, int flags)
+{
+	return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags);
+}
+
 static struct svc_xprt_ops svc_udp_ops = {
+	.xpo_create = svc_udp_create,
 	.xpo_recvfrom = svc_udp_recvfrom,
 	.xpo_sendto = svc_udp_sendto,
 	.xpo_release_rqst = svc_release_skb,
@@ -931,6 +941,7 @@ static struct svc_xprt_ops svc_udp_ops = {
 
 static struct svc_xprt_class svc_udp_class = {
 	.xcl_name = "udp",
+	.xcl_owner = THIS_MODULE,
 	.xcl_ops = &svc_udp_ops,
 	.xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
 };
@@ -1351,7 +1362,14 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)
 	return 1;
 }
 
+static struct svc_xprt *
+svc_tcp_create(struct svc_serv *serv, struct sockaddr *sa, int salen, int flags)
+{
+	return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags);
+}
+
 static struct svc_xprt_ops svc_tcp_ops = {
+	.xpo_create = svc_tcp_create,
 	.xpo_recvfrom = svc_tcp_recvfrom,
 	.xpo_sendto = svc_tcp_sendto,
 	.xpo_release_rqst = svc_release_skb,
@@ -1364,6 +1382,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
 
 static struct svc_xprt_class svc_tcp_class = {
 	.xcl_name = "tcp",
+	.xcl_owner = THIS_MODULE,
 	.xcl_ops = &svc_tcp_ops,
 	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
 };
@@ -1589,8 +1608,14 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 	} else if (test_bit(SK_LISTENER, &svsk->sk_flags)) {
 		struct svc_xprt *newxpt;
 		newxpt = svsk->sk_xprt.xpt_ops->xpo_accept(&svsk->sk_xprt);
-		if (newxpt)
+		if (newxpt) {
+			/*
+			 * We know this module_get will succeed because the
+			 * listener holds a reference too
+			 */
+			__module_get(newxpt->xpt_class->xcl_owner);
 			svc_check_conn_limits(svsk->sk_server);
+		}
 		svc_sock_received(svsk);
 	} else {
 		dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
@@ -1830,8 +1855,9 @@ EXPORT_SYMBOL_GPL(svc_addsock);
 /*
  * Create socket for RPC service.
  */
-static int svc_create_socket(struct svc_serv *serv, int protocol,
-				struct sockaddr *sin, int len, int flags)
+static struct svc_xprt *
+svc_create_socket(struct svc_serv *serv, int protocol,
+		  struct sockaddr *sin, int len, int flags)
 {
 	struct svc_sock	*svsk;
 	struct socket	*sock;
@@ -1846,13 +1872,13 @@ static int svc_create_socket(struct svc_serv *serv, int protocol,
 	if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
 		printk(KERN_WARNING "svc: only UDP and TCP "
 				"sockets supported\n");
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 	}
 	type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
 
 	error = sock_create_kern(sin->sa_family, type, protocol, &sock);
 	if (error < 0)
-		return error;
+		return ERR_PTR(error);
 
 	svc_reclassify_socket(sock);
 
@@ -1869,13 +1895,13 @@ static int svc_create_socket(struct svc_serv *serv, int protocol,
 
 	if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) {
 		svc_sock_received(svsk);
-		return ntohs(inet_sk(svsk->sk_sk)->sport);
+		return (struct svc_xprt *)svsk;
 	}
 
 bummer:
 	dprintk("svc: svc_create_socket error = %d\n", -error);
 	sock_release(sock);
-	return error;
+	return ERR_PTR(error);
 }
 
 /*
@@ -1986,15 +2012,15 @@ void svc_force_close_socket(struct svc_sock *svsk)
 int svc_makesock(struct svc_serv *serv, int protocol, unsigned short port,
 			int flags)
 {
-	struct sockaddr_in sin = {
-		.sin_family		= AF_INET,
-		.sin_addr.s_addr	= INADDR_ANY,
-		.sin_port		= htons(port),
-	};
-
 	dprintk("svc: creating socket proto = %d\n", protocol);
-	return svc_create_socket(serv, protocol, (struct sockaddr *) &sin,
-							sizeof(sin), flags);
+	switch (protocol) {
+	case IPPROTO_TCP:
+		return svc_create_xprt(serv, "tcp", port, flags);
+	case IPPROTO_UDP:
+		return svc_create_xprt(serv, "udp", port, flags);
+	default:
+		return -EINVAL;
+	}
 }
 
 /*

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 13/38] svc: Change services to use new svc_create_xprt service
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (11 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 12/38] svc: Add a generic transport svc_create_xprt function Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 14/38] svc: Change sk_inuse to a kref Tom Tucker
                     ` (24 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


Modify the various kernel RPC svcs to use the svc_create_xprt service.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 fs/lockd/svc.c                 |   17 ++++++++---------
 fs/nfs/callback.c              |    4 ++--
 fs/nfsd/nfssvc.c               |    4 ++--
 include/linux/sunrpc/svcsock.h |    1 -
 net/sunrpc/sunrpc_syms.c       |    1 -
 net/sunrpc/svcsock.c           |   22 ----------------------
 6 files changed, 12 insertions(+), 37 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 82e2192..8686915 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -219,13 +219,12 @@ lockd(struct svc_rqst *rqstp)
 	module_put_and_exit(0);
 }
 
-
-static int find_socket(struct svc_serv *serv, int proto)
+static int find_xprt(struct svc_serv *serv, char *proto)
 {
 	struct svc_sock *svsk;
 	int found = 0;
 	list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
-		if (svsk->sk_sk->sk_protocol == proto) {
+		if (strcmp(svsk->sk_xprt.xpt_class->xcl_name, proto) == 0) {
 			found = 1;
 			break;
 		}
@@ -243,13 +242,13 @@ static int make_socks(struct svc_serv *serv, int proto)
 	int err = 0;
 
 	if (proto == IPPROTO_UDP || nlm_udpport)
-		if (!find_socket(serv, IPPROTO_UDP))
-			err = svc_makesock(serv, IPPROTO_UDP, nlm_udpport,
-						SVC_SOCK_DEFAULTS);
+		if (!find_xprt(serv, "udp"))
+			err = svc_create_xprt(serv, "udp", nlm_udpport,
+					      SVC_SOCK_DEFAULTS);
 	if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport))
-		if (!find_socket(serv, IPPROTO_TCP))
-			err = svc_makesock(serv, IPPROTO_TCP, nlm_tcpport,
-						SVC_SOCK_DEFAULTS);
+		if (!find_xprt(serv, "tcp"))
+			err = svc_create_xprt(serv, "tcp", nlm_tcpport,
+					      SVC_SOCK_DEFAULTS);
 
 	if (err >= 0) {
 		warned = 0;
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index a796be5..e27ca14 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -123,8 +123,8 @@ int nfs_callback_up(void)
 	if (!serv)
 		goto out_err;
 
-	ret = svc_makesock(serv, IPPROTO_TCP, nfs_callback_set_tcpport,
-							SVC_SOCK_ANONYMOUS);
+	ret = svc_create_xprt(serv, "tcp", nfs_callback_set_tcpport,
+			      SVC_SOCK_ANONYMOUS);
 	if (ret <= 0)
 		goto out_destroy;
 	nfs_callback_tcpport = ret;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 1190aea..a828b0b 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -236,7 +236,7 @@ static int nfsd_init_socks(int port)
 
 	error = lockd_up(IPPROTO_UDP);
 	if (error >= 0) {
-		error = svc_makesock(nfsd_serv, IPPROTO_UDP, port,
+		error = svc_create_xprt(nfsd_serv, "udp", port,
 					SVC_SOCK_DEFAULTS);
 		if (error < 0)
 			lockd_down();
@@ -247,7 +247,7 @@ static int nfsd_init_socks(int port)
 #ifdef CONFIG_NFSD_TCP
 	error = lockd_up(IPPROTO_TCP);
 	if (error >= 0) {
-		error = svc_makesock(nfsd_serv, IPPROTO_TCP, port,
+		error = svc_create_xprt(nfsd_serv, "tcp", port,
 					SVC_SOCK_DEFAULTS);
 		if (error < 0)
 			lockd_down();
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 9882ce0..3181d9d 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -67,7 +67,6 @@ struct svc_sock {
 /*
  * Function prototypes.
  */
-int		svc_makesock(struct svc_serv *, int, unsigned short, int flags);
 void		svc_force_close_socket(struct svc_sock *);
 int		svc_recv(struct svc_rqst *, long);
 int		svc_send(struct svc_rqst *);
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 79ea05f..52eea54 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -72,7 +72,6 @@ EXPORT_SYMBOL(svc_drop);
 EXPORT_SYMBOL(svc_process);
 EXPORT_SYMBOL(svc_recv);
 EXPORT_SYMBOL(svc_wake_up);
-EXPORT_SYMBOL(svc_makesock);
 EXPORT_SYMBOL(svc_reserve);
 EXPORT_SYMBOL(svc_auth_register);
 EXPORT_SYMBOL(auth_domain_lookup);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 0bfffbc..574cdbe 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -2001,28 +2001,6 @@ void svc_force_close_socket(struct svc_sock *svsk)
 	svc_close_socket(svsk);
 }
 
-/**
- * svc_makesock - Make a socket for nfsd and lockd
- * @serv: RPC server structure
- * @protocol: transport protocol to use
- * @port: port to use
- * @flags: requested socket characteristics
- *
- */
-int svc_makesock(struct svc_serv *serv, int protocol, unsigned short port,
-			int flags)
-{
-	dprintk("svc: creating socket proto = %d\n", protocol);
-	switch (protocol) {
-	case IPPROTO_TCP:
-		return svc_create_xprt(serv, "tcp", port, flags);
-	case IPPROTO_UDP:
-		return svc_create_xprt(serv, "udp", port, flags);
-	default:
-		return -EINVAL;
-	}
-}
-
 /*
  * Handle defer and revisit of requests
  */

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 14/38] svc: Change sk_inuse to a kref
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (12 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 13/38] svc: Change services to use new svc_create_xprt service Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 15/38] svc: Move sk_flags to the svc_xprt structure Tom Tucker
                     ` (23 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


Change the atomic_t reference count to a kref and move it to the 
transport independent svc_xprt structure. Change the reference count
wrapper names to be generic.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    8 ++++++
 include/linux/sunrpc/svcsock.h  |    1 -
 net/sunrpc/svc_xprt.c           |   17 ++++++++++++
 net/sunrpc/svcsock.c            |   54 +++++++++++++++------------------------
 4 files changed, 46 insertions(+), 34 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 3f4a1df..eb801ad 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -8,6 +8,7 @@
 #define SUNRPC_SVC_XPRT_H
 
 #include <linux/sunrpc/svc.h>
+#include <linux/module.h>
 
 struct svc_xprt_ops {
 	struct svc_xprt	*(*xpo_create)(struct svc_serv *,
@@ -34,11 +35,18 @@ struct svc_xprt_class {
 struct svc_xprt {
 	struct svc_xprt_class	*xpt_class;
 	struct svc_xprt_ops	*xpt_ops;
+	struct kref		xpt_ref;
 };
 
 int	svc_reg_xprt_class(struct svc_xprt_class *);
 int	svc_unreg_xprt_class(struct svc_xprt_class *);
 void	svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *);
 int	svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
+void	svc_xprt_put(struct svc_xprt *xprt);
+
+static inline void svc_xprt_get(struct svc_xprt *xprt)
+{
+	kref_get(&xprt->xpt_ref);
+}
 
 #endif /* SUNRPC_SVC_XPRT_H */
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 3181d9d..ba07d50 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -24,7 +24,6 @@ struct svc_sock {
 
 	struct svc_pool *	sk_pool;	/* current pool iff queued */
 	struct svc_serv *	sk_server;	/* service for this socket */
-	atomic_t		sk_inuse;	/* use count */
 	unsigned long		sk_flags;
 #define	SK_BUSY		0			/* enqueued/receiving */
 #define	SK_CONN		1			/* conn pending */
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 9136da4..43418cf 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -82,6 +82,22 @@ int svc_unreg_xprt_class(struct svc_xprt_class *xcl)
 }
 EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
 
+static void svc_xprt_free(struct kref *kref)
+{
+	struct svc_xprt *xprt =
+		container_of(kref, struct svc_xprt, xpt_ref);
+	struct module *owner = xprt->xpt_class->xcl_owner;
+	BUG_ON(atomic_read(&kref->refcount));
+	xprt->xpt_ops->xpo_free(xprt);
+	module_put(owner);
+}
+
+void svc_xprt_put(struct svc_xprt *xprt)
+{
+	kref_put(&xprt->xpt_ref, svc_xprt_free);
+}
+EXPORT_SYMBOL_GPL(svc_xprt_put);
+
 /*
  * Called by transport drivers to initialize the transport independent
  * portion of the transport instance.
@@ -91,6 +107,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt)
 	memset(xprt, 0, sizeof(*xprt));
 	xprt->xpt_class = xcl;
 	xprt->xpt_ops = xcl->xcl_ops;
+	kref_init(&xprt->xpt_ref);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 574cdbe..6dfceff 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -66,8 +66,8 @@
  *		after a clear, the socket must be read/accepted
  *		 if this succeeds, it must be set again.
  *	SK_CLOSE can set at any time. It is never cleared.
- *      sk_inuse contains a bias of '1' until SK_DEAD is set.
- *             so when sk_inuse hits zero, we know the socket is dead
+ *      xpt_ref contains a bias of '1' until SK_DEAD is set.
+ *             so when xprt_ref hits zero, we know the transport is dead
  *             and no-one is using it.
  *      SK_DEAD can only be set while SK_BUSY is held which ensures
  *             no other thread will be using the socket or will try to
@@ -301,7 +301,7 @@ svc_sock_enqueue(struct svc_sock *svsk)
 				"svc_sock_enqueue: server %p, rq_sock=%p!\n",
 				rqstp, rqstp->rq_sock);
 		rqstp->rq_sock = svsk;
-		atomic_inc(&svsk->sk_inuse);
+		svc_xprt_get(&svsk->sk_xprt);
 		rqstp->rq_reserved = serv->sv_max_mesg;
 		atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
 		BUG_ON(svsk->sk_pool != pool);
@@ -332,7 +332,7 @@ svc_sock_dequeue(struct svc_pool *pool)
 	list_del_init(&svsk->sk_ready);
 
 	dprintk("svc: socket %p dequeued, inuse=%d\n",
-		svsk->sk_sk, atomic_read(&svsk->sk_inuse));
+		svsk->sk_sk, atomic_read(&svsk->sk_xprt.xpt_ref.refcount));
 
 	return svsk;
 }
@@ -375,19 +375,6 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
 	}
 }
 
-/*
- * Release a socket after use.
- */
-static inline void
-svc_sock_put(struct svc_sock *svsk)
-{
-	if (atomic_dec_and_test(&svsk->sk_inuse)) {
-		BUG_ON(!test_bit(SK_DEAD, &svsk->sk_flags));
-		module_put(svsk->sk_xprt.xpt_class->xcl_owner);
-		svsk->sk_xprt.xpt_ops->xpo_free(&svsk->sk_xprt);
-	}
-}
-
 static void
 svc_sock_release(struct svc_rqst *rqstp)
 {
@@ -414,7 +401,7 @@ svc_sock_release(struct svc_rqst *rqstp)
 	svc_reserve(rqstp, 0);
 	rqstp->rq_sock = NULL;
 
-	svc_sock_put(svsk);
+	svc_xprt_put(&svsk->sk_xprt);
 }
 
 /*
@@ -1499,13 +1486,13 @@ svc_check_conn_limits(struct svc_serv *serv)
 					  struct svc_sock,
 					  sk_list);
 			set_bit(SK_CLOSE, &svsk->sk_flags);
-			atomic_inc(&svsk->sk_inuse);
+			svc_xprt_get(&svsk->sk_xprt);
 		}
 		spin_unlock_bh(&serv->sv_lock);
 
 		if (svsk) {
 			svc_sock_enqueue(svsk);
-			svc_sock_put(svsk);
+			svc_xprt_put(&svsk->sk_xprt);
 		}
 	}
 }
@@ -1570,7 +1557,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 	spin_lock_bh(&pool->sp_lock);
 	if ((svsk = svc_sock_dequeue(pool)) != NULL) {
 		rqstp->rq_sock = svsk;
-		atomic_inc(&svsk->sk_inuse);
+		svc_xprt_get(&svsk->sk_xprt);
 		rqstp->rq_reserved = serv->sv_max_mesg;
 		atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
 	} else {
@@ -1619,7 +1606,8 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 		svc_sock_received(svsk);
 	} else {
 		dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
-			rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
+			rqstp, pool->sp_id, svsk,
+			atomic_read(&svsk->sk_xprt.xpt_ref.refcount));
 		len = svsk->sk_xprt.xpt_ops->xpo_recvfrom(rqstp);
 		dprintk("svc: got len=%d\n", len);
 	}
@@ -1716,9 +1704,10 @@ svc_age_temp_sockets(unsigned long closure)
 
 		if (!test_and_set_bit(SK_OLD, &svsk->sk_flags))
 			continue;
-		if (atomic_read(&svsk->sk_inuse) > 1 || test_bit(SK_BUSY, &svsk->sk_flags))
+		if (atomic_read(&svsk->sk_xprt.xpt_ref.refcount) > 1
+		    || test_bit(SK_BUSY, &svsk->sk_flags))
 			continue;
-		atomic_inc(&svsk->sk_inuse);
+		svc_xprt_get(&svsk->sk_xprt);
 		list_move(le, &to_be_aged);
 		set_bit(SK_CLOSE, &svsk->sk_flags);
 		set_bit(SK_DETACHED, &svsk->sk_flags);
@@ -1736,7 +1725,7 @@ svc_age_temp_sockets(unsigned long closure)
 
 		/* a thread will dequeue and close it soon */
 		svc_sock_enqueue(svsk);
-		svc_sock_put(svsk);
+		svc_xprt_put(&svsk->sk_xprt);
 	}
 
 	mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
@@ -1781,7 +1770,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	svsk->sk_odata = inet->sk_data_ready;
 	svsk->sk_owspace = inet->sk_write_space;
 	svsk->sk_server = serv;
-	atomic_set(&svsk->sk_inuse, 1);
 	svsk->sk_lastrecv = get_seconds();
 	spin_lock_init(&svsk->sk_lock);
 	INIT_LIST_HEAD(&svsk->sk_deferred);
@@ -1966,8 +1954,8 @@ svc_delete_socket(struct svc_sock *svsk)
 	 * is about to be destroyed (in svc_destroy).
 	 */
 	if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) {
-		BUG_ON(atomic_read(&svsk->sk_inuse)<2);
-		atomic_dec(&svsk->sk_inuse);
+		BUG_ON(atomic_read(&svsk->sk_xprt.xpt_ref.refcount) < 2);
+		svc_xprt_put(&svsk->sk_xprt);
 		if (test_bit(SK_TEMP, &svsk->sk_flags))
 			serv->sv_tmpcnt--;
 	}
@@ -1982,10 +1970,10 @@ static void svc_close_socket(struct svc_sock *svsk)
 		/* someone else will have to effect the close */
 		return;
 
-	atomic_inc(&svsk->sk_inuse);
+	svc_xprt_get(&svsk->sk_xprt);
 	svc_delete_socket(svsk);
 	clear_bit(SK_BUSY, &svsk->sk_flags);
-	svc_sock_put(svsk);
+	svc_xprt_put(&svsk->sk_xprt);
 }
 
 void svc_force_close_socket(struct svc_sock *svsk)
@@ -2011,7 +1999,7 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
 	struct svc_sock *svsk;
 
 	if (too_many) {
-		svc_sock_put(dr->svsk);
+		svc_xprt_put(&dr->svsk->sk_xprt);
 		kfree(dr);
 		return;
 	}
@@ -2023,7 +2011,7 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
 	spin_unlock(&svsk->sk_lock);
 	set_bit(SK_DEFERRED, &svsk->sk_flags);
 	svc_sock_enqueue(svsk);
-	svc_sock_put(svsk);
+	svc_xprt_put(&svsk->sk_xprt);
 }
 
 static struct cache_deferred_req *
@@ -2053,7 +2041,7 @@ svc_defer(struct cache_req *req)
 		dr->argslen = rqstp->rq_arg.len >> 2;
 		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2);
 	}
-	atomic_inc(&rqstp->rq_sock->sk_inuse);
+	svc_xprt_get(rqstp->rq_xprt);
 	dr->svsk = rqstp->rq_sock;
 
 	dr->handle.revisit = svc_revisit;

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 15/38] svc: Move sk_flags to the svc_xprt structure
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (13 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 14/38] svc: Change sk_inuse to a kref Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 16/38] svc: Move sk_server and sk_pool to svc_xprt Tom Tucker
                     ` (22 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


This functionally trivial change moves the transport independent sk_flags 
field to the transport independent svc_xprt structure.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |   12 +++
 include/linux/sunrpc/svcsock.h  |   13 ---
 net/sunrpc/svcsock.c            |  148 ++++++++++++++++++++-------------------
 3 files changed, 87 insertions(+), 86 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index eb801ad..f391d21 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -36,6 +36,18 @@ struct svc_xprt {
 	struct svc_xprt_class	*xpt_class;
 	struct svc_xprt_ops	*xpt_ops;
 	struct kref		xpt_ref;
+	unsigned long		xpt_flags;
+#define	XPT_BUSY	0		/* enqueued/receiving */
+#define	XPT_CONN	1		/* conn pending */
+#define	XPT_CLOSE	2		/* dead or dying */
+#define	XPT_DATA	3		/* data pending */
+#define	XPT_TEMP	4		/* connected transport */
+#define	XPT_DEAD	6		/* transport closed */
+#define	XPT_CHNGBUF	7		/* need to change snd/rcv buf sizes */
+#define	XPT_DEFERRED	8		/* deferred request pending */
+#define	XPT_OLD		9		/* used for xprt aging mark+sweep */
+#define	XPT_DETACHED	10		/* detached from tempsocks list */
+#define XPT_LISTENER	11		/* listening endpoint */
 };
 
 int	svc_reg_xprt_class(struct svc_xprt_class *);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index ba07d50..b8a8496 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -24,19 +24,6 @@ struct svc_sock {
 
 	struct svc_pool *	sk_pool;	/* current pool iff queued */
 	struct svc_serv *	sk_server;	/* service for this socket */
-	unsigned long		sk_flags;
-#define	SK_BUSY		0			/* enqueued/receiving */
-#define	SK_CONN		1			/* conn pending */
-#define	SK_CLOSE	2			/* dead or dying */
-#define	SK_DATA		3			/* data pending */
-#define	SK_TEMP		4			/* temp (TCP) socket */
-#define	SK_DEAD		6			/* socket closed */
-#define	SK_CHNGBUF	7			/* need to change snd/rcv buffer sizes */
-#define	SK_DEFERRED	8			/* request on sk_deferred */
-#define	SK_OLD		9			/* used for temp socket aging mark+sweep */
-#define	SK_DETACHED	10			/* detached from tempsocks list */
-#define SK_LISTENER	11			/* listening endpoint */
-
 	atomic_t    	    	sk_reserved;	/* space on outq that is reserved */
 
 	spinlock_t		sk_lock;	/* protects sk_deferred and
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 6dfceff..229a00c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -56,22 +56,23 @@
  *	BKL protects svc_serv->sv_nrthread.
  *	svc_sock->sk_lock protects the svc_sock->sk_deferred list
  *             and the ->sk_info_authunix cache.
- *	svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply.
+ *	svc_sock->sk_xprt.xpt_flags.XPT_BUSY prevents a svc_sock being
+ *	enqueued multiply.
  *
  *	Some flags can be set to certain values at any time
  *	providing that certain rules are followed:
  *
- *	SK_CONN, SK_DATA, can be set or cleared at any time.
+ *	XPT_CONN, XPT_DATA, can be set or cleared at any time.
  *		after a set, svc_sock_enqueue must be called.
  *		after a clear, the socket must be read/accepted
  *		 if this succeeds, it must be set again.
- *	SK_CLOSE can set at any time. It is never cleared.
- *      xpt_ref contains a bias of '1' until SK_DEAD is set.
+ *	XPT_CLOSE can set at any time. It is never cleared.
+ *      xpt_ref contains a bias of '1' until XPT_DEAD is set.
  *             so when xprt_ref hits zero, we know the transport is dead
  *             and no-one is using it.
- *      SK_DEAD can only be set while SK_BUSY is held which ensures
+ *      XPT_DEAD can only be set while XPT_BUSY is held which ensures
  *             no other thread will be using the socket or will try to
- *	       set SK_DEAD.
+ *	       set XPT_DEAD.
  *
  */
 
@@ -235,10 +236,10 @@ svc_sock_enqueue(struct svc_sock *svsk)
 	struct svc_rqst	*rqstp;
 	int cpu;
 
-	if (!(svsk->sk_flags &
-	      ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) ))
+	if (!(svsk->sk_xprt.xpt_flags &
+	      ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
 		return;
-	if (test_bit(SK_DEAD, &svsk->sk_flags))
+	if (test_bit(XPT_DEAD, &svsk->sk_xprt.xpt_flags))
 		return;
 
 	cpu = get_cpu();
@@ -252,7 +253,7 @@ svc_sock_enqueue(struct svc_sock *svsk)
 		printk(KERN_ERR
 			"svc_sock_enqueue: threads and sockets both waiting??\n");
 
-	if (test_bit(SK_DEAD, &svsk->sk_flags)) {
+	if (test_bit(XPT_DEAD, &svsk->sk_xprt.xpt_flags)) {
 		/* Don't enqueue dead sockets */
 		dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk);
 		goto out_unlock;
@@ -260,10 +261,10 @@ svc_sock_enqueue(struct svc_sock *svsk)
 
 	/* Mark socket as busy. It will remain in this state until the
 	 * server has processed all pending data and put the socket back
-	 * on the idle list.  We update SK_BUSY atomically because
+	 * on the idle list.  We update XPT_BUSY atomically because
 	 * it also guards against trying to enqueue the svc_sock twice.
 	 */
-	if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) {
+	if (test_and_set_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)) {
 		/* Don't enqueue socket while already enqueued */
 		dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
 		goto out_unlock;
@@ -272,11 +273,11 @@ svc_sock_enqueue(struct svc_sock *svsk)
 	svsk->sk_pool = pool;
 
 	/* Handle pending connection */
-	if (test_bit(SK_CONN, &svsk->sk_flags))
+	if (test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags))
 		goto process;
 
 	/* Handle close in-progress */
-	if (test_bit(SK_CLOSE, &svsk->sk_flags))
+	if (test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags))
 		goto process;
 
 	/* Check if we have space to reply to a request */
@@ -284,7 +285,7 @@ svc_sock_enqueue(struct svc_sock *svsk)
 		/* Don't enqueue while not enough space for reply */
 		dprintk("svc: no write space, socket %p  not enqueued\n", svsk);
 		svsk->sk_pool = NULL;
-		clear_bit(SK_BUSY, &svsk->sk_flags);
+		clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
 		goto out_unlock;
 	}
 
@@ -340,14 +341,14 @@ svc_sock_dequeue(struct svc_pool *pool)
 /*
  * Having read something from a socket, check whether it
  * needs to be re-enqueued.
- * Note: SK_DATA only gets cleared when a read-attempt finds
+ * Note: XPT_DATA only gets cleared when a read-attempt finds
  * no (or insufficient) data.
  */
 static inline void
 svc_sock_received(struct svc_sock *svsk)
 {
 	svsk->sk_pool = NULL;
-	clear_bit(SK_BUSY, &svsk->sk_flags);
+	clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
 	svc_sock_enqueue(svsk);
 }
 
@@ -696,8 +697,9 @@ svc_udp_data_ready(struct sock *sk, int count)
 
 	if (svsk) {
 		dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
-			svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags));
-		set_bit(SK_DATA, &svsk->sk_flags);
+			svsk, sk, count,
+			test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
+		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		svc_sock_enqueue(svsk);
 	}
 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
@@ -714,7 +716,7 @@ svc_write_space(struct sock *sk)
 
 	if (svsk) {
 		dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
-			svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags));
+			svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
 		svc_sock_enqueue(svsk);
 	}
 
@@ -764,7 +766,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 		.msg_flags = MSG_DONTWAIT,
 	};
 
-	if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
+	if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
 	    /* udp sockets need large rcvbuf as all pending
 	     * requests are still in that buffer.  sndbuf must
 	     * also be large enough that there is enough space
@@ -782,7 +784,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 		return svc_deferred_recv(rqstp);
 	}
 
-	clear_bit(SK_DATA, &svsk->sk_flags);
+	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 	skb = NULL;
 	err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
 			     0, 0, MSG_PEEK | MSG_DONTWAIT);
@@ -793,7 +795,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 		if (err != -EAGAIN) {
 			/* possibly an icmp error */
 			dprintk("svc: recvfrom returned error %d\n", -err);
-			set_bit(SK_DATA, &svsk->sk_flags);
+			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		}
 		svc_sock_received(svsk);
 		return -EAGAIN;
@@ -805,7 +807,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 		   need that much accuracy */
 	}
 	svsk->sk_sk->sk_stamp = skb->tstamp;
-	set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
+	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
 
 	/*
 	 * Maybe more packets - kick another thread ASAP.
@@ -951,8 +953,8 @@ svc_udp_init(struct svc_sock *svsk)
 			    3 * svsk->sk_server->sv_max_mesg,
 			    3 * svsk->sk_server->sv_max_mesg);
 
-	set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */
-	set_bit(SK_CHNGBUF, &svsk->sk_flags);
+	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* might have come in before data_ready set up */
+	set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
 
 	oldfs = get_fs();
 	set_fs(KERNEL_DS);
@@ -986,7 +988,7 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
 	 */
 	if (sk->sk_state == TCP_LISTEN) {
 		if (svsk) {
-			set_bit(SK_CONN, &svsk->sk_flags);
+			set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 			svc_sock_enqueue(svsk);
 		} else
 			printk("svc: socket %p: no user data\n", sk);
@@ -1010,7 +1012,7 @@ svc_tcp_state_change(struct sock *sk)
 	if (!svsk)
 		printk("svc: socket %p: no user data\n", sk);
 	else {
-		set_bit(SK_CLOSE, &svsk->sk_flags);
+		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
 		svc_sock_enqueue(svsk);
 	}
 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
@@ -1025,7 +1027,7 @@ svc_tcp_data_ready(struct sock *sk, int count)
 	dprintk("svc: socket %p TCP data ready (svsk %p)\n",
 		sk, sk->sk_user_data);
 	if (svsk) {
-		set_bit(SK_DATA, &svsk->sk_flags);
+		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		svc_sock_enqueue(svsk);
 	}
 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
@@ -1065,7 +1067,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 	if (!sock)
 		return NULL;
 
-	clear_bit(SK_CONN, &svsk->sk_flags);
+	clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 	err = kernel_accept(sock, &newsock, O_NONBLOCK);
 	if (err < 0) {
 		if (err == -ENOMEM)
@@ -1077,7 +1079,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 		return NULL;
 	}
 
-	set_bit(SK_CONN, &svsk->sk_flags);
+	set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 	svc_sock_enqueue(svsk);
 
 	err = kernel_getpeername(newsock, sin, &slen);
@@ -1143,16 +1145,16 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	int pnum, vlen;
 
 	dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
-		svsk, test_bit(SK_DATA, &svsk->sk_flags),
-		test_bit(SK_CONN, &svsk->sk_flags),
-		test_bit(SK_CLOSE, &svsk->sk_flags));
+		svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
+		test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
+		test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
 
 	if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
 		svc_sock_received(svsk);
 		return svc_deferred_recv(rqstp);
 	}
 
-	if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
+	if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
 		/* sndbuf needs to have room for one request
 		 * per thread, otherwise we can stall even when the
 		 * network isn't a bottleneck.
@@ -1169,7 +1171,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 				    (serv->sv_nrthreads+3) * serv->sv_max_mesg,
 				    3 * serv->sv_max_mesg);
 
-	clear_bit(SK_DATA, &svsk->sk_flags);
+	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 
 	/* Receive data. If we haven't got the record length yet, get
 	 * the next four bytes. Otherwise try to gobble up as much as
@@ -1228,7 +1230,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		return -EAGAIN;	/* record not complete */
 	}
 	len = svsk->sk_reclen;
-	set_bit(SK_DATA, &svsk->sk_flags);
+	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 
 	vec = rqstp->rq_vec;
 	vec[0] = rqstp->rq_arg.head[0];
@@ -1304,7 +1306,7 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
 	reclen = htonl(0x80000000|((xbufp->len ) - 4));
 	memcpy(xbufp->head[0].iov_base, &reclen, 4);
 
-	if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags))
+	if (test_bit(XPT_DEAD, &rqstp->rq_sock->sk_xprt.xpt_flags))
 		return -ENOTCONN;
 
 	sent = svc_sendto(rqstp, &rqstp->rq_res);
@@ -1313,7 +1315,7 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
 		       rqstp->rq_sock->sk_server->sv_name,
 		       (sent<0)?"got error":"sent only",
 		       sent, xbufp->len);
-		set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags);
+		set_bit(XPT_CLOSE, &rqstp->rq_sock->sk_xprt.xpt_flags);
 		svc_sock_enqueue(rqstp->rq_sock);
 		sent = -EAGAIN;
 	}
@@ -1396,9 +1398,9 @@ svc_tcp_init(struct svc_sock *svsk)
 
 	if (sk->sk_state == TCP_LISTEN) {
 		dprintk("setting up TCP socket for listening\n");
-		set_bit(SK_LISTENER, &svsk->sk_flags);
+		set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
 		sk->sk_data_ready = svc_tcp_listen_data_ready;
-		set_bit(SK_CONN, &svsk->sk_flags);
+		set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 	} else {
 		dprintk("setting up TCP socket for reading\n");
 		sk->sk_state_change = svc_tcp_state_change;
@@ -1418,10 +1420,10 @@ svc_tcp_init(struct svc_sock *svsk)
 				    3 * svsk->sk_server->sv_max_mesg,
 				    3 * svsk->sk_server->sv_max_mesg);
 
-		set_bit(SK_CHNGBUF, &svsk->sk_flags);
-		set_bit(SK_DATA, &svsk->sk_flags);
+		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
+		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		if (sk->sk_state != TCP_ESTABLISHED)
-			set_bit(SK_CLOSE, &svsk->sk_flags);
+			set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
 	}
 }
 
@@ -1438,12 +1440,12 @@ svc_sock_update_bufs(struct svc_serv *serv)
 	list_for_each(le, &serv->sv_permsocks) {
 		struct svc_sock *svsk =
 			list_entry(le, struct svc_sock, sk_list);
-		set_bit(SK_CHNGBUF, &svsk->sk_flags);
+		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
 	}
 	list_for_each(le, &serv->sv_tempsocks) {
 		struct svc_sock *svsk =
 			list_entry(le, struct svc_sock, sk_list);
-		set_bit(SK_CHNGBUF, &svsk->sk_flags);
+		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
 	}
 	spin_unlock_bh(&serv->sv_lock);
 }
@@ -1485,7 +1487,7 @@ svc_check_conn_limits(struct svc_serv *serv)
 			svsk = list_entry(serv->sv_tempsocks.prev,
 					  struct svc_sock,
 					  sk_list);
-			set_bit(SK_CLOSE, &svsk->sk_flags);
+			set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
 			svc_xprt_get(&svsk->sk_xprt);
 		}
 		spin_unlock_bh(&serv->sv_lock);
@@ -1589,10 +1591,10 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 	spin_unlock_bh(&pool->sp_lock);
 
 	len = 0;
-	if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
-		dprintk("svc_recv: found SK_CLOSE\n");
+	if (test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)) {
+		dprintk("svc_recv: found XPT_CLOSE\n");
 		svc_delete_socket(svsk);
-	} else if (test_bit(SK_LISTENER, &svsk->sk_flags)) {
+	} else if (test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags)) {
 		struct svc_xprt *newxpt;
 		newxpt = svsk->sk_xprt.xpt_ops->xpo_accept(&svsk->sk_xprt);
 		if (newxpt) {
@@ -1619,7 +1621,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 		return -EAGAIN;
 	}
 	svsk->sk_lastrecv = get_seconds();
-	clear_bit(SK_OLD, &svsk->sk_flags);
+	clear_bit(XPT_OLD, &svsk->sk_xprt.xpt_flags);
 
 	rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
 	rqstp->rq_chandle.defer = svc_defer;
@@ -1666,7 +1668,7 @@ svc_send(struct svc_rqst *rqstp)
 
 	/* Grab svsk->sk_mutex to serialize outgoing data. */
 	mutex_lock(&svsk->sk_mutex);
-	if (test_bit(SK_DEAD, &svsk->sk_flags))
+	if (test_bit(XPT_DEAD, &svsk->sk_xprt.xpt_flags))
 		len = -ENOTCONN;
 	else
 		len = svsk->sk_xprt.xpt_ops->xpo_sendto(rqstp);
@@ -1702,21 +1704,21 @@ svc_age_temp_sockets(unsigned long closure)
 	list_for_each_safe(le, next, &serv->sv_tempsocks) {
 		svsk = list_entry(le, struct svc_sock, sk_list);
 
-		if (!test_and_set_bit(SK_OLD, &svsk->sk_flags))
+		if (!test_and_set_bit(XPT_OLD, &svsk->sk_xprt.xpt_flags))
 			continue;
 		if (atomic_read(&svsk->sk_xprt.xpt_ref.refcount) > 1
-		    || test_bit(SK_BUSY, &svsk->sk_flags))
+		    || test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags))
 			continue;
 		svc_xprt_get(&svsk->sk_xprt);
 		list_move(le, &to_be_aged);
-		set_bit(SK_CLOSE, &svsk->sk_flags);
-		set_bit(SK_DETACHED, &svsk->sk_flags);
+		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+		set_bit(XPT_DETACHED, &svsk->sk_xprt.xpt_flags);
 	}
 	spin_unlock_bh(&serv->sv_lock);
 
 	while (!list_empty(&to_be_aged)) {
 		le = to_be_aged.next;
-		/* fiddling the sk_list node is safe 'cos we're SK_DETACHED */
+		/* fiddling the sk_list node is safe 'cos we're XPT_DETACHED */
 		list_del_init(le);
 		svsk = list_entry(le, struct svc_sock, sk_list);
 
@@ -1762,7 +1764,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 		return NULL;
 	}
 
-	set_bit(SK_BUSY, &svsk->sk_flags);
+	set_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
 	inet->sk_user_data = svsk;
 	svsk->sk_sock = sock;
 	svsk->sk_sk = inet;
@@ -1784,7 +1786,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 
 	spin_lock_bh(&serv->sv_lock);
 	if (is_temporary) {
-		set_bit(SK_TEMP, &svsk->sk_flags);
+		set_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags);
 		list_add(&svsk->sk_list, &serv->sv_tempsocks);
 		serv->sv_tmpcnt++;
 		if (serv->sv_temptimer.function == NULL) {
@@ -1795,7 +1797,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 					jiffies + svc_conn_age_period * HZ);
 		}
 	} else {
-		clear_bit(SK_TEMP, &svsk->sk_flags);
+		clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags);
 		list_add(&svsk->sk_list, &serv->sv_permsocks);
 	}
 	spin_unlock_bh(&serv->sv_lock);
@@ -1944,7 +1946,7 @@ svc_delete_socket(struct svc_sock *svsk)
 
 	spin_lock_bh(&serv->sv_lock);
 
-	if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags))
+	if (!test_and_set_bit(XPT_DETACHED, &svsk->sk_xprt.xpt_flags))
 		list_del_init(&svsk->sk_list);
 	/*
 	 * We used to delete the svc_sock from whichever list
@@ -1953,10 +1955,10 @@ svc_delete_socket(struct svc_sock *svsk)
 	 * while still attached to a queue, the queue itself
 	 * is about to be destroyed (in svc_destroy).
 	 */
-	if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) {
+	if (!test_and_set_bit(XPT_DEAD, &svsk->sk_xprt.xpt_flags)) {
 		BUG_ON(atomic_read(&svsk->sk_xprt.xpt_ref.refcount) < 2);
 		svc_xprt_put(&svsk->sk_xprt);
-		if (test_bit(SK_TEMP, &svsk->sk_flags))
+		if (test_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags))
 			serv->sv_tmpcnt--;
 	}
 
@@ -1965,26 +1967,26 @@ svc_delete_socket(struct svc_sock *svsk)
 
 static void svc_close_socket(struct svc_sock *svsk)
 {
-	set_bit(SK_CLOSE, &svsk->sk_flags);
-	if (test_and_set_bit(SK_BUSY, &svsk->sk_flags))
+	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+	if (test_and_set_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags))
 		/* someone else will have to effect the close */
 		return;
 
 	svc_xprt_get(&svsk->sk_xprt);
 	svc_delete_socket(svsk);
-	clear_bit(SK_BUSY, &svsk->sk_flags);
+	clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
 	svc_xprt_put(&svsk->sk_xprt);
 }
 
 void svc_force_close_socket(struct svc_sock *svsk)
 {
-	set_bit(SK_CLOSE, &svsk->sk_flags);
-	if (test_bit(SK_BUSY, &svsk->sk_flags)) {
+	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+	if (test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)) {
 		/* Waiting to be processed, but no threads left,
 		 * So just remove it from the waiting list
 		 */
 		list_del_init(&svsk->sk_ready);
-		clear_bit(SK_BUSY, &svsk->sk_flags);
+		clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
 	}
 	svc_close_socket(svsk);
 }
@@ -2009,7 +2011,7 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
 	spin_lock(&svsk->sk_lock);
 	list_add(&dr->handle.recent, &svsk->sk_deferred);
 	spin_unlock(&svsk->sk_lock);
-	set_bit(SK_DEFERRED, &svsk->sk_flags);
+	set_bit(XPT_DEFERRED, &svsk->sk_xprt.xpt_flags);
 	svc_sock_enqueue(svsk);
 	svc_xprt_put(&svsk->sk_xprt);
 }
@@ -2072,16 +2074,16 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
 {
 	struct svc_deferred_req *dr = NULL;
 
-	if (!test_bit(SK_DEFERRED, &svsk->sk_flags))
+	if (!test_bit(XPT_DEFERRED, &svsk->sk_xprt.xpt_flags))
 		return NULL;
 	spin_lock(&svsk->sk_lock);
-	clear_bit(SK_DEFERRED, &svsk->sk_flags);
+	clear_bit(XPT_DEFERRED, &svsk->sk_xprt.xpt_flags);
 	if (!list_empty(&svsk->sk_deferred)) {
 		dr = list_entry(svsk->sk_deferred.next,
 				struct svc_deferred_req,
 				handle.recent);
 		list_del_init(&dr->handle.recent);
-		set_bit(SK_DEFERRED, &svsk->sk_flags);
+		set_bit(XPT_DEFERRED, &svsk->sk_xprt.xpt_flags);
 	}
 	spin_unlock(&svsk->sk_lock);
 	return dr;

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 16/38] svc: Move sk_server and sk_pool to svc_xprt
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (14 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 15/38] svc: Move sk_flags to the svc_xprt structure Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 17/38] svc: Make close transport independent Tom Tucker
                     ` (21 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


This is another incremental change that moves transport independent 
fields from svc_sock to the svc_xprt structure. The changes 
should be functionally null.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    6 ++++
 include/linux/sunrpc/svcsock.h  |    2 -
 net/sunrpc/svc_xprt.c           |    4 ++-
 net/sunrpc/svcsock.c            |   55 +++++++++++++++++++--------------------
 4 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index f391d21..4f7dbbc 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -48,11 +48,15 @@ struct svc_xprt {
 #define	XPT_OLD		9		/* used for xprt aging mark+sweep */
 #define	XPT_DETACHED	10		/* detached from tempsocks list */
 #define XPT_LISTENER	11		/* listening endpoint */
+
+	struct svc_pool		*xpt_pool;	/* current pool iff queued */
+	struct svc_serv		*xpt_server;	/* service for transport */
 };
 
 int	svc_reg_xprt_class(struct svc_xprt_class *);
 int	svc_unreg_xprt_class(struct svc_xprt_class *);
-void	svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *);
+void	svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
+		      struct svc_serv *);
 int	svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
 void	svc_xprt_put(struct svc_xprt *xprt);
 
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index b8a8496..92d4cc9 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -22,8 +22,6 @@ struct svc_sock {
 	struct socket *		sk_sock;	/* berkeley socket layer */
 	struct sock *		sk_sk;		/* INET layer */
 
-	struct svc_pool *	sk_pool;	/* current pool iff queued */
-	struct svc_serv *	sk_server;	/* service for this socket */
 	atomic_t    	    	sk_reserved;	/* space on outq that is reserved */
 
 	spinlock_t		sk_lock;	/* protects sk_deferred and
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 43418cf..e366add 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -102,12 +102,14 @@ EXPORT_SYMBOL_GPL(svc_xprt_put);
  * Called by transport drivers to initialize the transport independent
  * portion of the transport instance.
  */
-void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt)
+void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
+		   struct svc_serv *serv)
 {
 	memset(xprt, 0, sizeof(*xprt));
 	xprt->xpt_class = xcl;
 	xprt->xpt_ops = xcl->xcl_ops;
 	kref_init(&xprt->xpt_ref);
+	xprt->xpt_server = serv;
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 229a00c..3da7933 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -231,7 +231,7 @@ svc_sock_wspace(struct svc_sock *svsk)
 static void
 svc_sock_enqueue(struct svc_sock *svsk)
 {
-	struct svc_serv	*serv = svsk->sk_server;
+	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
 	struct svc_pool *pool;
 	struct svc_rqst	*rqstp;
 	int cpu;
@@ -243,7 +243,7 @@ svc_sock_enqueue(struct svc_sock *svsk)
 		return;
 
 	cpu = get_cpu();
-	pool = svc_pool_for_cpu(svsk->sk_server, cpu);
+	pool = svc_pool_for_cpu(svsk->sk_xprt.xpt_server, cpu);
 	put_cpu();
 
 	spin_lock_bh(&pool->sp_lock);
@@ -269,8 +269,8 @@ svc_sock_enqueue(struct svc_sock *svsk)
 		dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
 		goto out_unlock;
 	}
-	BUG_ON(svsk->sk_pool != NULL);
-	svsk->sk_pool = pool;
+	BUG_ON(svsk->sk_xprt.xpt_pool != NULL);
+	svsk->sk_xprt.xpt_pool = pool;
 
 	/* Handle pending connection */
 	if (test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags))
@@ -284,7 +284,7 @@ svc_sock_enqueue(struct svc_sock *svsk)
 	if (!svsk->sk_xprt.xpt_ops->xpo_has_wspace(&svsk->sk_xprt)) {
 		/* Don't enqueue while not enough space for reply */
 		dprintk("svc: no write space, socket %p  not enqueued\n", svsk);
-		svsk->sk_pool = NULL;
+		svsk->sk_xprt.xpt_pool = NULL;
 		clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
 		goto out_unlock;
 	}
@@ -305,12 +305,12 @@ svc_sock_enqueue(struct svc_sock *svsk)
 		svc_xprt_get(&svsk->sk_xprt);
 		rqstp->rq_reserved = serv->sv_max_mesg;
 		atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
-		BUG_ON(svsk->sk_pool != pool);
+		BUG_ON(svsk->sk_xprt.xpt_pool != pool);
 		wake_up(&rqstp->rq_wait);
 	} else {
 		dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
 		list_add_tail(&svsk->sk_ready, &pool->sp_sockets);
-		BUG_ON(svsk->sk_pool != pool);
+		BUG_ON(svsk->sk_xprt.xpt_pool != pool);
 	}
 
 out_unlock:
@@ -347,7 +347,7 @@ svc_sock_dequeue(struct svc_pool *pool)
 static inline void
 svc_sock_received(struct svc_sock *svsk)
 {
-	svsk->sk_pool = NULL;
+	svsk->sk_xprt.xpt_pool = NULL;
 	clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
 	svc_sock_enqueue(svsk);
 }
@@ -751,7 +751,7 @@ static int
 svc_udp_recvfrom(struct svc_rqst *rqstp)
 {
 	struct svc_sock	*svsk = rqstp->rq_sock;
-	struct svc_serv	*serv = svsk->sk_server;
+	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
 	struct sk_buff	*skb;
 	union {
 		struct cmsghdr	hdr;
@@ -889,7 +889,7 @@ static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp)
 static int svc_udp_has_wspace(struct svc_xprt *xprt)
 {
 	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
-	struct svc_serv	*serv = svsk->sk_server;
+	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
 	int required;
 
 	/*
@@ -936,12 +936,12 @@ static struct svc_xprt_class svc_udp_class = {
 };
 
 static void
-svc_udp_init(struct svc_sock *svsk)
+svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 {
 	int one = 1;
 	mm_segment_t oldfs;
 
-	svc_xprt_init(&svc_udp_class, &svsk->sk_xprt);
+	svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv);
 	svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
 	svsk->sk_sk->sk_write_space = svc_write_space;
 
@@ -950,8 +950,8 @@ svc_udp_init(struct svc_sock *svsk)
 	 * svc_udp_recvfrom will re-adjust if necessary
 	 */
 	svc_sock_setbufsize(svsk->sk_sock,
-			    3 * svsk->sk_server->sv_max_mesg,
-			    3 * svsk->sk_server->sv_max_mesg);
+			    3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
+			    3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
 
 	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* might have come in before data_ready set up */
 	set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
@@ -1056,7 +1056,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
 	struct sockaddr_storage addr;
 	struct sockaddr	*sin = (struct sockaddr *) &addr;
-	struct svc_serv	*serv = svsk->sk_server;
+	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
 	struct socket	*sock = svsk->sk_sock;
 	struct socket	*newsock;
 	struct svc_sock	*newsvsk;
@@ -1139,7 +1139,7 @@ static int
 svc_tcp_recvfrom(struct svc_rqst *rqstp)
 {
 	struct svc_sock	*svsk = rqstp->rq_sock;
-	struct svc_serv	*serv = svsk->sk_server;
+	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
 	int		len;
 	struct kvec *vec;
 	int pnum, vlen;
@@ -1282,7 +1282,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		svc_sock_received(svsk);
 	} else {
 		printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
-					svsk->sk_server->sv_name, -len);
+		       svsk->sk_xprt.xpt_server->sv_name, -len);
 		goto err_delete;
 	}
 
@@ -1312,7 +1312,7 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
 	sent = svc_sendto(rqstp, &rqstp->rq_res);
 	if (sent != xbufp->len) {
 		printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
-		       rqstp->rq_sock->sk_server->sv_name,
+		       rqstp->rq_sock->sk_xprt.xpt_server->sv_name,
 		       (sent<0)?"got error":"sent only",
 		       sent, xbufp->len);
 		set_bit(XPT_CLOSE, &rqstp->rq_sock->sk_xprt.xpt_flags);
@@ -1336,7 +1336,7 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
 static int svc_tcp_has_wspace(struct svc_xprt *xprt)
 {
 	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
-	struct svc_serv	*serv = svsk->sk_server;
+	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
 	int required;
 
 	/*
@@ -1389,12 +1389,12 @@ void svc_cleanup_xprt_sock(void)
 }
 
 static void
-svc_tcp_init(struct svc_sock *svsk)
+svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
 {
 	struct sock	*sk = svsk->sk_sk;
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt);
+	svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv);
 
 	if (sk->sk_state == TCP_LISTEN) {
 		dprintk("setting up TCP socket for listening\n");
@@ -1417,8 +1417,8 @@ svc_tcp_init(struct svc_sock *svsk)
 		 * svc_tcp_recvfrom will re-adjust if necessary
 		 */
 		svc_sock_setbufsize(svsk->sk_sock,
-				    3 * svsk->sk_server->sv_max_mesg,
-				    3 * svsk->sk_server->sv_max_mesg);
+				    3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
+				    3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
 
 		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
@@ -1603,7 +1603,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 			 * listener holds a reference too
 			 */
 			__module_get(newxpt->xpt_class->xcl_owner);
-			svc_check_conn_limits(svsk->sk_server);
+			svc_check_conn_limits(svsk->sk_xprt.xpt_server);
 		}
 		svc_sock_received(svsk);
 	} else {
@@ -1771,7 +1771,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	svsk->sk_ostate = inet->sk_state_change;
 	svsk->sk_odata = inet->sk_data_ready;
 	svsk->sk_owspace = inet->sk_write_space;
-	svsk->sk_server = serv;
 	svsk->sk_lastrecv = get_seconds();
 	spin_lock_init(&svsk->sk_lock);
 	INIT_LIST_HEAD(&svsk->sk_deferred);
@@ -1780,9 +1779,9 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 
 	/* Initialize the socket */
 	if (sock->type == SOCK_DGRAM)
-		svc_udp_init(svsk);
+		svc_udp_init(svsk, serv);
 	else
-		svc_tcp_init(svsk);
+		svc_tcp_init(svsk, serv);
 
 	spin_lock_bh(&serv->sv_lock);
 	if (is_temporary) {
@@ -1939,7 +1938,7 @@ svc_delete_socket(struct svc_sock *svsk)
 
 	dprintk("svc: svc_delete_socket(%p)\n", svsk);
 
-	serv = svsk->sk_server;
+	serv = svsk->sk_xprt.xpt_server;
 	sk = svsk->sk_sk;
 
 	svsk->sk_xprt.xpt_ops->xpo_detach(&svsk->sk_xprt);

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 17/38] svc: Make close transport independent
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (15 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 16/38] svc: Move sk_server and sk_pool to svc_xprt Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 18/38] svc: Move sk_reserved to svc_xprt Tom Tucker
                     ` (20 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


Move sk_list and sk_ready to svc_xprt. This involves close because these
lists are walked by svcs when closing all their transports, so I combined
moving these lists to svc_xprt with making close transport-independent.

The svc_force_close_socket function has been replaced by svc_close_all,
which takes a list as an argument. This removes some knowledge of svc
internals from the svcs.

This code races with module removal and transport addition. 

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 fs/lockd/svc.c                  |    6 +-
 fs/nfsd/nfssvc.c                |    4 +
 include/linux/sunrpc/svc_xprt.h |    2 +
 include/linux/sunrpc/svcsock.h  |    4 -
 net/sunrpc/svc.c                |    9 +--
 net/sunrpc/svc_xprt.c           |    2 +
 net/sunrpc/svcsock.c            |  105 +++++++++++++++++++--------------------
 7 files changed, 63 insertions(+), 69 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 8686915..a8e79a9 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -221,10 +221,10 @@ lockd(struct svc_rqst *rqstp)
 
 static int find_xprt(struct svc_serv *serv, char *proto)
 {
-	struct svc_sock *svsk;
+	struct svc_xprt *xprt;
 	int found = 0;
-	list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
-		if (strcmp(svsk->sk_xprt.xpt_class->xcl_name, proto) == 0) {
+	list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list)
+		if (strcmp(xprt->xpt_class->xcl_name, proto) == 0) {
 			found = 1;
 			break;
 		}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index a828b0b..9647b0f 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -155,8 +155,8 @@ static int killsig;	/* signal that was used to kill last nfsd */
 static void nfsd_last_thread(struct svc_serv *serv)
 {
 	/* When last nfsd thread exits we need to do some clean-up */
-	struct svc_sock *svsk;
-	list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
+	struct svc_xprt *xprt;
+	list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list)
 		lockd_down();
 	nfsd_serv = NULL;
 	nfsd_racache_shutdown();
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 4f7dbbc..21fa6ad 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -36,6 +36,8 @@ struct svc_xprt {
 	struct svc_xprt_class	*xpt_class;
 	struct svc_xprt_ops	*xpt_ops;
 	struct kref		xpt_ref;
+	struct list_head	xpt_list;
+	struct list_head	xpt_ready;
 	unsigned long		xpt_flags;
 #define	XPT_BUSY	0		/* enqueued/receiving */
 #define	XPT_CONN	1		/* conn pending */
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 92d4cc9..060508b 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -17,8 +17,6 @@
  */
 struct svc_sock {
 	struct svc_xprt		sk_xprt;
-	struct list_head	sk_ready;	/* list of ready sockets */
-	struct list_head	sk_list;	/* list of all sockets */
 	struct socket *		sk_sock;	/* berkeley socket layer */
 	struct sock *		sk_sk;		/* INET layer */
 
@@ -51,7 +49,7 @@ struct svc_sock {
 /*
  * Function prototypes.
  */
-void		svc_force_close_socket(struct svc_sock *);
+void		svc_close_all(struct list_head *);
 int		svc_recv(struct svc_rqst *, long);
 int		svc_send(struct svc_rqst *);
 void		svc_drop(struct svc_rqst *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4e084a2..07c9d8a 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -458,9 +458,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
 void
 svc_destroy(struct svc_serv *serv)
 {
-	struct svc_sock	*svsk;
-	struct svc_sock *tmp;
-
 	dprintk("svc: svc_destroy(%s, %d)\n",
 				serv->sv_program->pg_name,
 				serv->sv_nrthreads);
@@ -475,14 +472,12 @@ svc_destroy(struct svc_serv *serv)
 
 	del_timer_sync(&serv->sv_temptimer);
 
-	list_for_each_entry_safe(svsk, tmp, &serv->sv_tempsocks, sk_list)
-		svc_force_close_socket(svsk);
+	svc_close_all(&serv->sv_tempsocks);
 
 	if (serv->sv_shutdown)
 		serv->sv_shutdown(serv);
 
-	list_for_each_entry_safe(svsk, tmp, &serv->sv_permsocks, sk_list)
-		svc_force_close_socket(svsk);
+	svc_close_all(&serv->sv_permsocks);
 
 	BUG_ON(!list_empty(&serv->sv_permsocks));
 	BUG_ON(!list_empty(&serv->sv_tempsocks));
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e366add..bbdada7 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -110,6 +110,8 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
 	xprt->xpt_ops = xcl->xcl_ops;
 	kref_init(&xprt->xpt_ref);
 	xprt->xpt_server = serv;
+	INIT_LIST_HEAD(&xprt->xpt_list);
+	INIT_LIST_HEAD(&xprt->xpt_ready);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 3da7933..14f72c0 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -81,11 +81,11 @@
 
 static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
 					 int *errp, int flags);
-static void		svc_delete_socket(struct svc_sock *svsk);
+static void		svc_delete_xprt(struct svc_xprt *xprt);
 static void		svc_udp_data_ready(struct sock *, int);
 static int		svc_udp_recvfrom(struct svc_rqst *);
 static int		svc_udp_sendto(struct svc_rqst *);
-static void		svc_close_socket(struct svc_sock *svsk);
+static void		svc_close_xprt(struct svc_xprt *xprt);
 static void		svc_sock_detach(struct svc_xprt *);
 static void		svc_sock_free(struct svc_xprt *);
 
@@ -309,7 +309,7 @@ svc_sock_enqueue(struct svc_sock *svsk)
 		wake_up(&rqstp->rq_wait);
 	} else {
 		dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
-		list_add_tail(&svsk->sk_ready, &pool->sp_sockets);
+		list_add_tail(&svsk->sk_xprt.xpt_ready, &pool->sp_sockets);
 		BUG_ON(svsk->sk_xprt.xpt_pool != pool);
 	}
 
@@ -329,8 +329,8 @@ svc_sock_dequeue(struct svc_pool *pool)
 		return NULL;
 
 	svsk = list_entry(pool->sp_sockets.next,
-			  struct svc_sock, sk_ready);
-	list_del_init(&svsk->sk_ready);
+			  struct svc_sock, sk_xprt.xpt_ready);
+	list_del_init(&svsk->sk_xprt.xpt_ready);
 
 	dprintk("svc: socket %p dequeued, inuse=%d\n",
 		svsk->sk_sk, atomic_read(&svsk->sk_xprt.xpt_ref.refcount));
@@ -588,7 +588,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
 	if (!serv)
 		return 0;
 	spin_lock_bh(&serv->sv_lock);
-	list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) {
+	list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) {
 		int onelen = one_sock_name(buf+len, svsk);
 		if (toclose && strcmp(toclose, buf+len) == 0)
 			closesk = svsk;
@@ -600,7 +600,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
 		/* Should unregister with portmap, but you cannot
 		 * unregister just one protocol...
 		 */
-		svc_close_socket(closesk);
+		svc_close_xprt(&closesk->sk_xprt);
 	else if (toclose)
 		return -ENOENT;
 	return len;
@@ -1273,7 +1273,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	return len;
 
  err_delete:
-	svc_delete_socket(svsk);
+	svc_delete_xprt(&svsk->sk_xprt);
 	return -EAGAIN;
 
  error:
@@ -1439,12 +1439,12 @@ svc_sock_update_bufs(struct svc_serv *serv)
 	spin_lock_bh(&serv->sv_lock);
 	list_for_each(le, &serv->sv_permsocks) {
 		struct svc_sock *svsk =
-			list_entry(le, struct svc_sock, sk_list);
+			list_entry(le, struct svc_sock, sk_xprt.xpt_list);
 		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
 	}
 	list_for_each(le, &serv->sv_tempsocks) {
 		struct svc_sock *svsk =
-			list_entry(le, struct svc_sock, sk_list);
+			list_entry(le, struct svc_sock, sk_xprt.xpt_list);
 		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
 	}
 	spin_unlock_bh(&serv->sv_lock);
@@ -1486,7 +1486,7 @@ svc_check_conn_limits(struct svc_serv *serv)
 			 */
 			svsk = list_entry(serv->sv_tempsocks.prev,
 					  struct svc_sock,
-					  sk_list);
+					  sk_xprt.xpt_list);
 			set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
 			svc_xprt_get(&svsk->sk_xprt);
 		}
@@ -1593,7 +1593,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 	len = 0;
 	if (test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)) {
 		dprintk("svc_recv: found XPT_CLOSE\n");
-		svc_delete_socket(svsk);
+		svc_delete_xprt(&svsk->sk_xprt);
 	} else if (test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags)) {
 		struct svc_xprt *newxpt;
 		newxpt = svsk->sk_xprt.xpt_ops->xpo_accept(&svsk->sk_xprt);
@@ -1702,7 +1702,7 @@ svc_age_temp_sockets(unsigned long closure)
 	}
 
 	list_for_each_safe(le, next, &serv->sv_tempsocks) {
-		svsk = list_entry(le, struct svc_sock, sk_list);
+		svsk = list_entry(le, struct svc_sock, sk_xprt.xpt_list);
 
 		if (!test_and_set_bit(XPT_OLD, &svsk->sk_xprt.xpt_flags))
 			continue;
@@ -1718,9 +1718,9 @@ svc_age_temp_sockets(unsigned long closure)
 
 	while (!list_empty(&to_be_aged)) {
 		le = to_be_aged.next;
-		/* fiddling the sk_list node is safe 'cos we're XPT_DETACHED */
+		/* fiddling the sk_xprt.xpt_list node is safe 'cos we're XPT_DETACHED */
 		list_del_init(le);
-		svsk = list_entry(le, struct svc_sock, sk_list);
+		svsk = list_entry(le, struct svc_sock, sk_xprt.xpt_list);
 
 		dprintk("queuing svsk %p for closing, %lu seconds old\n",
 			svsk, get_seconds() - svsk->sk_lastrecv);
@@ -1774,7 +1774,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	svsk->sk_lastrecv = get_seconds();
 	spin_lock_init(&svsk->sk_lock);
 	INIT_LIST_HEAD(&svsk->sk_deferred);
-	INIT_LIST_HEAD(&svsk->sk_ready);
 	mutex_init(&svsk->sk_mutex);
 
 	/* Initialize the socket */
@@ -1786,7 +1785,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	spin_lock_bh(&serv->sv_lock);
 	if (is_temporary) {
 		set_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags);
-		list_add(&svsk->sk_list, &serv->sv_tempsocks);
+		list_add(&svsk->sk_xprt.xpt_list, &serv->sv_tempsocks);
 		serv->sv_tmpcnt++;
 		if (serv->sv_temptimer.function == NULL) {
 			/* setup timer to age temp sockets */
@@ -1797,7 +1796,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 		}
 	} else {
 		clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags);
-		list_add(&svsk->sk_list, &serv->sv_permsocks);
+		list_add(&svsk->sk_xprt.xpt_list, &serv->sv_permsocks);
 	}
 	spin_unlock_bh(&serv->sv_lock);
 
@@ -1928,66 +1927,64 @@ static void svc_sock_free(struct svc_xprt *xprt)
 }
 
 /*
- * Remove a dead socket
+ * Remove a dead transport
  */
 static void
-svc_delete_socket(struct svc_sock *svsk)
+svc_delete_xprt(struct svc_xprt *xprt)
 {
-	struct svc_serv	*serv;
-	struct sock	*sk;
+	struct svc_serv	*serv = xprt->xpt_server;
 
-	dprintk("svc: svc_delete_socket(%p)\n", svsk);
-
-	serv = svsk->sk_xprt.xpt_server;
-	sk = svsk->sk_sk;
-
-	svsk->sk_xprt.xpt_ops->xpo_detach(&svsk->sk_xprt);
+	dprintk("svc: svc_delete_xprt(%p)\n", xprt);
+	xprt->xpt_ops->xpo_detach(xprt);
 
 	spin_lock_bh(&serv->sv_lock);
-
-	if (!test_and_set_bit(XPT_DETACHED, &svsk->sk_xprt.xpt_flags))
-		list_del_init(&svsk->sk_list);
+	if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags))
+		list_del_init(&xprt->xpt_list);
 	/*
-	 * We used to delete the svc_sock from whichever list
-	 * it's sk_ready node was on, but we don't actually
+	 * We used to delete the transport from whichever list
+	 * it's sk_xprt.xpt_ready node was on, but we don't actually
 	 * need to.  This is because the only time we're called
 	 * while still attached to a queue, the queue itself
 	 * is about to be destroyed (in svc_destroy).
 	 */
-	if (!test_and_set_bit(XPT_DEAD, &svsk->sk_xprt.xpt_flags)) {
-		BUG_ON(atomic_read(&svsk->sk_xprt.xpt_ref.refcount) < 2);
-		svc_xprt_put(&svsk->sk_xprt);
-		if (test_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags))
+	if (!test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) {
+		BUG_ON(atomic_read(&xprt->xpt_ref.refcount) < 2);
+		svc_xprt_put(xprt);
+		if (test_bit(XPT_TEMP, &xprt->xpt_flags))
 			serv->sv_tmpcnt--;
 	}
-
 	spin_unlock_bh(&serv->sv_lock);
 }
 
-static void svc_close_socket(struct svc_sock *svsk)
+static void svc_close_xprt(struct svc_xprt *xprt)
 {
-	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
-	if (test_and_set_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags))
+	set_bit(XPT_CLOSE, &xprt->xpt_flags);
+	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
 		/* someone else will have to effect the close */
 		return;
 
-	svc_xprt_get(&svsk->sk_xprt);
-	svc_delete_socket(svsk);
-	clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
-	svc_xprt_put(&svsk->sk_xprt);
+	svc_xprt_get(xprt);
+	svc_delete_xprt(xprt);
+	clear_bit(XPT_BUSY, &xprt->xpt_flags);
+	svc_xprt_put(xprt);
 }
 
-void svc_force_close_socket(struct svc_sock *svsk)
+void svc_close_all(struct list_head *xprt_list)
 {
-	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
-	if (test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)) {
-		/* Waiting to be processed, but no threads left,
-		 * So just remove it from the waiting list
-		 */
-		list_del_init(&svsk->sk_ready);
-		clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
+	struct svc_xprt *xprt;
+	struct svc_xprt *tmp;
+
+	list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
+		set_bit(XPT_CLOSE, &xprt->xpt_flags);
+		if (test_bit(XPT_BUSY, &xprt->xpt_flags)) {
+			/* Waiting to be processed, but no threads left,
+			 * So just remove it from the waiting list
+			 */
+			list_del_init(&xprt->xpt_ready);
+			clear_bit(XPT_BUSY, &xprt->xpt_flags);
+		}
+		svc_close_xprt(xprt);
 	}
-	svc_close_socket(svsk);
 }
 
 /*

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 18/38] svc: Move sk_reserved to svc_xprt
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (16 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 17/38] svc: Make close transport independent Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 19/38] svc: Make the enqueue service transport neutral and export it Tom Tucker
                     ` (19 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


This functionally trivial patch moves the sk_reserved field to the
transport-independent svc_xprt structure.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    1 +
 include/linux/sunrpc/svcsock.h  |    2 --
 net/sunrpc/svcsock.c            |   10 +++++-----
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 21fa6ad..c9892d5 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -53,6 +53,7 @@ struct svc_xprt {
 
 	struct svc_pool		*xpt_pool;	/* current pool iff queued */
 	struct svc_serv		*xpt_server;	/* service for transport */
+	atomic_t    	    	xpt_reserved;	/* space on outq that is rsvd */
 };
 
 int	svc_reg_xprt_class(struct svc_xprt_class *);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 060508b..ba41f11 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -20,8 +20,6 @@ struct svc_sock {
 	struct socket *		sk_sock;	/* berkeley socket layer */
 	struct sock *		sk_sk;		/* INET layer */
 
-	atomic_t    	    	sk_reserved;	/* space on outq that is reserved */
-
 	spinlock_t		sk_lock;	/* protects sk_deferred and
 						 * sk_info_authunix */
 	struct list_head	sk_deferred;	/* deferred requests that need to
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 14f72c0..19d0dee 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -304,7 +304,7 @@ svc_sock_enqueue(struct svc_sock *svsk)
 		rqstp->rq_sock = svsk;
 		svc_xprt_get(&svsk->sk_xprt);
 		rqstp->rq_reserved = serv->sv_max_mesg;
-		atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
+		atomic_add(rqstp->rq_reserved, &svsk->sk_xprt.xpt_reserved);
 		BUG_ON(svsk->sk_xprt.xpt_pool != pool);
 		wake_up(&rqstp->rq_wait);
 	} else {
@@ -369,7 +369,7 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
 
 	if (space < rqstp->rq_reserved) {
 		struct svc_sock *svsk = rqstp->rq_sock;
-		atomic_sub((rqstp->rq_reserved - space), &svsk->sk_reserved);
+		atomic_sub((rqstp->rq_reserved - space), &svsk->sk_xprt.xpt_reserved);
 		rqstp->rq_reserved = space;
 
 		svc_sock_enqueue(svsk);
@@ -897,7 +897,7 @@ static int svc_udp_has_wspace(struct svc_xprt *xprt)
 	 * sock space.
 	 */
 	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
-	required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
+	required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
 	if (required*2 > sock_wspace(svsk->sk_sk))
 		return 0;
 	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
@@ -1344,7 +1344,7 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)
 	 * sock space.
 	 */
 	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
-	required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
+	required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
 	if (required*2 > sk_stream_wspace(svsk->sk_sk))
 		return 0;
 	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
@@ -1561,7 +1561,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 		rqstp->rq_sock = svsk;
 		svc_xprt_get(&svsk->sk_xprt);
 		rqstp->rq_reserved = serv->sv_max_mesg;
-		atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
+		atomic_add(rqstp->rq_reserved, &svsk->sk_xprt.xpt_reserved);
 	} else {
 		/* No data pending. Go to sleep */
 		svc_thread_enqueue(pool, rqstp);

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 19/38] svc: Make the enqueue service transport neutral and export it.
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (17 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 18/38] svc: Move sk_reserved to svc_xprt Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 20/38] svc: Make svc_send transport neutral Tom Tucker
                     ` (18 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


The svc_sock_enqueue function is now transport independent since all of
the fields it touches have been moved to the transport independent svc_xprt
structure. Change the function to use the svc_xprt structure directly
instead of the transport specific svc_sock structure.

Transport specific data-ready handlers need to call this function, so
export it.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 net/sunrpc/svcsock.c |   97 ++++++++++++++++++++++++++------------------------
 1 files changed, 50 insertions(+), 47 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 19d0dee..17feb4b 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -5,7 +5,7 @@
  *
  * The server scheduling algorithm does not always distribute the load
  * evenly when servicing a single client. May need to modify the
- * svc_sock_enqueue procedure...
+ * svc_xprt_enqueue procedure...
  *
  * TCP support is largely untested and may be a little slow. The problem
  * is that we currently do two separate recvfrom's, one for the 4-byte
@@ -63,7 +63,7 @@
  *	providing that certain rules are followed:
  *
  *	XPT_CONN, XPT_DATA, can be set or cleared at any time.
- *		after a set, svc_sock_enqueue must be called.
+ *		after a set, svc_xprt_enqueue must be called.
  *		after a clear, the socket must be read/accepted
  *		 if this succeeds, it must be set again.
  *	XPT_CLOSE can set at any time. It is never cleared.
@@ -228,22 +228,22 @@ svc_sock_wspace(struct svc_sock *svsk)
  * processes, wake 'em up.
  *
  */
-static void
-svc_sock_enqueue(struct svc_sock *svsk)
+void
+svc_xprt_enqueue(struct svc_xprt *xprt)
 {
-	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
+	struct svc_serv	*serv = xprt->xpt_server;
 	struct svc_pool *pool;
 	struct svc_rqst	*rqstp;
 	int cpu;
 
-	if (!(svsk->sk_xprt.xpt_flags &
+	if (!(xprt->xpt_flags &
 	      ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
 		return;
-	if (test_bit(XPT_DEAD, &svsk->sk_xprt.xpt_flags))
+	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
 		return;
 
 	cpu = get_cpu();
-	pool = svc_pool_for_cpu(svsk->sk_xprt.xpt_server, cpu);
+	pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
 	put_cpu();
 
 	spin_lock_bh(&pool->sp_lock);
@@ -251,11 +251,12 @@ svc_sock_enqueue(struct svc_sock *svsk)
 	if (!list_empty(&pool->sp_threads) &&
 	    !list_empty(&pool->sp_sockets))
 		printk(KERN_ERR
-			"svc_sock_enqueue: threads and sockets both waiting??\n");
+		       "svc_xprt_enqueue: "
+		       "threads and transports both waiting??\n");
 
-	if (test_bit(XPT_DEAD, &svsk->sk_xprt.xpt_flags)) {
+	if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
 		/* Don't enqueue dead sockets */
-		dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk);
+		dprintk("svc: transport %p is dead, not enqueued\n", xprt);
 		goto out_unlock;
 	}
 
@@ -264,28 +265,29 @@ svc_sock_enqueue(struct svc_sock *svsk)
 	 * on the idle list.  We update XPT_BUSY atomically because
 	 * it also guards against trying to enqueue the svc_sock twice.
 	 */
-	if (test_and_set_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)) {
+	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
 		/* Don't enqueue socket while already enqueued */
-		dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
+		dprintk("svc: transport %p busy, not enqueued\n", xprt);
 		goto out_unlock;
 	}
-	BUG_ON(svsk->sk_xprt.xpt_pool != NULL);
-	svsk->sk_xprt.xpt_pool = pool;
+	BUG_ON(xprt->xpt_pool != NULL);
+	xprt->xpt_pool = pool;
 
 	/* Handle pending connection */
-	if (test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags))
+	if (test_bit(XPT_CONN, &xprt->xpt_flags))
 		goto process;
 
 	/* Handle close in-progress */
-	if (test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags))
+	if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
 		goto process;
 
 	/* Check if we have space to reply to a request */
-	if (!svsk->sk_xprt.xpt_ops->xpo_has_wspace(&svsk->sk_xprt)) {
+	if (!xprt->xpt_ops->xpo_has_wspace(xprt)) {
 		/* Don't enqueue while not enough space for reply */
-		dprintk("svc: no write space, socket %p  not enqueued\n", svsk);
-		svsk->sk_xprt.xpt_pool = NULL;
-		clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
+		dprintk("svc: no write space, transport %p  not enqueued\n",
+			xprt);
+		xprt->xpt_pool = NULL;
+		clear_bit(XPT_BUSY, &xprt->xpt_flags);
 		goto out_unlock;
 	}
 
@@ -294,28 +296,29 @@ svc_sock_enqueue(struct svc_sock *svsk)
 		rqstp = list_entry(pool->sp_threads.next,
 				   struct svc_rqst,
 				   rq_list);
-		dprintk("svc: socket %p served by daemon %p\n",
-			svsk->sk_sk, rqstp);
+		dprintk("svc: transport %p served by daemon %p\n",
+			xprt, rqstp);
 		svc_thread_dequeue(pool, rqstp);
-		if (rqstp->rq_sock)
+		if (rqstp->rq_xprt)
 			printk(KERN_ERR
-				"svc_sock_enqueue: server %p, rq_sock=%p!\n",
-				rqstp, rqstp->rq_sock);
-		rqstp->rq_sock = svsk;
-		svc_xprt_get(&svsk->sk_xprt);
+				"svc_xprt_enqueue: server %p, rq_xprt=%p!\n",
+				rqstp, rqstp->rq_xprt);
+		rqstp->rq_xprt = xprt;
+		svc_xprt_get(xprt);
 		rqstp->rq_reserved = serv->sv_max_mesg;
-		atomic_add(rqstp->rq_reserved, &svsk->sk_xprt.xpt_reserved);
-		BUG_ON(svsk->sk_xprt.xpt_pool != pool);
+		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
+		BUG_ON(xprt->xpt_pool != pool);
 		wake_up(&rqstp->rq_wait);
 	} else {
-		dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
-		list_add_tail(&svsk->sk_xprt.xpt_ready, &pool->sp_sockets);
-		BUG_ON(svsk->sk_xprt.xpt_pool != pool);
+		dprintk("svc: transport %p put into queue\n", xprt);
+		list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
+		BUG_ON(xprt->xpt_pool != pool);
 	}
 
 out_unlock:
 	spin_unlock_bh(&pool->sp_lock);
 }
+EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
 
 /*
  * Dequeue the first socket.  Must be called with the pool->sp_lock held.
@@ -349,7 +352,7 @@ svc_sock_received(struct svc_sock *svsk)
 {
 	svsk->sk_xprt.xpt_pool = NULL;
 	clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
-	svc_sock_enqueue(svsk);
+	svc_xprt_enqueue(&svsk->sk_xprt);
 }
 
 
@@ -368,11 +371,11 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
 	space += rqstp->rq_res.head[0].iov_len;
 
 	if (space < rqstp->rq_reserved) {
-		struct svc_sock *svsk = rqstp->rq_sock;
-		atomic_sub((rqstp->rq_reserved - space), &svsk->sk_xprt.xpt_reserved);
+		struct svc_xprt *xprt = rqstp->rq_xprt;
+		atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
 		rqstp->rq_reserved = space;
 
-		svc_sock_enqueue(svsk);
+		svc_xprt_enqueue(xprt);
 	}
 }
 
@@ -700,7 +703,7 @@ svc_udp_data_ready(struct sock *sk, int count)
 			svsk, sk, count,
 			test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
-		svc_sock_enqueue(svsk);
+		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 		wake_up_interruptible(sk->sk_sleep);
@@ -717,7 +720,7 @@ svc_write_space(struct sock *sk)
 	if (svsk) {
 		dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
 			svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
-		svc_sock_enqueue(svsk);
+		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
 
 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) {
@@ -989,7 +992,7 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
 	if (sk->sk_state == TCP_LISTEN) {
 		if (svsk) {
 			set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
-			svc_sock_enqueue(svsk);
+			svc_xprt_enqueue(&svsk->sk_xprt);
 		} else
 			printk("svc: socket %p: no user data\n", sk);
 	}
@@ -1013,7 +1016,7 @@ svc_tcp_state_change(struct sock *sk)
 		printk("svc: socket %p: no user data\n", sk);
 	else {
 		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
-		svc_sock_enqueue(svsk);
+		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 		wake_up_interruptible_all(sk->sk_sleep);
@@ -1028,7 +1031,7 @@ svc_tcp_data_ready(struct sock *sk, int count)
 		sk, sk->sk_user_data);
 	if (svsk) {
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
-		svc_sock_enqueue(svsk);
+		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 		wake_up_interruptible(sk->sk_sleep);
@@ -1080,7 +1083,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 	}
 
 	set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
-	svc_sock_enqueue(svsk);
+	svc_xprt_enqueue(&svsk->sk_xprt);
 
 	err = kernel_getpeername(newsock, sin, &slen);
 	if (err < 0) {
@@ -1316,7 +1319,7 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
 		       (sent<0)?"got error":"sent only",
 		       sent, xbufp->len);
 		set_bit(XPT_CLOSE, &rqstp->rq_sock->sk_xprt.xpt_flags);
-		svc_sock_enqueue(rqstp->rq_sock);
+		svc_xprt_enqueue(rqstp->rq_xprt);
 		sent = -EAGAIN;
 	}
 	return sent;
@@ -1493,7 +1496,7 @@ svc_check_conn_limits(struct svc_serv *serv)
 		spin_unlock_bh(&serv->sv_lock);
 
 		if (svsk) {
-			svc_sock_enqueue(svsk);
+			svc_xprt_enqueue(&svsk->sk_xprt);
 			svc_xprt_put(&svsk->sk_xprt);
 		}
 	}
@@ -1726,7 +1729,7 @@ svc_age_temp_sockets(unsigned long closure)
 			svsk, get_seconds() - svsk->sk_lastrecv);
 
 		/* a thread will dequeue and close it soon */
-		svc_sock_enqueue(svsk);
+		svc_xprt_enqueue(&svsk->sk_xprt);
 		svc_xprt_put(&svsk->sk_xprt);
 	}
 
@@ -2008,7 +2011,7 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
 	list_add(&dr->handle.recent, &svsk->sk_deferred);
 	spin_unlock(&svsk->sk_lock);
 	set_bit(XPT_DEFERRED, &svsk->sk_xprt.xpt_flags);
-	svc_sock_enqueue(svsk);
+	svc_xprt_enqueue(&svsk->sk_xprt);
 	svc_xprt_put(&svsk->sk_xprt);
 }
 

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 20/38] svc: Make svc_send transport neutral
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (18 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 19/38] svc: Make the enqueue service transport neutral and export it Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 21/38] svc: Change svc_sock_received to svc_xprt_received and export it Tom Tucker
                     ` (17 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


Move the sk_mutex field to the transport independent svc_xprt structure.
Now all the fields that svc_send touches are transport neutral. Change the 
svc_send function to use the transport independent svc_xprt directly instead 
of the transport dependent svc_sock structure.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    1 +
 include/linux/sunrpc/svcsock.h  |    1 -
 net/sunrpc/svc_xprt.c           |    1 +
 net/sunrpc/svcsock.c            |   19 ++++++++-----------
 4 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index c9892d5..d5ef902 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -54,6 +54,7 @@ struct svc_xprt {
 	struct svc_pool		*xpt_pool;	/* current pool iff queued */
 	struct svc_serv		*xpt_server;	/* service for transport */
 	atomic_t    	    	xpt_reserved;	/* space on outq that is rsvd */
+	struct mutex		xpt_mutex;	/* to serialize sending data */
 };
 
 int	svc_reg_xprt_class(struct svc_xprt_class *);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index ba41f11..41c2dfa 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -24,7 +24,6 @@ struct svc_sock {
 						 * sk_info_authunix */
 	struct list_head	sk_deferred;	/* deferred requests that need to
 						 * be revisted */
-	struct mutex		sk_mutex;	/* to serialize sending data */
 
 	/* We keep the old state_change and data_ready CB's here */
 	void			(*sk_ostate)(struct sock *);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index bbdada7..7544102 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -112,6 +112,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
 	xprt->xpt_server = serv;
 	INIT_LIST_HEAD(&xprt->xpt_list);
 	INIT_LIST_HEAD(&xprt->xpt_ready);
+	mutex_init(&xprt->xpt_mutex);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 17feb4b..5666541 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1650,15 +1650,13 @@ svc_drop(struct svc_rqst *rqstp)
 int
 svc_send(struct svc_rqst *rqstp)
 {
-	struct svc_sock	*svsk;
+	struct svc_xprt	*xprt;
 	int		len;
 	struct xdr_buf	*xb;
 
-	if ((svsk = rqstp->rq_sock) == NULL) {
-		printk(KERN_WARNING "NULL socket pointer in %s:%d\n",
-				__FILE__, __LINE__);
+	xprt = rqstp->rq_xprt;
+	if (!xprt)
 		return -EFAULT;
-	}
 
 	/* release the receive skb before sending the reply */
 	rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
@@ -1669,13 +1667,13 @@ svc_send(struct svc_rqst *rqstp)
 		xb->page_len +
 		xb->tail[0].iov_len;
 
-	/* Grab svsk->sk_mutex to serialize outgoing data. */
-	mutex_lock(&svsk->sk_mutex);
-	if (test_bit(XPT_DEAD, &svsk->sk_xprt.xpt_flags))
+	/* Grab mutex to serialize outgoing data. */
+	mutex_lock(&xprt->xpt_mutex);
+	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
 		len = -ENOTCONN;
 	else
-		len = svsk->sk_xprt.xpt_ops->xpo_sendto(rqstp);
-	mutex_unlock(&svsk->sk_mutex);
+		len = xprt->xpt_ops->xpo_sendto(rqstp);
+	mutex_unlock(&xprt->xpt_mutex);
 	svc_sock_release(rqstp);
 
 	if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
@@ -1777,7 +1775,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	svsk->sk_lastrecv = get_seconds();
 	spin_lock_init(&svsk->sk_lock);
 	INIT_LIST_HEAD(&svsk->sk_deferred);
-	mutex_init(&svsk->sk_mutex);
 
 	/* Initialize the socket */
 	if (sock->type == SOCK_DGRAM)

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 21/38] svc: Change svc_sock_received to svc_xprt_received and export it
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (19 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 20/38] svc: Make svc_send transport neutral Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
       [not found]     ` <20071129224037.14563.69171.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
  2007-11-29 22:40   ` [RFC,PATCH 22/38] svc: Remove sk_lastrecv Tom Tucker
                     ` (16 subsequent siblings)
  37 siblings, 1 reply; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


All fields touched by svc_sock_received are now transport independent.
Change it to use svc_xprt directly. This function is called from
transport dependent code, so export it. 

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    2 +-
 net/sunrpc/svcsock.c            |   37 ++++++++++++++++++-------------------
 2 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index d5ef902..c416d05 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -62,8 +62,8 @@ int	svc_unreg_xprt_class(struct svc_xprt_class *);
 void	svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
 		      struct svc_serv *);
 int	svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
+void	svc_xprt_received(struct svc_xprt *);
 void	svc_xprt_put(struct svc_xprt *xprt);
-
 static inline void svc_xprt_get(struct svc_xprt *xprt)
 {
 	kref_get(&xprt->xpt_ref);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5666541..0015839 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -347,14 +347,14 @@ svc_sock_dequeue(struct svc_pool *pool)
  * Note: XPT_DATA only gets cleared when a read-attempt finds
  * no (or insufficient) data.
  */
-static inline void
-svc_sock_received(struct svc_sock *svsk)
+void
+svc_xprt_received(struct svc_xprt *xprt)
 {
-	svsk->sk_xprt.xpt_pool = NULL;
-	clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
-	svc_xprt_enqueue(&svsk->sk_xprt);
+	xprt->xpt_pool = NULL;
+	clear_bit(XPT_BUSY, &xprt->xpt_flags);
+	svc_xprt_enqueue(xprt);
 }
-
+EXPORT_SYMBOL_GPL(svc_xprt_received);
 
 /**
  * svc_reserve - change the space reserved for the reply to a request.
@@ -783,7 +783,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 				(serv->sv_nrthreads+3) * serv->sv_max_mesg);
 
 	if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
-		svc_sock_received(svsk);
+		svc_xprt_received(&svsk->sk_xprt);
 		return svc_deferred_recv(rqstp);
 	}
 
@@ -800,7 +800,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 			dprintk("svc: recvfrom returned error %d\n", -err);
 			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		}
-		svc_sock_received(svsk);
+		svc_xprt_received(&svsk->sk_xprt);
 		return -EAGAIN;
 	}
 	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
@@ -815,7 +815,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 	/*
 	 * Maybe more packets - kick another thread ASAP.
 	 */
-	svc_sock_received(svsk);
+	svc_xprt_received(&svsk->sk_xprt);
 
 	len  = skb->len - sizeof(struct udphdr);
 	rqstp->rq_arg.len = len;
@@ -1123,8 +1123,6 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 	}
 	memcpy(&newsvsk->sk_local, sin, slen);
 
-	svc_sock_received(newsvsk);
-
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpconn++;
 
@@ -1153,7 +1151,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
 
 	if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
-		svc_sock_received(svsk);
+		svc_xprt_received(&svsk->sk_xprt);
 		return svc_deferred_recv(rqstp);
 	}
 
@@ -1193,7 +1191,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		if (len < want) {
 			dprintk("svc: short recvfrom while reading record length (%d of %lu)\n",
 				len, want);
-			svc_sock_received(svsk);
+			svc_xprt_received(&svsk->sk_xprt);
 			return -EAGAIN; /* record header not complete */
 		}
 
@@ -1229,7 +1227,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	if (len < svsk->sk_reclen) {
 		dprintk("svc: incomplete TCP record (%d of %d)\n",
 			len, svsk->sk_reclen);
-		svc_sock_received(svsk);
+		svc_xprt_received(&svsk->sk_xprt);
 		return -EAGAIN;	/* record not complete */
 	}
 	len = svsk->sk_reclen;
@@ -1269,7 +1267,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	svsk->sk_reclen = 0;
 	svsk->sk_tcplen = 0;
 
-	svc_sock_received(svsk);
+	svc_xprt_received(&svsk->sk_xprt);
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpcnt++;
 
@@ -1282,7 +1280,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
  error:
 	if (len == -EAGAIN) {
 		dprintk("RPC: TCP recvfrom got EAGAIN\n");
-		svc_sock_received(svsk);
+		svc_xprt_received(&svsk->sk_xprt);
 	} else {
 		printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
 		       svsk->sk_xprt.xpt_server->sv_name, -len);
@@ -1607,8 +1605,9 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 			 */
 			__module_get(newxpt->xpt_class->xcl_owner);
 			svc_check_conn_limits(svsk->sk_xprt.xpt_server);
+			svc_xprt_received(newxpt);
 		}
-		svc_sock_received(svsk);
+		svc_xprt_received(&svsk->sk_xprt);
 	} else {
 		dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
 			rqstp, pool->sp_id, svsk,
@@ -1827,7 +1826,7 @@ int svc_addsock(struct svc_serv *serv,
 	else {
 		svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS);
 		if (svsk) {
-			svc_sock_received(svsk);
+			svc_xprt_received(&svsk->sk_xprt);
 			err = 0;
 		}
 	}
@@ -1882,7 +1881,7 @@ svc_create_socket(struct svc_serv *serv, int protocol,
 	}
 
 	if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) {
-		svc_sock_received(svsk);
+		svc_xprt_received(&svsk->sk_xprt);
 		return (struct svc_xprt *)svsk;
 	}
 

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 22/38] svc: Remove sk_lastrecv
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (20 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 21/38] svc: Change svc_sock_received to svc_xprt_received and export it Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 24/38] svc: Make deferral processing xprt independent Tom Tucker
                     ` (15 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


With the implementation of the new mark and sweep algorithm for shutting
down old connections, the sk_lastrecv field is no longer needed.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svcsock.h |    1 -
 net/sunrpc/svcsock.c           |    5 +----
 2 files changed, 1 insertions(+), 5 deletions(-)

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 41c2dfa..406d003 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -33,7 +33,6 @@ struct svc_sock {
 	/* private TCP part */
 	int			sk_reclen;	/* length of record */
 	int			sk_tcplen;	/* current read length */
-	time_t			sk_lastrecv;	/* time of last received request */
 
 	/* cache of various info for TCP sockets */
 	void			*sk_info_authunix;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 0015839..a1e8c1a 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1622,7 +1622,6 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 		svc_sock_release(rqstp);
 		return -EAGAIN;
 	}
-	svsk->sk_lastrecv = get_seconds();
 	clear_bit(XPT_OLD, &svsk->sk_xprt.xpt_flags);
 
 	rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
@@ -1722,8 +1721,7 @@ svc_age_temp_sockets(unsigned long closure)
 		list_del_init(le);
 		svsk = list_entry(le, struct svc_sock, sk_xprt.xpt_list);
 
-		dprintk("queuing svsk %p for closing, %lu seconds old\n",
-			svsk, get_seconds() - svsk->sk_lastrecv);
+		dprintk("queuing svsk %p for closing\n", svsk);
 
 		/* a thread will dequeue and close it soon */
 		svc_xprt_enqueue(&svsk->sk_xprt);
@@ -1771,7 +1769,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	svsk->sk_ostate = inet->sk_state_change;
 	svsk->sk_odata = inet->sk_data_ready;
 	svsk->sk_owspace = inet->sk_write_space;
-	svsk->sk_lastrecv = get_seconds();
 	spin_lock_init(&svsk->sk_lock);
 	INIT_LIST_HEAD(&svsk->sk_deferred);
 

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 24/38] svc: Make deferral processing xprt independent
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (21 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 22/38] svc: Remove sk_lastrecv Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 25/38] svc: Move the sockaddr information to svc_xprt Tom Tucker
                     ` (14 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


This patch moves the transport independent sk_deferred list to the svc_xprt
structure (as xpt_deferred) and updates the svc_deferred_req structure to keep
a pointer to the svc_xprt directly. The deferral processing code is also moved
out of the transport dependent recvfrom functions and into the generic svc_recv
path.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc.h      |    2 +
 include/linux/sunrpc/svc_xprt.h |    2 +
 include/linux/sunrpc/svcsock.h  |    3 --
 net/sunrpc/svc_xprt.c           |    1 +
 net/sunrpc/svcsock.c            |   58 +++++++++++++++++----------------------
 5 files changed, 29 insertions(+), 37 deletions(-)

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index cfb2652..40adc9d 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -320,7 +320,7 @@ static inline void svc_free_res_pages(struct svc_rqst *rqstp)
 
 struct svc_deferred_req {
 	u32			prot;	/* protocol (UDP or TCP) */
-	struct svc_sock		*svsk;
+	struct svc_xprt		*xprt;
 	struct sockaddr_storage	addr;	/* where reply must go */
 	size_t			addrlen;
 	union svc_addr_u	daddr;	/* where reply must come from */
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index dfb1d4d..d93ae27 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -59,6 +59,8 @@ struct svc_xprt {
 	spinlock_t		xpt_lock;	/* protects sk_deferred
 						 * and xpt_auth_cache */
 	void			*xpt_auth_cache;/* auth cache */
+	struct list_head	xpt_deferred;	/* deferred requests that need
+						 * to be revisted */
 };
 
 int	svc_reg_xprt_class(struct svc_xprt_class *);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index f2ed6a2..96a229e 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -20,9 +20,6 @@ struct svc_sock {
 	struct socket *		sk_sock;	/* berkeley socket layer */
 	struct sock *		sk_sk;		/* INET layer */
 
-	struct list_head	sk_deferred;	/* deferred requests that need to
-						 * be revisted */
-
 	/* We keep the old state_change and data_ready CB's here */
 	void			(*sk_ostate)(struct sock *);
 	void			(*sk_odata)(struct sock *, int bytes);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 79009c2..fdf0d8c 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -115,6 +115,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
 	xprt->xpt_server = serv;
 	INIT_LIST_HEAD(&xprt->xpt_list);
 	INIT_LIST_HEAD(&xprt->xpt_ready);
+	INIT_LIST_HEAD(&xprt->xpt_deferred);
 	mutex_init(&xprt->xpt_mutex);
 	spin_lock_init(&xprt->xpt_lock);
 }
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d149f4e..62b5225 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -89,7 +89,7 @@ static void		svc_close_xprt(struct svc_xprt *xprt);
 static void		svc_sock_detach(struct svc_xprt *);
 static void		svc_sock_free(struct svc_xprt *);
 
-static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
+static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static struct svc_xprt *
@@ -786,11 +786,6 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 				(serv->sv_nrthreads+3) * serv->sv_max_mesg,
 				(serv->sv_nrthreads+3) * serv->sv_max_mesg);
 
-	if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
-		svc_xprt_received(&svsk->sk_xprt);
-		return svc_deferred_recv(rqstp);
-	}
-
 	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 	skb = NULL;
 	err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
@@ -1155,11 +1150,6 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
 		test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
 
-	if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
-		svc_xprt_received(&svsk->sk_xprt);
-		return svc_deferred_recv(rqstp);
-	}
-
 	if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
 		/* sndbuf needs to have room for one request
 		 * per thread, otherwise we can stall even when the
@@ -1617,7 +1607,12 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 		dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
 			rqstp, pool->sp_id, svsk,
 			atomic_read(&svsk->sk_xprt.xpt_ref.refcount));
-		len = svsk->sk_xprt.xpt_ops->xpo_recvfrom(rqstp);
+		rqstp->rq_deferred = svc_deferred_dequeue(&svsk->sk_xprt);
+		if (rqstp->rq_deferred) {
+			svc_xprt_received(&svsk->sk_xprt);
+			len = svc_deferred_recv(rqstp);
+		} else
+			len = svsk->sk_xprt.xpt_ops->xpo_recvfrom(rqstp);
 		dprintk("svc: got len=%d\n", len);
 	}
 
@@ -1774,7 +1769,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	svsk->sk_ostate = inet->sk_state_change;
 	svsk->sk_odata = inet->sk_data_ready;
 	svsk->sk_owspace = inet->sk_write_space;
-	INIT_LIST_HEAD(&svsk->sk_deferred);
 
 	/* Initialize the socket */
 	if (sock->type == SOCK_DGRAM)
@@ -1992,22 +1986,21 @@ void svc_close_all(struct list_head *xprt_list)
 static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
 {
 	struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle);
-	struct svc_sock *svsk;
+	struct svc_xprt *xprt = dr->xprt;
 
 	if (too_many) {
-		svc_xprt_put(&dr->svsk->sk_xprt);
+		svc_xprt_put(xprt);
 		kfree(dr);
 		return;
 	}
 	dprintk("revisit queued\n");
-	svsk = dr->svsk;
-	dr->svsk = NULL;
-	spin_lock(&svsk->sk_xprt.xpt_lock);
-	list_add(&dr->handle.recent, &svsk->sk_deferred);
-	spin_unlock(&svsk->sk_xprt.xpt_lock);
-	set_bit(XPT_DEFERRED, &svsk->sk_xprt.xpt_flags);
-	svc_xprt_enqueue(&svsk->sk_xprt);
-	svc_xprt_put(&svsk->sk_xprt);
+	dr->xprt = NULL;
+	spin_lock(&xprt->xpt_lock);
+	list_add(&dr->handle.recent, &xprt->xpt_deferred);
+	spin_unlock(&xprt->xpt_lock);
+	set_bit(XPT_DEFERRED, &xprt->xpt_flags);
+	svc_xprt_enqueue(xprt);
+	svc_xprt_put(xprt);
 }
 
 static struct cache_deferred_req *
@@ -2038,7 +2031,7 @@ svc_defer(struct cache_req *req)
 		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2);
 	}
 	svc_xprt_get(rqstp->rq_xprt);
-	dr->svsk = rqstp->rq_sock;
+	dr->xprt = rqstp->rq_xprt;
 
 	dr->handle.revisit = svc_revisit;
 	return &dr->handle;
@@ -2064,22 +2057,21 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
 }
 
 
-static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
+static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
 {
 	struct svc_deferred_req *dr = NULL;
 
-	if (!test_bit(XPT_DEFERRED, &svsk->sk_xprt.xpt_flags))
+	if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags))
 		return NULL;
-	spin_lock(&svsk->sk_xprt.xpt_lock);
-	clear_bit(XPT_DEFERRED, &svsk->sk_xprt.xpt_flags);
-	if (!list_empty(&svsk->sk_deferred)) {
-		dr = list_entry(svsk->sk_deferred.next,
+	spin_lock(&xprt->xpt_lock);
+	clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
+	if (!list_empty(&xprt->xpt_deferred)) {
+		dr = list_entry(xprt->xpt_deferred.next,
 				struct svc_deferred_req,
 				handle.recent);
 		list_del_init(&dr->handle.recent);
-		set_bit(XPT_DEFERRED, &svsk->sk_xprt.xpt_flags);
+		set_bit(XPT_DEFERRED, &xprt->xpt_flags);
 	}
-	spin_unlock(&svsk->sk_xprt.xpt_lock);
+	spin_unlock(&xprt->xpt_lock);
 	return dr;
 }
-

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 25/38] svc: Move the sockaddr information to svc_xprt
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (22 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 24/38] svc: Make deferral processing xprt independent Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
       [not found]     ` <20071129224046.14563.59353.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
  2007-11-29 22:40   ` [RFC,PATCH 26/38] svc: Make svc_sock_release svc_xprt_release Tom Tucker
                     ` (13 subsequent siblings)
  37 siblings, 1 reply; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


This patch moves the transport sockaddr to the svc_xprt
structure.  Convenience functions are added to set and
get the local and remote addresses of a transport from
the transport provider, and to determine the length
of a sockaddr.

A transport is responsible for setting the xpt_local
and xpt_remote addresses in the svc_xprt structure as
part of transport creation and xpo_accept processing. This
cannot be done in a generic way and in fact varies
between TCP, UDP and RDMA. A set of xpo_ functions
(e.g. getlocalname, getremotename) could have been
added but this would have resulted in additional
caching and copying of the addresses around.  Note that
the xpt_local address should also be set on listening
endpoints; for TCP/RDMA this is done as part of
endpoint creation.

For connected transports like TCP and RDMA, the addresses
never change and can be set once and copied into the
rqstp structure for each request. For UDP, however, the
local and remote addresses may change for each request. In
this case, the address information is obtained from the
UDP recvmsg info and copied into the rqstp structure from
there. 

A svc_xprt_local_port function was also added that returns
the local port of a given transport. This is used by
svc_create_xprt to return the port associated with
a newly created transport, and later, when a generic
find-transport service is created, to check whether a
service is already listening on a given port.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |   51 ++++++++++++++++++++++++++++++++++++
 include/linux/sunrpc/svcsock.h  |    4 ---
 net/sunrpc/svc_xprt.c           |   31 ++++++++++++++++++++--
 net/sunrpc/svcsock.c            |   56 +++++++++++++++++++++------------------
 4 files changed, 110 insertions(+), 32 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index d93ae27..60bdffc 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -61,6 +61,10 @@ struct svc_xprt {
 	void			*xpt_auth_cache;/* auth cache */
 	struct list_head	xpt_deferred;	/* deferred requests that need
 						 * to be revisted */
+	struct sockaddr_storage	xpt_local;	/* local address */
+	int			xpt_locallen;	/* length of address */
+	struct sockaddr_storage	xpt_remote;	/* remote peer's address */
+	int			xpt_remotelen;	/* length of address */
 };
 
 int	svc_reg_xprt_class(struct svc_xprt_class *);
@@ -70,9 +74,56 @@ void	svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
 int	svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
 void	svc_xprt_received(struct svc_xprt *);
 void	svc_xprt_put(struct svc_xprt *xprt);
+void	svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
 static inline void svc_xprt_get(struct svc_xprt *xprt)
 {
 	kref_get(&xprt->xpt_ref);
 }
+static inline void svc_xprt_set_local(struct svc_xprt *xprt,
+				      struct sockaddr *sa, int salen)
+{
+	memcpy(&xprt->xpt_local, sa, salen);
+	xprt->xpt_locallen = salen;
+}
+static inline void svc_xprt_set_remote(struct svc_xprt *xprt,
+				       struct sockaddr *sa, int salen)
+{
+	memcpy(&xprt->xpt_remote, sa, salen);
+	xprt->xpt_remotelen = salen;
+}
+static inline int svc_addr_port(struct sockaddr *sa)
+{
+	int ret = -1;
+	switch (sa->sa_family) {
+	case AF_INET:
+		ret = ntohs(((struct sockaddr_in *)sa)->sin_port);
+		break;
+	case AF_INET6:
+		ret = ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
+		break;
+	}
+	return ret;
+}
+
+static inline int svc_addr_len(struct sockaddr *sa)
+{
+	switch (sa->sa_family) {
+	case AF_INET:
+		return sizeof(struct sockaddr_in);
+	case AF_INET6:
+		return sizeof(struct sockaddr_in6);
+	}
+	return -ENOTSUPP;
+}
+
+static inline int svc_xprt_local_port(struct svc_xprt *xprt)
+{
+	return svc_addr_port((struct sockaddr *)&xprt->xpt_local);
+}
+
+static inline int svc_xprt_remote_port(struct svc_xprt *xprt)
+{
+	return svc_addr_port((struct sockaddr *)&xprt->xpt_remote);
+}
 
 #endif /* SUNRPC_SVC_XPRT_H */
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 96a229e..206f092 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -28,10 +28,6 @@ struct svc_sock {
 	/* private TCP part */
 	int			sk_reclen;	/* length of record */
 	int			sk_tcplen;	/* current read length */
-
-	struct sockaddr_storage	sk_local;	/* local address */
-	struct sockaddr_storage	sk_remote;	/* remote peer's address */
-	int			sk_remotelen;	/* length of address */
 };
 
 /*
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index fdf0d8c..d0cbfe0 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -138,7 +138,6 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
 			spin_unlock(&svc_xprt_class_lock);
 			if (try_module_get(xcl->xcl_owner)) {
 				struct svc_xprt *newxprt;
-				ret = 0;
 				newxprt = xcl->xcl_ops->xpo_create
 					(serv,
 					 (struct sockaddr *)&sin, sizeof(sin),
@@ -146,7 +145,8 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
 				if (IS_ERR(newxprt)) {
 					module_put(xcl->xcl_owner);
 					ret = PTR_ERR(newxprt);
-				}
+				} else
+					ret = svc_xprt_local_port(newxprt);
 			}
 			goto out;
 		}
@@ -157,3 +157,30 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
 	return ret;
 }
 EXPORT_SYMBOL_GPL(svc_create_xprt);
+
+/*
+ * Copy the local and remote xprt addresses to the rqstp structure
+ */
+void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt)
+{
+	struct sockaddr *sin;
+
+	memcpy(&rqstp->rq_addr, &xprt->xpt_remote, xprt->xpt_remotelen);
+	rqstp->rq_addrlen = xprt->xpt_remotelen;
+
+	/*
+	 * Destination address in request is needed for binding the
+	 * source address in RPC replies/callbacks later.
+	 */
+	sin = (struct sockaddr *)&xprt->xpt_local;
+	switch (sin->sa_family) {
+	case AF_INET:
+		rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr;
+		break;
+	case AF_INET6:
+		rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr;
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs);
+
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 62b5225..e8cfeeb 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -638,33 +638,13 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen)
 	struct msghdr msg = {
 		.msg_flags	= MSG_DONTWAIT,
 	};
-	struct sockaddr *sin;
 	int len;
 
 	len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
 				msg.msg_flags);
 
-	/* sock_recvmsg doesn't fill in the name/namelen, so we must..
-	 */
-	memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen);
-	rqstp->rq_addrlen = svsk->sk_remotelen;
-
-	/* Destination address in request is needed for binding the
-	 * source address in RPC callbacks later.
-	 */
-	sin = (struct sockaddr *)&svsk->sk_local;
-	switch (sin->sa_family) {
-	case AF_INET:
-		rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr;
-		break;
-	case AF_INET6:
-		rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr;
-		break;
-	}
-
 	dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
 		svsk, iov[0].iov_base, iov[0].iov_len, len);
-
 	return len;
 }
 
@@ -734,8 +714,15 @@ svc_write_space(struct sock *sk)
 	}
 }
 
-static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp,
-					    struct cmsghdr *cmh)
+/*
+ * Copy the UDP datagram's destination address to the rqstp structure.
+ * The 'destination' address in this case is the address to which the
+ * peer sent the datagram, i.e. our local address. For multihomed
+ * hosts, this can change from msg to msg. Note that only the IP
+ * address changes, the port number should remain the same.
+ */
+static void svc_udp_get_dest_address(struct svc_rqst *rqstp,
+				     struct cmsghdr *cmh)
 {
 	switch (rqstp->rq_sock->sk_sk->sk_family) {
 	case AF_INET: {
@@ -802,7 +789,10 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 		svc_xprt_received(&svsk->sk_xprt);
 		return -EAGAIN;
 	}
-	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
+	len = svc_addr_len(svc_addr(rqstp));
+	if (len < 0)
+		return len;
+	rqstp->rq_addrlen = len;
 	if (skb->tstamp.tv64 == 0) {
 		skb->tstamp = ktime_get_real();
 		/* Don't enable netstamp, sunrpc doesn't
@@ -1114,14 +1104,13 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 	if (!(newsvsk = svc_setup_socket(serv, newsock, &err,
 				 (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY))))
 		goto failed;
-	memcpy(&newsvsk->sk_remote, sin, slen);
-	newsvsk->sk_remotelen = slen;
+	svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
 	err = kernel_getsockname(newsock, sin, &slen);
 	if (unlikely(err < 0)) {
 		dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
 		slen = offsetof(struct sockaddr, sa_data);
 	}
-	memcpy(&newsvsk->sk_local, sin, slen);
+	svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
 
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpconn++;
@@ -1262,6 +1251,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	svsk->sk_reclen = 0;
 	svsk->sk_tcplen = 0;
 
+	svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
 	svc_xprt_received(&svsk->sk_xprt);
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpcnt++;
@@ -1821,6 +1811,11 @@ int svc_addsock(struct svc_serv *serv,
 	else {
 		svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS);
 		if (svsk) {
+			struct sockaddr_storage addr;
+			struct sockaddr *sin = (struct sockaddr *)&addr;
+			int salen;
+			if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0)
+				svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
 			svc_xprt_received(&svsk->sk_xprt);
 			err = 0;
 		}
@@ -1846,6 +1841,9 @@ svc_create_socket(struct svc_serv *serv, int protocol,
 	int		error;
 	int		type;
 	char		buf[RPC_MAX_ADDRBUFLEN];
+	struct sockaddr_storage addr;
+	struct sockaddr *newsin = (struct sockaddr *)&addr;
+	int		newlen;
 
 	dprintk("svc: svc_create_socket(%s, %d, %s)\n",
 			serv->sv_program->pg_name, protocol,
@@ -1870,12 +1868,18 @@ svc_create_socket(struct svc_serv *serv, int protocol,
 	if (error < 0)
 		goto bummer;
 
+	newlen = len;
+	error = kernel_getsockname(sock, newsin, &newlen);
+	if (error < 0)
+		goto bummer;
+
 	if (protocol == IPPROTO_TCP) {
 		if ((error = kernel_listen(sock, 64)) < 0)
 			goto bummer;
 	}
 
 	if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) {
+		svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen);
 		svc_xprt_received(&svsk->sk_xprt);
 		return (struct svc_xprt *)svsk;
 	}

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 26/38] svc: Make svc_sock_release svc_xprt_release
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (23 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 25/38] svc: Move the sockaddr information to svc_xprt Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 27/38] svc: Make svc_recv transport neutral Tom Tucker
                     ` (12 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


The svc_sock_release function only touches transport independent fields.
Change the function to manipulate svc_xprt directly instead of the transport
dependent svc_sock structure. 

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 net/sunrpc/svcsock.c |   16 +++++++---------
 1 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e8cfeeb..36fc59c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -383,10 +383,9 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
 	}
 }
 
-static void
-svc_sock_release(struct svc_rqst *rqstp)
+static void svc_xprt_release(struct svc_rqst *rqstp)
 {
-	struct svc_sock	*svsk = rqstp->rq_sock;
+	struct svc_xprt	*xprt = rqstp->rq_xprt;
 
 	rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
 
@@ -394,7 +393,6 @@ svc_sock_release(struct svc_rqst *rqstp)
 	rqstp->rq_res.page_len = 0;
 	rqstp->rq_res.page_base = 0;
 
-
 	/* Reset response buffer and release
 	 * the reservation.
 	 * But first, check that enough space was reserved
@@ -407,9 +405,9 @@ svc_sock_release(struct svc_rqst *rqstp)
 
 	rqstp->rq_res.head[0].iov_len = 0;
 	svc_reserve(rqstp, 0);
-	rqstp->rq_sock = NULL;
+	rqstp->rq_xprt = NULL;
 
-	svc_xprt_put(&svsk->sk_xprt);
+	svc_xprt_put(xprt);
 }
 
 /*
@@ -1609,7 +1607,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 	/* No data, incomplete (TCP) read, or accept() */
 	if (len == 0 || len == -EAGAIN) {
 		rqstp->rq_res.len = 0;
-		svc_sock_release(rqstp);
+		svc_xprt_release(rqstp);
 		return -EAGAIN;
 	}
 	clear_bit(XPT_OLD, &svsk->sk_xprt.xpt_flags);
@@ -1629,7 +1627,7 @@ void
 svc_drop(struct svc_rqst *rqstp)
 {
 	dprintk("svc: socket %p dropped request\n", rqstp->rq_sock);
-	svc_sock_release(rqstp);
+	svc_xprt_release(rqstp);
 }
 
 /*
@@ -1662,7 +1660,7 @@ svc_send(struct svc_rqst *rqstp)
 	else
 		len = xprt->xpt_ops->xpo_sendto(rqstp);
 	mutex_unlock(&xprt->xpt_mutex);
-	svc_sock_release(rqstp);
+	svc_xprt_release(rqstp);
 
 	if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
 		return 0;

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 27/38] svc: Make svc_recv transport neutral
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (24 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 26/38] svc: Make svc_sock_release svc_xprt_release Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 28/38] svc: Make svc_age_temp_sockets svc_age_temp_transports Tom Tucker
                     ` (11 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


All of the transport fields and functions used by svc_recv are now
transport independent. Change the svc_recv function to use the svc_xprt
structure directly instead of the transport specific svc_sock structure.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 net/sunrpc/svcsock.c |   61 +++++++++++++++++++++++++-------------------------
 1 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 36fc59c..06a43ae 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -327,22 +327,21 @@ EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
 /*
  * Dequeue the first socket.  Must be called with the pool->sp_lock held.
  */
-static inline struct svc_sock *
-svc_sock_dequeue(struct svc_pool *pool)
+static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
 {
-	struct svc_sock	*svsk;
+	struct svc_xprt	*xprt;
 
 	if (list_empty(&pool->sp_sockets))
 		return NULL;
 
-	svsk = list_entry(pool->sp_sockets.next,
-			  struct svc_sock, sk_xprt.xpt_ready);
-	list_del_init(&svsk->sk_xprt.xpt_ready);
+	xprt = list_entry(pool->sp_sockets.next,
+			  struct svc_xprt, xpt_ready);
+	list_del_init(&xprt->xpt_ready);
 
-	dprintk("svc: socket %p dequeued, inuse=%d\n",
-		svsk->sk_sk, atomic_read(&svsk->sk_xprt.xpt_ref.refcount));
+	dprintk("svc: transport %p dequeued, inuse=%d\n",
+		xprt, atomic_read(&xprt->xpt_ref.refcount));
 
-	return svsk;
+	return xprt;
 }
 
 /*
@@ -1491,20 +1490,20 @@ svc_check_conn_limits(struct svc_serv *serv)
 int
 svc_recv(struct svc_rqst *rqstp, long timeout)
 {
-	struct svc_sock		*svsk = NULL;
+	struct svc_xprt		*xprt = NULL;
 	struct svc_serv		*serv = rqstp->rq_server;
 	struct svc_pool		*pool = rqstp->rq_pool;
 	int			len, i;
-	int 			pages;
+	int			pages;
 	struct xdr_buf		*arg;
 	DECLARE_WAITQUEUE(wait, current);
 
 	dprintk("svc: server %p waiting for data (to = %ld)\n",
 		rqstp, timeout);
 
-	if (rqstp->rq_sock)
+	if (rqstp->rq_xprt)
 		printk(KERN_ERR
-			"svc_recv: service %p, socket not NULL!\n",
+			"svc_recv: service %p, transport not NULL!\n",
 			 rqstp);
 	if (waitqueue_active(&rqstp->rq_wait))
 		printk(KERN_ERR
@@ -1541,11 +1540,12 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 		return -EINTR;
 
 	spin_lock_bh(&pool->sp_lock);
-	if ((svsk = svc_sock_dequeue(pool)) != NULL) {
-		rqstp->rq_sock = svsk;
-		svc_xprt_get(&svsk->sk_xprt);
+	xprt = svc_xprt_dequeue(pool);
+	if (xprt) {
+		rqstp->rq_xprt = xprt;
+		svc_xprt_get(xprt);
 		rqstp->rq_reserved = serv->sv_max_mesg;
-		atomic_add(rqstp->rq_reserved, &svsk->sk_xprt.xpt_reserved);
+		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 	} else {
 		/* No data pending. Go to sleep */
 		svc_thread_enqueue(pool, rqstp);
@@ -1565,7 +1565,8 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 		spin_lock_bh(&pool->sp_lock);
 		remove_wait_queue(&rqstp->rq_wait, &wait);
 
-		if (!(svsk = rqstp->rq_sock)) {
+		xprt = rqstp->rq_xprt;
+		if (!xprt) {
 			svc_thread_dequeue(pool, rqstp);
 			spin_unlock_bh(&pool->sp_lock);
 			dprintk("svc: server %p, no data yet\n", rqstp);
@@ -1575,12 +1576,12 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 	spin_unlock_bh(&pool->sp_lock);
 
 	len = 0;
-	if (test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)) {
+	if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
 		dprintk("svc_recv: found XPT_CLOSE\n");
-		svc_delete_xprt(&svsk->sk_xprt);
-	} else if (test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags)) {
+		svc_delete_xprt(xprt);
+	} else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
 		struct svc_xprt *newxpt;
-		newxpt = svsk->sk_xprt.xpt_ops->xpo_accept(&svsk->sk_xprt);
+		newxpt = xprt->xpt_ops->xpo_accept(xprt);
 		if (newxpt) {
 			/*
 			 * We know this module_get will succeed because the
@@ -1590,17 +1591,17 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 			svc_check_conn_limits(svsk->sk_xprt.xpt_server);
 			svc_xprt_received(newxpt);
 		}
-		svc_xprt_received(&svsk->sk_xprt);
+		svc_xprt_received(xprt);
 	} else {
-		dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
-			rqstp, pool->sp_id, svsk,
-			atomic_read(&svsk->sk_xprt.xpt_ref.refcount));
-		rqstp->rq_deferred = svc_deferred_dequeue(&svsk->sk_xprt);
+		dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
+			rqstp, pool->sp_id, xprt,
+			atomic_read(&xprt->xpt_ref.refcount));
+		rqstp->rq_deferred = svc_deferred_dequeue(xprt);
 		if (rqstp->rq_deferred) {
-			svc_xprt_received(&svsk->sk_xprt);
+			svc_xprt_received(xprt);
 			len = svc_deferred_recv(rqstp);
 		} else
-			len = svsk->sk_xprt.xpt_ops->xpo_recvfrom(rqstp);
+			len = xprt->xpt_ops->xpo_recvfrom(rqstp);
 		dprintk("svc: got len=%d\n", len);
 	}
 
@@ -1610,7 +1611,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 		svc_xprt_release(rqstp);
 		return -EAGAIN;
 	}
-	clear_bit(XPT_OLD, &svsk->sk_xprt.xpt_flags);
+	clear_bit(XPT_OLD, &xprt->xpt_flags);
 
 	rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
 	rqstp->rq_chandle.defer = svc_defer;

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 28/38] svc: Make svc_age_temp_sockets svc_age_temp_transports
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (25 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 27/38] svc: Make svc_recv transport neutral Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 29/38] svc: Move common create logic to common code Tom Tucker
                     ` (10 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


This function is transport independent. Change it to use svc_xprt directly
and change its name to reflect this.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 net/sunrpc/svcsock.c |   36 +++++++++++++++++++-----------------
 1 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 06a43ae..e4e2b2f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1673,48 +1673,50 @@ svc_send(struct svc_rqst *rqstp)
  * a mark-and-sweep algorithm.
  */
 static void
-svc_age_temp_sockets(unsigned long closure)
+svc_age_temp_xprts(unsigned long closure)
 {
 	struct svc_serv *serv = (struct svc_serv *)closure;
-	struct svc_sock *svsk;
+	struct svc_xprt *xprt;
 	struct list_head *le, *next;
 	LIST_HEAD(to_be_aged);
 
-	dprintk("svc_age_temp_sockets\n");
+	dprintk("svc_age_temp_xprts\n");
 
 	if (!spin_trylock_bh(&serv->sv_lock)) {
 		/* busy, try again 1 sec later */
-		dprintk("svc_age_temp_sockets: busy\n");
+		dprintk("svc_age_temp_xprts: busy\n");
 		mod_timer(&serv->sv_temptimer, jiffies + HZ);
 		return;
 	}
 
 	list_for_each_safe(le, next, &serv->sv_tempsocks) {
-		svsk = list_entry(le, struct svc_sock, sk_xprt.xpt_list);
+		xprt = list_entry(le, struct svc_xprt, xpt_list);
 
-		if (!test_and_set_bit(XPT_OLD, &svsk->sk_xprt.xpt_flags))
+		/* First time through, just mark it OLD. Second time
+		 * through, close it. */
+		if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags))
 			continue;
-		if (atomic_read(&svsk->sk_xprt.xpt_ref.refcount) > 1
-		    || test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags))
+		if (atomic_read(&xprt->xpt_ref.refcount) > 1
+		    || test_bit(XPT_BUSY, &xprt->xpt_flags))
 			continue;
-		svc_xprt_get(&svsk->sk_xprt);
+		svc_xprt_get(xprt);
 		list_move(le, &to_be_aged);
-		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
-		set_bit(XPT_DETACHED, &svsk->sk_xprt.xpt_flags);
+		set_bit(XPT_CLOSE, &xprt->xpt_flags);
+		set_bit(XPT_DETACHED, &xprt->xpt_flags);
 	}
 	spin_unlock_bh(&serv->sv_lock);
 
 	while (!list_empty(&to_be_aged)) {
 		le = to_be_aged.next;
-		/* fiddling the sk_xprt.xpt_list node is safe 'cos we're XPT_DETACHED */
+		/* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */
 		list_del_init(le);
-		svsk = list_entry(le, struct svc_sock, sk_xprt.xpt_list);
+		xprt = list_entry(le, struct svc_xprt, xpt_list);
 
-		dprintk("queuing svsk %p for closing\n", svsk);
+		dprintk("queuing xprt %p for closing\n", xprt);
 
 		/* a thread will dequeue and close it soon */
-		svc_xprt_enqueue(&svsk->sk_xprt);
-		svc_xprt_put(&svsk->sk_xprt);
+		svc_xprt_enqueue(xprt);
+		svc_xprt_put(xprt);
 	}
 
 	mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
@@ -1772,7 +1774,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 		serv->sv_tmpcnt++;
 		if (serv->sv_temptimer.function == NULL) {
 			/* setup timer to age temp sockets */
-			setup_timer(&serv->sv_temptimer, svc_age_temp_sockets,
+			setup_timer(&serv->sv_temptimer, svc_age_temp_xprts,
 					(unsigned long)serv);
 			mod_timer(&serv->sv_temptimer,
 					jiffies + svc_conn_age_period * HZ);

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 29/38] svc: Move common create logic to common code
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (26 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 28/38] svc: Make svc_age_temp_sockets svc_age_temp_transports Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 30/38] svc: Removing remaining references to rq_sock in rqstp Tom Tucker
                     ` (9 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


Move the code that adds a transport instance to the sv_tempsocks and
sv_permsocks lists out of the transport specific functions and into core
logic. 

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 net/sunrpc/svc_xprt.c |    9 ++++++++-
 net/sunrpc/svcsock.c  |   39 +++++++++++++++++++--------------------
 2 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d0cbfe0..924df63 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -145,8 +145,15 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
 				if (IS_ERR(newxprt)) {
 					module_put(xcl->xcl_owner);
 					ret = PTR_ERR(newxprt);
-				} else
+				} else {
+					clear_bit(XPT_TEMP,
+						  &newxprt->xpt_flags);
+					spin_lock_bh(&serv->sv_lock);
+					list_add(&newxprt->xpt_list,
+						 &serv->sv_permsocks);
+					spin_unlock_bh(&serv->sv_lock);
 					ret = svc_xprt_local_port(newxprt);
+				}
 			}
 			goto out;
 		}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e4e2b2f..8f0ceef 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -94,6 +94,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static struct svc_xprt *
 svc_create_socket(struct svc_serv *, int, struct sockaddr *, int, int);
+static void svc_age_temp_xprts(unsigned long closure);
 
 /* apparently the "standard" is that clients close
  * idle connections after 5 minutes, servers after
@@ -1588,7 +1589,20 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 			 * listener holds a reference too
 			 */
 			__module_get(newxpt->xpt_class->xcl_owner);
-			svc_check_conn_limits(svsk->sk_xprt.xpt_server);
+			svc_check_conn_limits(xprt->xpt_server);
+			spin_lock_bh(&serv->sv_lock);
+			set_bit(XPT_TEMP, &newxpt->xpt_flags);
+			list_add(&newxpt->xpt_list, &serv->sv_tempsocks);
+			serv->sv_tmpcnt++;
+			if (serv->sv_temptimer.function == NULL) {
+				/* setup timer to age temp sockets */
+				setup_timer(&serv->sv_temptimer,
+					    svc_age_temp_xprts,
+					    (unsigned long)serv);
+				mod_timer(&serv->sv_temptimer,
+					  jiffies + svc_conn_age_period * HZ);
+			}
+			spin_unlock_bh(&serv->sv_lock);
 			svc_xprt_received(newxpt);
 		}
 		svc_xprt_received(xprt);
@@ -1733,7 +1747,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	struct svc_sock	*svsk;
 	struct sock	*inet;
 	int		pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
-	int		is_temporary = flags & SVC_SOCK_TEMPORARY;
 
 	dprintk("svc: svc_setup_socket %p\n", sock);
 	if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
@@ -1767,24 +1780,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	else
 		svc_tcp_init(svsk, serv);
 
-	spin_lock_bh(&serv->sv_lock);
-	if (is_temporary) {
-		set_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags);
-		list_add(&svsk->sk_xprt.xpt_list, &serv->sv_tempsocks);
-		serv->sv_tmpcnt++;
-		if (serv->sv_temptimer.function == NULL) {
-			/* setup timer to age temp sockets */
-			setup_timer(&serv->sv_temptimer, svc_age_temp_xprts,
-					(unsigned long)serv);
-			mod_timer(&serv->sv_temptimer,
-					jiffies + svc_conn_age_period * HZ);
-		}
-	} else {
-		clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags);
-		list_add(&svsk->sk_xprt.xpt_list, &serv->sv_permsocks);
-	}
-	spin_unlock_bh(&serv->sv_lock);
-
 	dprintk("svc: svc_setup_socket created %p (inet %p)\n",
 				svsk, svsk->sk_sk);
 
@@ -1820,6 +1815,10 @@ int svc_addsock(struct svc_serv *serv,
 			svc_xprt_received(&svsk->sk_xprt);
 			err = 0;
 		}
+		clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags);
+		spin_lock_bh(&serv->sv_lock);
+		list_add(&svsk->sk_xprt.xpt_list, &serv->sv_permsocks);
+		spin_unlock_bh(&serv->sv_lock);
 	}
 	if (err) {
 		sockfd_put(so);

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 30/38] svc: Removing remaining references to rq_sock in rqstp
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (27 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 29/38] svc: Move common create logic to common code Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:40   ` [RFC,PATCH 31/38] svc: Make svc_check_conn_limits xprt independent Tom Tucker
                     ` (8 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


This functionally empty patch removes rq_sock and the unnamed union
from the rqstp structure.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc.h |    5 +----
 net/sunrpc/svcsock.c       |   38 ++++++++++++++++++++++++--------------
 2 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 40adc9d..04eb20e 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -204,10 +204,7 @@ union svc_addr_u {
 struct svc_rqst {
 	struct list_head	rq_list;	/* idle list */
 	struct list_head	rq_all;		/* all threads list */
-	union {
-		struct svc_xprt *	rq_xprt;	/* transport ptr */
-		struct svc_sock *	rq_sock; 	/* socket ptr */
-	};
+	struct svc_xprt *	rq_xprt;	/* transport ptr */
 	struct sockaddr_storage	rq_addr;	/* peer address */
 	size_t			rq_addrlen;
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 8f0ceef..c4b2a5e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -201,10 +201,12 @@ static void svc_release_skb(struct svc_rqst *rqstp)
 	struct svc_deferred_req *dr = rqstp->rq_deferred;
 
 	if (skb) {
+		struct svc_sock *svsk =
+			container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 		rqstp->rq_xprt_ctxt = NULL;
 
 		dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
-		skb_free_datagram(rqstp->rq_sock->sk_sk, skb);
+		skb_free_datagram(svsk->sk_sk, skb);
 	}
 	if (dr) {
 		rqstp->rq_deferred = NULL;
@@ -433,7 +435,7 @@ svc_wake_up(struct svc_serv *serv)
 			dprintk("svc: daemon %p woken up.\n", rqstp);
 			/*
 			svc_thread_dequeue(pool, rqstp);
-			rqstp->rq_sock = NULL;
+			rqstp->rq_xprt = NULL;
 			 */
 			wake_up(&rqstp->rq_wait);
 		}
@@ -450,7 +452,9 @@ union svc_pktinfo_u {
 
 static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
 {
-	switch (rqstp->rq_sock->sk_sk->sk_family) {
+	struct svc_sock *svsk =
+		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
+	switch (svsk->sk_sk->sk_family) {
 	case AF_INET: {
 			struct in_pktinfo *pki = CMSG_DATA(cmh);
 
@@ -483,7 +487,8 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
 static int
 svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
 {
-	struct svc_sock	*svsk = rqstp->rq_sock;
+	struct svc_sock	*svsk =
+		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 	struct socket	*sock = svsk->sk_sock;
 	int		slen;
 	union {
@@ -556,7 +561,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
 	}
 out:
 	dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n",
-		rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len,
+		svsk, xdr->head[0].iov_base, xdr->head[0].iov_len,
 		xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf)));
 
 	return len;
@@ -632,7 +637,8 @@ svc_recv_available(struct svc_sock *svsk)
 static int
 svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen)
 {
-	struct svc_sock *svsk = rqstp->rq_sock;
+	struct svc_sock *svsk =
+		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 	struct msghdr msg = {
 		.msg_flags	= MSG_DONTWAIT,
 	};
@@ -722,7 +728,9 @@ svc_write_space(struct sock *sk)
 static void svc_udp_get_dest_address(struct svc_rqst *rqstp,
 				     struct cmsghdr *cmh)
 {
-	switch (rqstp->rq_sock->sk_sk->sk_family) {
+	struct svc_sock *svsk =
+		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
+	switch (svsk->sk_sk->sk_family) {
 	case AF_INET: {
 		struct in_pktinfo *pki = CMSG_DATA(cmh);
 		rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
@@ -742,7 +750,8 @@ static void svc_udp_get_dest_address(struct svc_rqst *rqstp,
 static int
 svc_udp_recvfrom(struct svc_rqst *rqstp)
 {
-	struct svc_sock	*svsk = rqstp->rq_sock;
+	struct svc_sock	*svsk =
+		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
 	struct sk_buff	*skb;
 	union {
@@ -1126,7 +1135,8 @@ failed:
 static int
 svc_tcp_recvfrom(struct svc_rqst *rqstp)
 {
-	struct svc_sock	*svsk = rqstp->rq_sock;
+	struct svc_sock	*svsk =
+		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
 	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
 	int		len;
 	struct kvec *vec;
@@ -1290,16 +1300,16 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
 	reclen = htonl(0x80000000|((xbufp->len ) - 4));
 	memcpy(xbufp->head[0].iov_base, &reclen, 4);
 
-	if (test_bit(XPT_DEAD, &rqstp->rq_sock->sk_xprt.xpt_flags))
+	if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags))
 		return -ENOTCONN;
 
 	sent = svc_sendto(rqstp, &rqstp->rq_res);
 	if (sent != xbufp->len) {
 		printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
-		       rqstp->rq_sock->sk_xprt.xpt_server->sv_name,
+		       rqstp->rq_xprt->xpt_server->sv_name,
 		       (sent<0)?"got error":"sent only",
 		       sent, xbufp->len);
-		set_bit(XPT_CLOSE, &rqstp->rq_sock->sk_xprt.xpt_flags);
+		set_bit(XPT_CLOSE, &rqstp->rq_xprt->xpt_flags);
 		svc_xprt_enqueue(rqstp->rq_xprt);
 		sent = -EAGAIN;
 	}
@@ -1319,7 +1329,7 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
 
 static int svc_tcp_has_wspace(struct svc_xprt *xprt)
 {
-	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+	struct svc_sock *svsk =	container_of(xprt, struct svc_sock, sk_xprt);
 	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
 	int required;
 
@@ -1641,7 +1651,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
 void
 svc_drop(struct svc_rqst *rqstp)
 {
-	dprintk("svc: socket %p dropped request\n", rqstp->rq_sock);
+	dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
 	svc_xprt_release(rqstp);
 }
 

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 31/38] svc: Make svc_check_conn_limits xprt independent
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (28 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 30/38] svc: Removing remaining references to rq_sock in rqstp Tom Tucker
@ 2007-11-29 22:40   ` Tom Tucker
  2007-11-29 22:41   ` [RFC,PATCH 32/38] svc: Move the xprt independent code to the svc_xprt.c file Tom Tucker
                     ` (7 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:40 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


The svc_check_conn_limits function only manipulates xprt fields. Change references
to svc_sock->sk_xprt to use svc_xprt directly.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 net/sunrpc/svcsock.c |   31 +++++++++++++++----------------
 1 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index c4b2a5e..53288cc 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1457,38 +1457,37 @@ svc_check_conn_limits(struct svc_serv *serv)
 	 * seconds. An attacker can easily beat that.
 	 *
 	 * The only somewhat efficient mechanism would be if drop
-	 * old connections from the same IP first. But right now
-	 * we don't even record the client IP in svc_sock.
+	 * old connections from the same IP first.
 	 */
 	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
-		struct svc_sock *svsk = NULL;
+		struct svc_xprt *xprt = NULL;
 		spin_lock_bh(&serv->sv_lock);
 		if (!list_empty(&serv->sv_tempsocks)) {
 			if (net_ratelimit()) {
 				/* Try to help the admin */
-				printk(KERN_NOTICE "%s: too many open TCP "
-					"sockets, consider increasing the "
+				printk(KERN_NOTICE "%s: too many open  "
+					"connections, consider increasing the "
 					"number of nfsd threads\n",
-						   serv->sv_name);
+				       serv->sv_name);
 				printk(KERN_NOTICE
-				       "%s: last TCP connect from %s\n",
+				       "%s: last connection from %s\n",
 				       serv->sv_name, buf);
 			}
 			/*
-			 * Always select the oldest socket. It's not fair,
+			 * Always select the oldest connection. It's not fair,
 			 * but so is life
 			 */
-			svsk = list_entry(serv->sv_tempsocks.prev,
-					  struct svc_sock,
-					  sk_xprt.xpt_list);
-			set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
-			svc_xprt_get(&svsk->sk_xprt);
+			xprt = list_entry(serv->sv_tempsocks.prev,
+					  struct svc_xprt,
+					  xpt_list);
+			set_bit(XPT_CLOSE, &xprt->xpt_flags);
+			svc_xprt_get(xprt);
 		}
 		spin_unlock_bh(&serv->sv_lock);
 
-		if (svsk) {
-			svc_xprt_enqueue(&svsk->sk_xprt);
-			svc_xprt_put(&svsk->sk_xprt);
+		if (xprt) {
+			svc_xprt_enqueue(xprt);
+			svc_xprt_put(xprt);
 		}
 	}
 }

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 32/38] svc: Move the xprt independent code to the svc_xprt.c file
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (29 preceding siblings ...)
  2007-11-29 22:40   ` [RFC,PATCH 31/38] svc: Make svc_check_conn_limits xprt independent Tom Tucker
@ 2007-11-29 22:41   ` Tom Tucker
  2007-11-29 22:41   ` [RFC,PATCH 33/38] svc: Add transport hdr size for defer/revisit Tom Tucker
                     ` (6 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:41 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


This functionally trivial patch moves all of the transport-independent
functions from the svcsock.c file to the transport-independent svc_xprt.c
file.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    5 
 net/sunrpc/svc_xprt.c           |  708 ++++++++++++++++++++++++++++++++++++++
 net/sunrpc/svcsock.c            |  719 ---------------------------------------
 3 files changed, 713 insertions(+), 719 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 60bdffc..36f8b09 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -72,9 +72,14 @@ int	svc_unreg_xprt_class(struct svc_xprt_class *);
 void	svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
 		      struct svc_serv *);
 int	svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
+void	svc_close_xprt(struct svc_xprt *xprt);
 void	svc_xprt_received(struct svc_xprt *);
 void	svc_xprt_put(struct svc_xprt *xprt);
 void	svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
+void	svc_xprt_enqueue(struct svc_xprt *xprt);
+int	svc_port_is_privileged(struct sockaddr *sin);
+void	svc_delete_xprt(struct svc_xprt *xprt);
+
 static inline void svc_xprt_get(struct svc_xprt *xprt)
 {
 	kref_get(&xprt->xpt_ref);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 924df63..56204e9 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -35,6 +35,18 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
+static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
+static int svc_deferred_recv(struct svc_rqst *rqstp);
+static struct cache_deferred_req *svc_defer(struct cache_req *req);
+static void svc_age_temp_xprts(unsigned long closure);
+
+/* Apparently the "standard" is that clients close
+ * idle connections after 5 minutes, servers after
+ * 6 minutes
+ *   http://www.connectathon.org/talks96/nfstcp.pdf
+ */
+static int svc_conn_age_period = 6*60;
+
 /* List of registered transport classes */
 static DEFINE_SPINLOCK(svc_xprt_class_lock);
 static LIST_HEAD(svc_xprt_class_list);
@@ -191,3 +203,699 @@ void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt)
 }
 EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs);
 
+/*
+ * Queue up an idle server thread.  Must have pool->sp_lock held.
+ * Note: this is really a stack rather than a queue, so that we only
+ * use as many different threads as we need, and the rest don't pollute
+ * the cache.
+ */
+static void svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp)
+{
+	list_add(&rqstp->rq_list, &pool->sp_threads);
+}
+
+/*
+ * Dequeue an nfsd thread.  Must have pool->sp_lock held.
+ */
+static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
+{
+	list_del(&rqstp->rq_list);
+}
+
+/*
+ * Queue up a transport with data pending. If there are idle nfsd
+ * processes, wake 'em up.
+ *
+ */
+void svc_xprt_enqueue(struct svc_xprt *xprt)
+{
+	struct svc_serv	*serv = xprt->xpt_server;
+	struct svc_pool *pool;
+	struct svc_rqst	*rqstp;
+	int cpu;
+
+	if (!(xprt->xpt_flags &
+	      ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
+		return;
+	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
+		return;
+
+	cpu = get_cpu();
+	pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
+	put_cpu();
+
+	spin_lock_bh(&pool->sp_lock);
+
+	if (!list_empty(&pool->sp_threads) &&
+	    !list_empty(&pool->sp_sockets))
+		printk(KERN_ERR
+		       "svc_xprt_enqueue: "
+		       "threads and transports both waiting??\n");
+
+	if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
+		/* Don't enqueue dead transports */
+		dprintk("svc: transport %p is dead, not enqueued\n", xprt);
+		goto out_unlock;
+	}
+
+	/* Mark transport as busy. It will remain in this state until the
+	 * server has processed all pending data and put the transport back
+	 * on the idle list.  We update XPT_BUSY atomically because
+	 * it also guards against trying to enqueue the transport twice.
+	 */
+	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
+		/* Don't enqueue transport while already enqueued */
+		dprintk("svc: transport %p busy, not enqueued\n", xprt);
+		goto out_unlock;
+	}
+	BUG_ON(xprt->xpt_pool != NULL);
+	xprt->xpt_pool = pool;
+
+	/* Handle pending connection */
+	if (test_bit(XPT_CONN, &xprt->xpt_flags))
+		goto process;
+
+	/* Handle close in-progress */
+	if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
+		goto process;
+
+	/* Check if we have space to reply to a request */
+	if (!xprt->xpt_ops->xpo_has_wspace(xprt)) {
+		/* Don't enqueue while not enough space for reply */
+		dprintk("svc: no write space, transport %p  not enqueued\n",
+			xprt);
+		xprt->xpt_pool = NULL;
+		clear_bit(XPT_BUSY, &xprt->xpt_flags);
+		goto out_unlock;
+	}
+
+ process:
+	if (!list_empty(&pool->sp_threads)) {
+		rqstp = list_entry(pool->sp_threads.next,
+				   struct svc_rqst,
+				   rq_list);
+		dprintk("svc: transport %p served by daemon %p\n",
+			xprt, rqstp);
+		svc_thread_dequeue(pool, rqstp);
+		if (rqstp->rq_xprt)
+			printk(KERN_ERR
+				"svc_xprt_enqueue: server %p, rq_xprt=%p!\n",
+				rqstp, rqstp->rq_xprt);
+		rqstp->rq_xprt = xprt;
+		svc_xprt_get(xprt);
+		rqstp->rq_reserved = serv->sv_max_mesg;
+		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
+		BUG_ON(xprt->xpt_pool != pool);
+		wake_up(&rqstp->rq_wait);
+	} else {
+		dprintk("svc: transport %p put into queue\n", xprt);
+		list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
+		BUG_ON(xprt->xpt_pool != pool);
+	}
+
+out_unlock:
+	spin_unlock_bh(&pool->sp_lock);
+}
+EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
+
+/*
+ * Dequeue the first transport.  Must be called with the pool->sp_lock held.
+ */
+static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
+{
+	struct svc_xprt	*xprt;
+
+	if (list_empty(&pool->sp_sockets))
+		return NULL;
+
+	xprt = list_entry(pool->sp_sockets.next,
+			  struct svc_xprt, xpt_ready);
+	list_del_init(&xprt->xpt_ready);
+
+	dprintk("svc: transport %p dequeued, inuse=%d\n",
+		xprt, atomic_read(&xprt->xpt_ref.refcount));
+
+	return xprt;
+}
+
+/*
+ * Having read something from a transport, check whether it
+ * needs to be re-enqueued.
+ * Note: XPT_DATA only gets cleared when a read-attempt finds
+ * no (or insufficient) data.
+ */
+void svc_xprt_received(struct svc_xprt *xprt)
+{
+	xprt->xpt_pool = NULL;
+	clear_bit(XPT_BUSY, &xprt->xpt_flags);
+	svc_xprt_enqueue(xprt);
+}
+EXPORT_SYMBOL_GPL(svc_xprt_received);
+
+/**
+ * svc_reserve - change the space reserved for the reply to a request.
+ * @rqstp:  The request in question
+ * @space: new max space to reserve
+ *
+ * Each request reserves some space on the output queue of the transport
+ * to make sure the reply fits.  This function reduces that reserved
+ * space to be the amount of space used already, plus @space.
+ *
+ */
+void svc_reserve(struct svc_rqst *rqstp, int space)
+{
+	space += rqstp->rq_res.head[0].iov_len;
+
+	if (space < rqstp->rq_reserved) {
+		struct svc_xprt *xprt = rqstp->rq_xprt;
+		atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
+		rqstp->rq_reserved = space;
+
+		svc_xprt_enqueue(xprt);
+	}
+}
+
+static void svc_xprt_release(struct svc_rqst *rqstp)
+{
+	struct svc_xprt	*xprt = rqstp->rq_xprt;
+
+	rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
+
+	svc_free_res_pages(rqstp);
+	rqstp->rq_res.page_len = 0;
+	rqstp->rq_res.page_base = 0;
+
+	/* Reset response buffer and release
+	 * the reservation.
+	 * But first, check that enough space was reserved
+	 * for the reply, otherwise we have a bug!
+	 */
+	if ((rqstp->rq_res.len) >  rqstp->rq_reserved)
+		printk(KERN_ERR "RPC request reserved %d but used %d\n",
+		       rqstp->rq_reserved,
+		       rqstp->rq_res.len);
+
+	rqstp->rq_res.head[0].iov_len = 0;
+	svc_reserve(rqstp, 0);
+	rqstp->rq_xprt = NULL;
+
+	svc_xprt_put(xprt);
+}
+
+/*
+ * External function to wake up a server waiting for data
+ * This really only makes sense for services like lockd
+ * which have exactly one thread anyway.
+ */
+void svc_wake_up(struct svc_serv *serv)
+{
+	struct svc_rqst	*rqstp;
+	unsigned int i;
+	struct svc_pool *pool;
+
+	for (i = 0; i < serv->sv_nrpools; i++) {
+		pool = &serv->sv_pools[i];
+
+		spin_lock_bh(&pool->sp_lock);
+		if (!list_empty(&pool->sp_threads)) {
+			rqstp = list_entry(pool->sp_threads.next,
+					   struct svc_rqst,
+					   rq_list);
+			dprintk("svc: daemon %p woken up.\n", rqstp);
+			/*
+			svc_thread_dequeue(pool, rqstp);
+			rqstp->rq_xprt = NULL;
+			 */
+			wake_up(&rqstp->rq_wait);
+		}
+		spin_unlock_bh(&pool->sp_lock);
+	}
+}
+
+int svc_port_is_privileged(struct sockaddr *sin)
+{
+	switch (sin->sa_family) {
+	case AF_INET:
+		return ntohs(((struct sockaddr_in *)sin)->sin_port)
+			< PROT_SOCK;
+	case AF_INET6:
+		return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
+			< PROT_SOCK;
+	default:
+		return 0;
+	}
+}
+
+static void svc_check_conn_limits(struct svc_serv *serv)
+{
+	char	buf[RPC_MAX_ADDRBUFLEN];
+
+	/* make sure that we don't have too many active connections.
+	 * If we have, something must be dropped.
+	 *
+	 * There's no point in trying to do random drop here for
+	 * DoS prevention. The NFS client does 1 reconnect in 15
+	 * seconds. An attacker can easily beat that.
+	 *
+	 * The only somewhat efficient mechanism would be to drop
+	 * old connections from the same IP first.
+	 */
+	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
+		struct svc_xprt *xprt = NULL;
+		spin_lock_bh(&serv->sv_lock);
+		if (!list_empty(&serv->sv_tempsocks)) {
+			if (net_ratelimit()) {
+				/* Try to help the admin */
+				printk(KERN_NOTICE "%s: too many open  "
+					"connections, consider increasing the "
+					"number of nfsd threads\n",
+				       serv->sv_name);
+				printk(KERN_NOTICE
+				       "%s: last connection from %s\n",
+				       serv->sv_name, buf);
+			}
+			/*
+			 * Always select the oldest connection. It's not fair,
+			 * but so is life
+			 */
+			xprt = list_entry(serv->sv_tempsocks.prev,
+					  struct svc_xprt,
+					  xpt_list);
+			set_bit(XPT_CLOSE, &xprt->xpt_flags);
+			svc_xprt_get(xprt);
+		}
+		spin_unlock_bh(&serv->sv_lock);
+
+		if (xprt) {
+			svc_xprt_enqueue(xprt);
+			svc_xprt_put(xprt);
+		}
+	}
+}
+
+/*
+ * Receive the next request on any transport.  This code is carefully
+ * organised not to touch any cachelines in the shared svc_serv
+ * structure, only cachelines in the local svc_pool.
+ */
+int svc_recv(struct svc_rqst *rqstp, long timeout)
+{
+	struct svc_xprt		*xprt = NULL;
+	struct svc_serv		*serv = rqstp->rq_server;
+	struct svc_pool		*pool = rqstp->rq_pool;
+	int			len, i;
+	int			pages;
+	struct xdr_buf		*arg;
+	DECLARE_WAITQUEUE(wait, current);
+
+	dprintk("svc: server %p waiting for data (to = %ld)\n",
+		rqstp, timeout);
+
+	if (rqstp->rq_xprt)
+		printk(KERN_ERR
+			"svc_recv: service %p, transport not NULL!\n",
+			 rqstp);
+	if (waitqueue_active(&rqstp->rq_wait))
+		printk(KERN_ERR
+			"svc_recv: service %p, wait queue active!\n",
+			 rqstp);
+
+
+	/* now allocate needed pages.  If we get a failure, sleep briefly */
+	pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
+	for (i = 0; i < pages ; i++)
+		while (rqstp->rq_pages[i] == NULL) {
+			struct page *p = alloc_page(GFP_KERNEL);
+			if (!p)
+				schedule_timeout_uninterruptible(msecs_to_jiffies(500));
+			rqstp->rq_pages[i] = p;
+		}
+	rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
+	BUG_ON(pages >= RPCSVC_MAXPAGES);
+
+	/* Make arg->head point to first page and arg->pages point to rest */
+	arg = &rqstp->rq_arg;
+	arg->head[0].iov_base = page_address(rqstp->rq_pages[0]);
+	arg->head[0].iov_len = PAGE_SIZE;
+	arg->pages = rqstp->rq_pages + 1;
+	arg->page_base = 0;
+	/* save at least one page for response */
+	arg->page_len = (pages-2)*PAGE_SIZE;
+	arg->len = (pages-1)*PAGE_SIZE;
+	arg->tail[0].iov_len = 0;
+
+	try_to_freeze();
+	cond_resched();
+	if (signalled())
+		return -EINTR;
+
+	spin_lock_bh(&pool->sp_lock);
+	xprt = svc_xprt_dequeue(pool);
+	if (xprt) {
+		rqstp->rq_xprt = xprt;
+		svc_xprt_get(xprt);
+		rqstp->rq_reserved = serv->sv_max_mesg;
+		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
+	} else {
+		/* No data pending. Go to sleep */
+		svc_thread_enqueue(pool, rqstp);
+
+		/*
+		 * We have to be able to interrupt this wait
+		 * to bring down the daemons ...
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&rqstp->rq_wait, &wait);
+		spin_unlock_bh(&pool->sp_lock);
+
+		schedule_timeout(timeout);
+
+		try_to_freeze();
+
+		spin_lock_bh(&pool->sp_lock);
+		remove_wait_queue(&rqstp->rq_wait, &wait);
+
+		xprt = rqstp->rq_xprt;
+		if (!xprt) {
+			svc_thread_dequeue(pool, rqstp);
+			spin_unlock_bh(&pool->sp_lock);
+			dprintk("svc: server %p, no data yet\n", rqstp);
+			return signalled()? -EINTR : -EAGAIN;
+		}
+	}
+	spin_unlock_bh(&pool->sp_lock);
+
+	len = 0;
+	if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
+		dprintk("svc_recv: found XPT_CLOSE\n");
+		svc_delete_xprt(xprt);
+	} else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
+		struct svc_xprt *newxpt;
+		newxpt = xprt->xpt_ops->xpo_accept(xprt);
+		if (newxpt) {
+			/*
+			 * We know this module_get will succeed because the
+			 * listener holds a reference too
+			 */
+			__module_get(newxpt->xpt_class->xcl_owner);
+			svc_check_conn_limits(xprt->xpt_server);
+			spin_lock_bh(&serv->sv_lock);
+			set_bit(XPT_TEMP, &newxpt->xpt_flags);
+			list_add(&newxpt->xpt_list, &serv->sv_tempsocks);
+			serv->sv_tmpcnt++;
+			if (serv->sv_temptimer.function == NULL) {
+				/* setup timer to age temp transports */
+				setup_timer(&serv->sv_temptimer,
+					    svc_age_temp_xprts,
+					    (unsigned long)serv);
+				mod_timer(&serv->sv_temptimer,
+					  jiffies + svc_conn_age_period * HZ);
+			}
+			spin_unlock_bh(&serv->sv_lock);
+			svc_xprt_received(newxpt);
+		}
+		svc_xprt_received(xprt);
+	} else {
+		dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
+			rqstp, pool->sp_id, xprt,
+			atomic_read(&xprt->xpt_ref.refcount));
+		rqstp->rq_deferred = svc_deferred_dequeue(xprt);
+		if (rqstp->rq_deferred) {
+			svc_xprt_received(xprt);
+			len = svc_deferred_recv(rqstp);
+		} else
+			len = xprt->xpt_ops->xpo_recvfrom(rqstp);
+		dprintk("svc: got len=%d\n", len);
+	}
+
+	/* No data, incomplete (TCP) read, or accept() */
+	if (len == 0 || len == -EAGAIN) {
+		rqstp->rq_res.len = 0;
+		svc_xprt_release(rqstp);
+		return -EAGAIN;
+	}
+	clear_bit(XPT_OLD, &xprt->xpt_flags);
+
+	rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
+	rqstp->rq_chandle.defer = svc_defer;
+
+	if (serv->sv_stats)
+		serv->sv_stats->netcnt++;
+	return len;
+}
+
+/*
+ * Drop request
+ */
+void svc_drop(struct svc_rqst *rqstp)
+{
+	dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
+	svc_xprt_release(rqstp);
+}
+
+/*
+ * Return reply to client.
+ */
+int svc_send(struct svc_rqst *rqstp)
+{
+	struct svc_xprt	*xprt;
+	int		len;
+	struct xdr_buf	*xb;
+
+	xprt = rqstp->rq_xprt;
+	if (!xprt)
+		return -EFAULT;
+
+	/* release the receive skb before sending the reply */
+	rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
+
+	/* calculate over-all length */
+	xb = &rqstp->rq_res;
+	xb->len = xb->head[0].iov_len +
+		xb->page_len +
+		xb->tail[0].iov_len;
+
+	/* Grab mutex to serialize outgoing data. */
+	mutex_lock(&xprt->xpt_mutex);
+	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
+		len = -ENOTCONN;
+	else
+		len = xprt->xpt_ops->xpo_sendto(rqstp);
+	mutex_unlock(&xprt->xpt_mutex);
+	svc_xprt_release(rqstp);
+
+	if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
+		return 0;
+	return len;
+}
+
+/*
+ * Timer function to close old temporary transports, using
+ * a mark-and-sweep algorithm.
+ */
+static void svc_age_temp_xprts(unsigned long closure)
+{
+	struct svc_serv *serv = (struct svc_serv *)closure;
+	struct svc_xprt *xprt;
+	struct list_head *le, *next;
+	LIST_HEAD(to_be_aged);
+
+	dprintk("svc_age_temp_xprts\n");
+
+	if (!spin_trylock_bh(&serv->sv_lock)) {
+		/* busy, try again 1 sec later */
+		dprintk("svc_age_temp_xprts: busy\n");
+		mod_timer(&serv->sv_temptimer, jiffies + HZ);
+		return;
+	}
+
+	list_for_each_safe(le, next, &serv->sv_tempsocks) {
+		xprt = list_entry(le, struct svc_xprt, xpt_list);
+
+		/* First time through, just mark it OLD. Second time
+		 * through, close it. */
+		if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags))
+			continue;
+		if (atomic_read(&xprt->xpt_ref.refcount) > 1
+		    || test_bit(XPT_BUSY, &xprt->xpt_flags))
+			continue;
+		svc_xprt_get(xprt);
+		list_move(le, &to_be_aged);
+		set_bit(XPT_CLOSE, &xprt->xpt_flags);
+		set_bit(XPT_DETACHED, &xprt->xpt_flags);
+	}
+	spin_unlock_bh(&serv->sv_lock);
+
+	while (!list_empty(&to_be_aged)) {
+		le = to_be_aged.next;
+		/* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */
+		list_del_init(le);
+		xprt = list_entry(le, struct svc_xprt, xpt_list);
+
+		dprintk("queuing xprt %p for closing\n", xprt);
+
+		/* a thread will dequeue and close it soon */
+		svc_xprt_enqueue(xprt);
+		svc_xprt_put(xprt);
+	}
+
+	mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
+}
+
+/*
+ * Remove a dead transport
+ */
+void svc_delete_xprt(struct svc_xprt *xprt)
+{
+	struct svc_serv	*serv = xprt->xpt_server;
+
+	dprintk("svc: svc_delete_xprt(%p)\n", xprt);
+	xprt->xpt_ops->xpo_detach(xprt);
+
+	spin_lock_bh(&serv->sv_lock);
+	if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags))
+		list_del_init(&xprt->xpt_list);
+	/*
+	 * We used to delete the transport from whichever list
+	 * its xpt_ready node was on, but we don't actually
+	 * need to.  This is because the only time we're called
+	 * while still attached to a queue, the queue itself
+	 * is about to be destroyed (in svc_destroy).
+	 */
+	if (!test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) {
+		BUG_ON(atomic_read(&xprt->xpt_ref.refcount) < 2);
+		svc_xprt_put(xprt);
+		if (test_bit(XPT_TEMP, &xprt->xpt_flags))
+			serv->sv_tmpcnt--;
+	}
+	spin_unlock_bh(&serv->sv_lock);
+}
+
+void svc_close_xprt(struct svc_xprt *xprt)
+{
+	set_bit(XPT_CLOSE, &xprt->xpt_flags);
+	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
+		/* someone else will have to effect the close */
+		return;
+
+	svc_xprt_get(xprt);
+	svc_delete_xprt(xprt);
+	clear_bit(XPT_BUSY, &xprt->xpt_flags);
+	svc_xprt_put(xprt);
+}
+
+void svc_close_all(struct list_head *xprt_list)
+{
+	struct svc_xprt *xprt;
+	struct svc_xprt *tmp;
+
+	list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
+		set_bit(XPT_CLOSE, &xprt->xpt_flags);
+		if (test_bit(XPT_BUSY, &xprt->xpt_flags)) {
+			/* Waiting to be processed, but no threads left,
+			 * So just remove it from the waiting list
+			 */
+			list_del_init(&xprt->xpt_ready);
+			clear_bit(XPT_BUSY, &xprt->xpt_flags);
+		}
+		svc_close_xprt(xprt);
+	}
+}
+
+/*
+ * Handle defer and revisit of requests
+ */
+
+static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
+{
+	struct svc_deferred_req *dr =
+		container_of(dreq, struct svc_deferred_req, handle);
+	struct svc_xprt *xprt = dr->xprt;
+
+	if (too_many) {
+		svc_xprt_put(xprt);
+		kfree(dr);
+		return;
+	}
+	dprintk("revisit queued\n");
+	dr->xprt = NULL;
+	spin_lock(&xprt->xpt_lock);
+	list_add(&dr->handle.recent, &xprt->xpt_deferred);
+	spin_unlock(&xprt->xpt_lock);
+	set_bit(XPT_DEFERRED, &xprt->xpt_flags);
+	svc_xprt_enqueue(xprt);
+	svc_xprt_put(xprt);
+}
+
+static struct cache_deferred_req *svc_defer(struct cache_req *req)
+{
+	struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
+	int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len);
+	struct svc_deferred_req *dr;
+
+	if (rqstp->rq_arg.page_len)
+		return NULL; /* if more than a page, give up FIXME */
+	if (rqstp->rq_deferred) {
+		dr = rqstp->rq_deferred;
+		rqstp->rq_deferred = NULL;
+	} else {
+		int skip  = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
+		/* FIXME maybe discard if size too large */
+		dr = kmalloc(size, GFP_KERNEL);
+		if (dr == NULL)
+			return NULL;
+
+		dr->handle.owner = rqstp->rq_server;
+		dr->prot = rqstp->rq_prot;
+		memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen);
+		dr->addrlen = rqstp->rq_addrlen;
+		dr->daddr = rqstp->rq_daddr;
+		dr->argslen = rqstp->rq_arg.len >> 2;
+		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip,
+		       dr->argslen<<2);
+	}
+	svc_xprt_get(rqstp->rq_xprt);
+	dr->xprt = rqstp->rq_xprt;
+
+	dr->handle.revisit = svc_revisit;
+	return &dr->handle;
+}
+
+/*
+ * recv data from a deferred request into an active one
+ */
+static int svc_deferred_recv(struct svc_rqst *rqstp)
+{
+	struct svc_deferred_req *dr = rqstp->rq_deferred;
+
+	rqstp->rq_arg.head[0].iov_base = dr->args;
+	rqstp->rq_arg.head[0].iov_len = dr->argslen<<2;
+	rqstp->rq_arg.page_len = 0;
+	rqstp->rq_arg.len = dr->argslen<<2;
+	rqstp->rq_prot        = dr->prot;
+	memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
+	rqstp->rq_addrlen     = dr->addrlen;
+	rqstp->rq_daddr       = dr->daddr;
+	rqstp->rq_respages    = rqstp->rq_pages;
+	return dr->argslen<<2;
+}
+
+
+static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
+{
+	struct svc_deferred_req *dr = NULL;
+
+	if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags))
+		return NULL;
+	spin_lock(&xprt->xpt_lock);
+	clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
+	if (!list_empty(&xprt->xpt_deferred)) {
+		dr = list_entry(xprt->xpt_deferred.next,
+				struct svc_deferred_req,
+				handle.recent);
+		list_del_init(&dr->handle.recent);
+		set_bit(XPT_DEFERRED, &xprt->xpt_flags);
+	}
+	spin_unlock(&xprt->xpt_lock);
+	return dr;
+}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 53288cc..23a2ab6 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -81,27 +81,14 @@
 
 static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
 					 int *errp, int flags);
-static void		svc_delete_xprt(struct svc_xprt *xprt);
 static void		svc_udp_data_ready(struct sock *, int);
 static int		svc_udp_recvfrom(struct svc_rqst *);
 static int		svc_udp_sendto(struct svc_rqst *);
-static void		svc_close_xprt(struct svc_xprt *xprt);
 static void		svc_sock_detach(struct svc_xprt *);
 static void		svc_sock_free(struct svc_xprt *);
 
-static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
-static int svc_deferred_recv(struct svc_rqst *rqstp);
-static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static struct svc_xprt *
 svc_create_socket(struct svc_serv *, int, struct sockaddr *, int, int);
-static void svc_age_temp_xprts(unsigned long closure);
-
-/* apparently the "standard" is that clients close
- * idle connections after 5 minutes, servers after
- * 6 minutes
- *   http://www.connectathon.org/talks96/nfstcp.pdf
- */
-static int svc_conn_age_period = 6*60;
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key svc_key[2];
@@ -172,27 +159,6 @@ char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
 EXPORT_SYMBOL_GPL(svc_print_addr);
 
 /*
- * Queue up an idle server thread.  Must have pool->sp_lock held.
- * Note: this is really a stack rather than a queue, so that we only
- * use as many different threads as we need, and the rest don't pollute
- * the cache.
- */
-static inline void
-svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp)
-{
-	list_add(&rqstp->rq_list, &pool->sp_threads);
-}
-
-/*
- * Dequeue an nfsd thread.  Must have pool->sp_lock held.
- */
-static inline void
-svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
-{
-	list_del(&rqstp->rq_list);
-}
-
-/*
  * Release an skbuff after use
  */
 static void svc_release_skb(struct svc_rqst *rqstp)
@@ -230,219 +196,6 @@ svc_sock_wspace(struct svc_sock *svsk)
 	return wspace;
 }
 
-/*
- * Queue up a socket with data pending. If there are idle nfsd
- * processes, wake 'em up.
- *
- */
-void
-svc_xprt_enqueue(struct svc_xprt *xprt)
-{
-	struct svc_serv	*serv = xprt->xpt_server;
-	struct svc_pool *pool;
-	struct svc_rqst	*rqstp;
-	int cpu;
-
-	if (!(xprt->xpt_flags &
-	      ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
-		return;
-	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
-		return;
-
-	cpu = get_cpu();
-	pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
-	put_cpu();
-
-	spin_lock_bh(&pool->sp_lock);
-
-	if (!list_empty(&pool->sp_threads) &&
-	    !list_empty(&pool->sp_sockets))
-		printk(KERN_ERR
-		       "svc_xprt_enqueue: "
-		       "threads and transports both waiting??\n");
-
-	if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
-		/* Don't enqueue dead sockets */
-		dprintk("svc: transport %p is dead, not enqueued\n", xprt);
-		goto out_unlock;
-	}
-
-	/* Mark socket as busy. It will remain in this state until the
-	 * server has processed all pending data and put the socket back
-	 * on the idle list.  We update XPT_BUSY atomically because
-	 * it also guards against trying to enqueue the svc_sock twice.
-	 */
-	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
-		/* Don't enqueue socket while already enqueued */
-		dprintk("svc: transport %p busy, not enqueued\n", xprt);
-		goto out_unlock;
-	}
-	BUG_ON(xprt->xpt_pool != NULL);
-	xprt->xpt_pool = pool;
-
-	/* Handle pending connection */
-	if (test_bit(XPT_CONN, &xprt->xpt_flags))
-		goto process;
-
-	/* Handle close in-progress */
-	if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
-		goto process;
-
-	/* Check if we have space to reply to a request */
-	if (!xprt->xpt_ops->xpo_has_wspace(xprt)) {
-		/* Don't enqueue while not enough space for reply */
-		dprintk("svc: no write space, transport %p  not enqueued\n",
-			xprt);
-		xprt->xpt_pool = NULL;
-		clear_bit(XPT_BUSY, &xprt->xpt_flags);
-		goto out_unlock;
-	}
-
- process:
-	if (!list_empty(&pool->sp_threads)) {
-		rqstp = list_entry(pool->sp_threads.next,
-				   struct svc_rqst,
-				   rq_list);
-		dprintk("svc: transport %p served by daemon %p\n",
-			xprt, rqstp);
-		svc_thread_dequeue(pool, rqstp);
-		if (rqstp->rq_xprt)
-			printk(KERN_ERR
-				"svc_xprt_enqueue: server %p, rq_xprt=%p!\n",
-				rqstp, rqstp->rq_xprt);
-		rqstp->rq_xprt = xprt;
-		svc_xprt_get(xprt);
-		rqstp->rq_reserved = serv->sv_max_mesg;
-		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
-		BUG_ON(xprt->xpt_pool != pool);
-		wake_up(&rqstp->rq_wait);
-	} else {
-		dprintk("svc: transport %p put into queue\n", xprt);
-		list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
-		BUG_ON(xprt->xpt_pool != pool);
-	}
-
-out_unlock:
-	spin_unlock_bh(&pool->sp_lock);
-}
-EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
-
-/*
- * Dequeue the first socket.  Must be called with the pool->sp_lock held.
- */
-static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
-{
-	struct svc_xprt	*xprt;
-
-	if (list_empty(&pool->sp_sockets))
-		return NULL;
-
-	xprt = list_entry(pool->sp_sockets.next,
-			  struct svc_xprt, xpt_ready);
-	list_del_init(&xprt->xpt_ready);
-
-	dprintk("svc: transport %p dequeued, inuse=%d\n",
-		xprt, atomic_read(&xprt->xpt_ref.refcount));
-
-	return xprt;
-}
-
-/*
- * Having read something from a socket, check whether it
- * needs to be re-enqueued.
- * Note: XPT_DATA only gets cleared when a read-attempt finds
- * no (or insufficient) data.
- */
-void
-svc_xprt_received(struct svc_xprt *xprt)
-{
-	xprt->xpt_pool = NULL;
-	clear_bit(XPT_BUSY, &xprt->xpt_flags);
-	svc_xprt_enqueue(xprt);
-}
-EXPORT_SYMBOL_GPL(svc_xprt_received);
-
-/**
- * svc_reserve - change the space reserved for the reply to a request.
- * @rqstp:  The request in question
- * @space: new max space to reserve
- *
- * Each request reserves some space on the output queue of the socket
- * to make sure the reply fits.  This function reduces that reserved
- * space to be the amount of space used already, plus @space.
- *
- */
-void svc_reserve(struct svc_rqst *rqstp, int space)
-{
-	space += rqstp->rq_res.head[0].iov_len;
-
-	if (space < rqstp->rq_reserved) {
-		struct svc_xprt *xprt = rqstp->rq_xprt;
-		atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
-		rqstp->rq_reserved = space;
-
-		svc_xprt_enqueue(xprt);
-	}
-}
-
-static void svc_xprt_release(struct svc_rqst *rqstp)
-{
-	struct svc_xprt	*xprt = rqstp->rq_xprt;
-
-	rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
-
-	svc_free_res_pages(rqstp);
-	rqstp->rq_res.page_len = 0;
-	rqstp->rq_res.page_base = 0;
-
-	/* Reset response buffer and release
-	 * the reservation.
-	 * But first, check that enough space was reserved
-	 * for the reply, otherwise we have a bug!
-	 */
-	if ((rqstp->rq_res.len) >  rqstp->rq_reserved)
-		printk(KERN_ERR "RPC request reserved %d but used %d\n",
-		       rqstp->rq_reserved,
-		       rqstp->rq_res.len);
-
-	rqstp->rq_res.head[0].iov_len = 0;
-	svc_reserve(rqstp, 0);
-	rqstp->rq_xprt = NULL;
-
-	svc_xprt_put(xprt);
-}
-
-/*
- * External function to wake up a server waiting for data
- * This really only makes sense for services like lockd
- * which have exactly one thread anyway.
- */
-void
-svc_wake_up(struct svc_serv *serv)
-{
-	struct svc_rqst	*rqstp;
-	unsigned int i;
-	struct svc_pool *pool;
-
-	for (i = 0; i < serv->sv_nrpools; i++) {
-		pool = &serv->sv_pools[i];
-
-		spin_lock_bh(&pool->sp_lock);
-		if (!list_empty(&pool->sp_threads)) {
-			rqstp = list_entry(pool->sp_threads.next,
-					   struct svc_rqst,
-					   rq_list);
-			dprintk("svc: daemon %p woken up.\n", rqstp);
-			/*
-			svc_thread_dequeue(pool, rqstp);
-			rqstp->rq_xprt = NULL;
-			 */
-			wake_up(&rqstp->rq_wait);
-		}
-		spin_unlock_bh(&pool->sp_lock);
-	}
-}
-
 union svc_pktinfo_u {
 	struct in_pktinfo pkti;
 	struct in6_pktinfo pkti6;
@@ -1034,20 +787,6 @@ svc_tcp_data_ready(struct sock *sk, int count)
 		wake_up_interruptible(sk->sk_sleep);
 }
 
-static inline int svc_port_is_privileged(struct sockaddr *sin)
-{
-	switch (sin->sa_family) {
-	case AF_INET:
-		return ntohs(((struct sockaddr_in *)sin)->sin_port)
-			< PROT_SOCK;
-	case AF_INET6:
-		return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
-			< PROT_SOCK;
-	default:
-		return 0;
-	}
-}
-
 /*
  * Accept a TCP connection
  */
@@ -1444,307 +1183,6 @@ svc_sock_update_bufs(struct svc_serv *serv)
 	spin_unlock_bh(&serv->sv_lock);
 }
 
-static void
-svc_check_conn_limits(struct svc_serv *serv)
-{
-	char	buf[RPC_MAX_ADDRBUFLEN];
-
-	/* make sure that we don't have too many active connections.
-	 * If we have, something must be dropped.
-	 *
-	 * There's no point in trying to do random drop here for
-	 * DoS prevention. The NFS clients does 1 reconnect in 15
-	 * seconds. An attacker can easily beat that.
-	 *
-	 * The only somewhat efficient mechanism would be if drop
-	 * old connections from the same IP first.
-	 */
-	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
-		struct svc_xprt *xprt = NULL;
-		spin_lock_bh(&serv->sv_lock);
-		if (!list_empty(&serv->sv_tempsocks)) {
-			if (net_ratelimit()) {
-				/* Try to help the admin */
-				printk(KERN_NOTICE "%s: too many open  "
-					"connections, consider increasing the "
-					"number of nfsd threads\n",
-				       serv->sv_name);
-				printk(KERN_NOTICE
-				       "%s: last connection from %s\n",
-				       serv->sv_name, buf);
-			}
-			/*
-			 * Always select the oldest connection. It's not fair,
-			 * but so is life
-			 */
-			xprt = list_entry(serv->sv_tempsocks.prev,
-					  struct svc_xprt,
-					  xpt_list);
-			set_bit(XPT_CLOSE, &xprt->xpt_flags);
-			svc_xprt_get(xprt);
-		}
-		spin_unlock_bh(&serv->sv_lock);
-
-		if (xprt) {
-			svc_xprt_enqueue(xprt);
-			svc_xprt_put(xprt);
-		}
-	}
-}
-
-/*
- * Receive the next request on any socket.  This code is carefully
- * organised not to touch any cachelines in the shared svc_serv
- * structure, only cachelines in the local svc_pool.
- */
-int
-svc_recv(struct svc_rqst *rqstp, long timeout)
-{
-	struct svc_xprt		*xprt = NULL;
-	struct svc_serv		*serv = rqstp->rq_server;
-	struct svc_pool		*pool = rqstp->rq_pool;
-	int			len, i;
-	int			pages;
-	struct xdr_buf		*arg;
-	DECLARE_WAITQUEUE(wait, current);
-
-	dprintk("svc: server %p waiting for data (to = %ld)\n",
-		rqstp, timeout);
-
-	if (rqstp->rq_xprt)
-		printk(KERN_ERR
-			"svc_recv: service %p, transport not NULL!\n",
-			 rqstp);
-	if (waitqueue_active(&rqstp->rq_wait))
-		printk(KERN_ERR
-			"svc_recv: service %p, wait queue active!\n",
-			 rqstp);
-
-
-	/* now allocate needed pages.  If we get a failure, sleep briefly */
-	pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
-	for (i=0; i < pages ; i++)
-		while (rqstp->rq_pages[i] == NULL) {
-			struct page *p = alloc_page(GFP_KERNEL);
-			if (!p)
-				schedule_timeout_uninterruptible(msecs_to_jiffies(500));
-			rqstp->rq_pages[i] = p;
-		}
-	rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
-	BUG_ON(pages >= RPCSVC_MAXPAGES);
-
-	/* Make arg->head point to first page and arg->pages point to rest */
-	arg = &rqstp->rq_arg;
-	arg->head[0].iov_base = page_address(rqstp->rq_pages[0]);
-	arg->head[0].iov_len = PAGE_SIZE;
-	arg->pages = rqstp->rq_pages + 1;
-	arg->page_base = 0;
-	/* save at least one page for response */
-	arg->page_len = (pages-2)*PAGE_SIZE;
-	arg->len = (pages-1)*PAGE_SIZE;
-	arg->tail[0].iov_len = 0;
-
-	try_to_freeze();
-	cond_resched();
-	if (signalled())
-		return -EINTR;
-
-	spin_lock_bh(&pool->sp_lock);
-	xprt = svc_xprt_dequeue(pool);
-	if (xprt) {
-		rqstp->rq_xprt = xprt;
-		svc_xprt_get(xprt);
-		rqstp->rq_reserved = serv->sv_max_mesg;
-		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
-	} else {
-		/* No data pending. Go to sleep */
-		svc_thread_enqueue(pool, rqstp);
-
-		/*
-		 * We have to be able to interrupt this wait
-		 * to bring down the daemons ...
-		 */
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&rqstp->rq_wait, &wait);
-		spin_unlock_bh(&pool->sp_lock);
-
-		schedule_timeout(timeout);
-
-		try_to_freeze();
-
-		spin_lock_bh(&pool->sp_lock);
-		remove_wait_queue(&rqstp->rq_wait, &wait);
-
-		xprt = rqstp->rq_xprt;
-		if (!xprt) {
-			svc_thread_dequeue(pool, rqstp);
-			spin_unlock_bh(&pool->sp_lock);
-			dprintk("svc: server %p, no data yet\n", rqstp);
-			return signalled()? -EINTR : -EAGAIN;
-		}
-	}
-	spin_unlock_bh(&pool->sp_lock);
-
-	len = 0;
-	if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
-		dprintk("svc_recv: found XPT_CLOSE\n");
-		svc_delete_xprt(xprt);
-	} else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
-		struct svc_xprt *newxpt;
-		newxpt = xprt->xpt_ops->xpo_accept(xprt);
-		if (newxpt) {
-			/*
-			 * We know this module_get will succeed because the
-			 * listener holds a reference too
-			 */
-			__module_get(newxpt->xpt_class->xcl_owner);
-			svc_check_conn_limits(xprt->xpt_server);
-			spin_lock_bh(&serv->sv_lock);
-			set_bit(XPT_TEMP, &newxpt->xpt_flags);
-			list_add(&newxpt->xpt_list, &serv->sv_tempsocks);
-			serv->sv_tmpcnt++;
-			if (serv->sv_temptimer.function == NULL) {
-				/* setup timer to age temp sockets */
-				setup_timer(&serv->sv_temptimer,
-					    svc_age_temp_xprts,
-					    (unsigned long)serv);
-				mod_timer(&serv->sv_temptimer,
-					  jiffies + svc_conn_age_period * HZ);
-			}
-			spin_unlock_bh(&serv->sv_lock);
-			svc_xprt_received(newxpt);
-		}
-		svc_xprt_received(xprt);
-	} else {
-		dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
-			rqstp, pool->sp_id, xprt,
-			atomic_read(&xprt->xpt_ref.refcount));
-		rqstp->rq_deferred = svc_deferred_dequeue(xprt);
-		if (rqstp->rq_deferred) {
-			svc_xprt_received(xprt);
-			len = svc_deferred_recv(rqstp);
-		} else
-			len = xprt->xpt_ops->xpo_recvfrom(rqstp);
-		dprintk("svc: got len=%d\n", len);
-	}
-
-	/* No data, incomplete (TCP) read, or accept() */
-	if (len == 0 || len == -EAGAIN) {
-		rqstp->rq_res.len = 0;
-		svc_xprt_release(rqstp);
-		return -EAGAIN;
-	}
-	clear_bit(XPT_OLD, &xprt->xpt_flags);
-
-	rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
-	rqstp->rq_chandle.defer = svc_defer;
-
-	if (serv->sv_stats)
-		serv->sv_stats->netcnt++;
-	return len;
-}
-
-/*
- * Drop request
- */
-void
-svc_drop(struct svc_rqst *rqstp)
-{
-	dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
-	svc_xprt_release(rqstp);
-}
-
-/*
- * Return reply to client.
- */
-int
-svc_send(struct svc_rqst *rqstp)
-{
-	struct svc_xprt	*xprt;
-	int		len;
-	struct xdr_buf	*xb;
-
-	xprt = rqstp->rq_xprt;
-	if (!xprt)
-		return -EFAULT;
-
-	/* release the receive skb before sending the reply */
-	rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
-
-	/* calculate over-all length */
-	xb = & rqstp->rq_res;
-	xb->len = xb->head[0].iov_len +
-		xb->page_len +
-		xb->tail[0].iov_len;
-
-	/* Grab mutex to serialize outgoing data. */
-	mutex_lock(&xprt->xpt_mutex);
-	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
-		len = -ENOTCONN;
-	else
-		len = xprt->xpt_ops->xpo_sendto(rqstp);
-	mutex_unlock(&xprt->xpt_mutex);
-	svc_xprt_release(rqstp);
-
-	if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
-		return 0;
-	return len;
-}
-
-/*
- * Timer function to close old temporary sockets, using
- * a mark-and-sweep algorithm.
- */
-static void
-svc_age_temp_xprts(unsigned long closure)
-{
-	struct svc_serv *serv = (struct svc_serv *)closure;
-	struct svc_xprt *xprt;
-	struct list_head *le, *next;
-	LIST_HEAD(to_be_aged);
-
-	dprintk("svc_age_temp_xprts\n");
-
-	if (!spin_trylock_bh(&serv->sv_lock)) {
-		/* busy, try again 1 sec later */
-		dprintk("svc_age_temp_xprts: busy\n");
-		mod_timer(&serv->sv_temptimer, jiffies + HZ);
-		return;
-	}
-
-	list_for_each_safe(le, next, &serv->sv_tempsocks) {
-		xprt = list_entry(le, struct svc_xprt, xpt_list);
-
-		/* First time through, just mark it OLD. Second time
-		 * through, close it. */
-		if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags))
-			continue;
-		if (atomic_read(&xprt->xpt_ref.refcount) > 1
-		    || test_bit(XPT_BUSY, &xprt->xpt_flags))
-			continue;
-		svc_xprt_get(xprt);
-		list_move(le, &to_be_aged);
-		set_bit(XPT_CLOSE, &xprt->xpt_flags);
-		set_bit(XPT_DETACHED, &xprt->xpt_flags);
-	}
-	spin_unlock_bh(&serv->sv_lock);
-
-	while (!list_empty(&to_be_aged)) {
-		le = to_be_aged.next;
-		/* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */
-		list_del_init(le);
-		xprt = list_entry(le, struct svc_xprt, xpt_list);
-
-		dprintk("queuing xprt %p for closing\n", xprt);
-
-		/* a thread will dequeue and close it soon */
-		svc_xprt_enqueue(xprt);
-		svc_xprt_put(xprt);
-	}
-
-	mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
-}
-
 /*
  * Initialize socket for RPC use and create svc_sock struct
  * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
@@ -1931,160 +1369,3 @@ static void svc_sock_free(struct svc_xprt *xprt)
 	kfree(svsk);
 }
 
-/*
- * Remove a dead transport
- */
-static void
-svc_delete_xprt(struct svc_xprt *xprt)
-{
-	struct svc_serv	*serv = xprt->xpt_server;
-
-	dprintk("svc: svc_delete_xprt(%p)\n", xprt);
-	xprt->xpt_ops->xpo_detach(xprt);
-
-	spin_lock_bh(&serv->sv_lock);
-	if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags))
-		list_del_init(&xprt->xpt_list);
-	/*
-	 * We used to delete the transport from whichever list
-	 * it's sk_xprt.xpt_ready node was on, but we don't actually
-	 * need to.  This is because the only time we're called
-	 * while still attached to a queue, the queue itself
-	 * is about to be destroyed (in svc_destroy).
-	 */
-	if (!test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) {
-		BUG_ON(atomic_read(&xprt->xpt_ref.refcount) < 2);
-		svc_xprt_put(xprt);
-		if (test_bit(XPT_TEMP, &xprt->xpt_flags))
-			serv->sv_tmpcnt--;
-	}
-	spin_unlock_bh(&serv->sv_lock);
-}
-
-static void svc_close_xprt(struct svc_xprt *xprt)
-{
-	set_bit(XPT_CLOSE, &xprt->xpt_flags);
-	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
-		/* someone else will have to effect the close */
-		return;
-
-	svc_xprt_get(xprt);
-	svc_delete_xprt(xprt);
-	clear_bit(XPT_BUSY, &xprt->xpt_flags);
-	svc_xprt_put(xprt);
-}
-
-void svc_close_all(struct list_head *xprt_list)
-{
-	struct svc_xprt *xprt;
-	struct svc_xprt *tmp;
-
-	list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
-		set_bit(XPT_CLOSE, &xprt->xpt_flags);
-		if (test_bit(XPT_BUSY, &xprt->xpt_flags)) {
-			/* Waiting to be processed, but no threads left,
-			 * So just remove it from the waiting list
-			 */
-			list_del_init(&xprt->xpt_ready);
-			clear_bit(XPT_BUSY, &xprt->xpt_flags);
-		}
-		svc_close_xprt(xprt);
-	}
-}
-
-/*
- * Handle defer and revisit of requests
- */
-
-static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
-{
-	struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle);
-	struct svc_xprt *xprt = dr->xprt;
-
-	if (too_many) {
-		svc_xprt_put(xprt);
-		kfree(dr);
-		return;
-	}
-	dprintk("revisit queued\n");
-	dr->xprt = NULL;
-	spin_lock(&xprt->xpt_lock);
-	list_add(&dr->handle.recent, &xprt->xpt_deferred);
-	spin_unlock(&xprt->xpt_lock);
-	set_bit(XPT_DEFERRED, &xprt->xpt_flags);
-	svc_xprt_enqueue(xprt);
-	svc_xprt_put(xprt);
-}
-
-static struct cache_deferred_req *
-svc_defer(struct cache_req *req)
-{
-	struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
-	int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len);
-	struct svc_deferred_req *dr;
-
-	if (rqstp->rq_arg.page_len)
-		return NULL; /* if more than a page, give up FIXME */
-	if (rqstp->rq_deferred) {
-		dr = rqstp->rq_deferred;
-		rqstp->rq_deferred = NULL;
-	} else {
-		int skip  = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
-		/* FIXME maybe discard if size too large */
-		dr = kmalloc(size, GFP_KERNEL);
-		if (dr == NULL)
-			return NULL;
-
-		dr->handle.owner = rqstp->rq_server;
-		dr->prot = rqstp->rq_prot;
-		memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen);
-		dr->addrlen = rqstp->rq_addrlen;
-		dr->daddr = rqstp->rq_daddr;
-		dr->argslen = rqstp->rq_arg.len >> 2;
-		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2);
-	}
-	svc_xprt_get(rqstp->rq_xprt);
-	dr->xprt = rqstp->rq_xprt;
-
-	dr->handle.revisit = svc_revisit;
-	return &dr->handle;
-}
-
-/*
- * recv data from a deferred request into an active one
- */
-static int svc_deferred_recv(struct svc_rqst *rqstp)
-{
-	struct svc_deferred_req *dr = rqstp->rq_deferred;
-
-	rqstp->rq_arg.head[0].iov_base = dr->args;
-	rqstp->rq_arg.head[0].iov_len = dr->argslen<<2;
-	rqstp->rq_arg.page_len = 0;
-	rqstp->rq_arg.len = dr->argslen<<2;
-	rqstp->rq_prot        = dr->prot;
-	memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
-	rqstp->rq_addrlen     = dr->addrlen;
-	rqstp->rq_daddr       = dr->daddr;
-	rqstp->rq_respages    = rqstp->rq_pages;
-	return dr->argslen<<2;
-}
-
-
-static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
-{
-	struct svc_deferred_req *dr = NULL;
-
-	if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags))
-		return NULL;
-	spin_lock(&xprt->xpt_lock);
-	clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
-	if (!list_empty(&xprt->xpt_deferred)) {
-		dr = list_entry(xprt->xpt_deferred.next,
-				struct svc_deferred_req,
-				handle.recent);
-		list_del_init(&dr->handle.recent);
-		set_bit(XPT_DEFERRED, &xprt->xpt_flags);
-	}
-	spin_unlock(&xprt->xpt_lock);
-	return dr;
-}

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 33/38] svc: Add transport hdr size for defer/revisit
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (30 preceding siblings ...)
  2007-11-29 22:41   ` [RFC,PATCH 32/38] svc: Move the xprt independent code to the svc_xprt.c file Tom Tucker
@ 2007-11-29 22:41   ` Tom Tucker
       [not found]     ` <20071129224103.14563.72780.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
  2007-11-29 22:41   ` [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files Tom Tucker
                     ` (5 subsequent siblings)
  37 siblings, 1 reply; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:41 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


Some transports have a header in front of the RPC header. The current
defer/revisit processing considers only head[0].iov_len and rq_arg.len to
determine how much to back up when saving the original request
to revisit. Add a field to the rqstp structure to save the size
of the transport header so svc_defer can correctly compute
the start of a request. 

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc.h |    2 ++
 net/sunrpc/svc_xprt.c      |   36 +++++++++++++++++++++++++++---------
 net/sunrpc/svcsock.c       |    2 ++
 3 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 04eb20e..f2ada2a 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -217,6 +217,7 @@ struct svc_rqst {
 	void *			rq_xprt_ctxt;	/* transport specific context ptr */
 	struct svc_deferred_req*rq_deferred;	/* deferred request we are replaying */
 
+	size_t			rq_xprt_hlen;	/* xprt header len */
 	struct xdr_buf		rq_arg;
 	struct xdr_buf		rq_res;
 	struct page *		rq_pages[RPCSVC_MAXPAGES];
@@ -322,6 +323,7 @@ struct svc_deferred_req {
 	size_t			addrlen;
 	union svc_addr_u	daddr;	/* where reply must come from */
 	struct cache_deferred_req handle;
+	int			xprt_hlen;
 	int			argslen;
 	__be32			args[0];
 };
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 56204e9..b31ba0e 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -29,7 +29,6 @@
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svc_xprt.h>
 
@@ -827,10 +826,18 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
 	svc_xprt_put(xprt);
 }
 
+/*
+ * Save the request off for later processing. The request buffer looks
+ * like this:
+ *
+ * <xprt-header><rpc-header><rpc-pagelist><rpc-tail>
+ *
+ * This code can only handle requests that consist of an xprt-header
+ * and rpc-header.
+ */
 static struct cache_deferred_req *svc_defer(struct cache_req *req)
 {
 	struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
-	int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len);
 	struct svc_deferred_req *dr;
 
 	if (rqstp->rq_arg.page_len)
@@ -839,8 +846,10 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
 		dr = rqstp->rq_deferred;
 		rqstp->rq_deferred = NULL;
 	} else {
-		int skip  = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
+		int skip;
+		int size;
 		/* FIXME maybe discard if size too large */
+		size = sizeof(struct svc_deferred_req) + rqstp->rq_arg.len;
 		dr = kmalloc(size, GFP_KERNEL);
 		if (dr == NULL)
 			return NULL;
@@ -851,8 +860,12 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
 		dr->addrlen = rqstp->rq_addrlen;
 		dr->daddr = rqstp->rq_daddr;
 		dr->argslen = rqstp->rq_arg.len >> 2;
-		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip,
-		       dr->argslen<<2);
+		dr->xprt_hlen = rqstp->rq_xprt_hlen;
+
+		/* back up head to the start of the buffer and copy */
+		skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
+		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
+		       dr->argslen << 2);
 	}
 	svc_xprt_get(rqstp->rq_xprt);
 	dr->xprt = rqstp->rq_xprt;
@@ -868,16 +881,21 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
 {
 	struct svc_deferred_req *dr = rqstp->rq_deferred;
 
-	rqstp->rq_arg.head[0].iov_base = dr->args;
-	rqstp->rq_arg.head[0].iov_len = dr->argslen<<2;
+	/* setup iov_base past transport header */
+	rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2);
+	/* The iov_len does not include the transport header bytes */
+	rqstp->rq_arg.head[0].iov_len = (dr->argslen<<2) - dr->xprt_hlen;
 	rqstp->rq_arg.page_len = 0;
-	rqstp->rq_arg.len = dr->argslen<<2;
+	/* The rq_arg.len includes the transport header bytes */
+	rqstp->rq_arg.len     = dr->argslen<<2;
 	rqstp->rq_prot        = dr->prot;
 	memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
 	rqstp->rq_addrlen     = dr->addrlen;
+	/* Save off transport header len in case we get deferred again */
+	rqstp->rq_xprt_hlen   = dr->xprt_hlen;
 	rqstp->rq_daddr       = dr->daddr;
 	rqstp->rq_respages    = rqstp->rq_pages;
-	return dr->argslen<<2;
+	return (dr->argslen<<2) - dr->xprt_hlen;
 }
 
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 23a2ab6..03207c9 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -397,6 +397,8 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen)
 	};
 	int len;
 
+	rqstp->rq_xprt_hlen = 0;
+
 	len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
 				msg.msg_flags);
 

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (31 preceding siblings ...)
  2007-11-29 22:41   ` [RFC,PATCH 33/38] svc: Add transport hdr size for defer/revisit Tom Tucker
@ 2007-11-29 22:41   ` Tom Tucker
       [not found]     ` <20071129224105.14563.48684.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
  2007-11-29 22:41   ` [RFC,PATCH 35/38] knfsd: Support adding transports by writing portlist file Tom Tucker
                     ` (4 subsequent siblings)
  37 siblings, 1 reply; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:41 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


Add a read-only "transports" file that, when read, lists each registered
svc transport class together with its maximum payload size.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    2 ++
 net/sunrpc/svc_xprt.c           |   28 ++++++++++++++++++++++++++++
 net/sunrpc/sysctl.c             |   31 +++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 0 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 36f8b09..c2fa41d 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -79,11 +79,13 @@ void	svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
 void	svc_xprt_enqueue(struct svc_xprt *xprt);
 int	svc_port_is_privileged(struct sockaddr *sin);
 void	svc_delete_xprt(struct svc_xprt *xprt);
+int	svc_print_xprts(char *buf, int maxlen);
 
 static inline void svc_xprt_get(struct svc_xprt *xprt)
 {
 	kref_get(&xprt->xpt_ref);
 }
+
 static inline void svc_xprt_set_local(struct svc_xprt *xprt,
 				      struct sockaddr *sa, int salen)
 {
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b31ba0e..7416e66 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -93,6 +93,34 @@ int svc_unreg_xprt_class(struct svc_xprt_class *xcl)
 }
 EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
 
+/*
+ * Format the transport list for printing
+ */
+int svc_print_xprts(char *buf, int maxlen)
+{
+	struct list_head *le;
+	char tmpstr[80];
+	int len = 0;
+	buf[0] = '\0';
+
+	spin_lock(&svc_xprt_class_lock);
+	list_for_each(le, &svc_xprt_class_list) {
+		int slen;
+		struct svc_xprt_class *xcl =
+			list_entry(le, struct svc_xprt_class, xcl_list);
+
+		sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
+		slen = strlen(tmpstr);
+		if (len + slen > maxlen)
+			break;
+		len += slen;
+		strcat(buf, tmpstr);
+	}
+	spin_unlock(&svc_xprt_class_lock);
+
+	return len;
+}
+
 static void svc_xprt_free(struct kref *kref)
 {
 	struct svc_xprt *xprt =
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 2be714e..fd7cf59 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -18,6 +18,7 @@
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/svc_xprt.h>
 
 /*
  * Declare the debug flags here
@@ -48,6 +49,30 @@ rpc_unregister_sysctl(void)
 	}
 }
 
+static int proc_do_xprt(ctl_table *table, int write, struct file *file,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	char tmpbuf[256];
+	int len;
+	if ((*ppos && !write) || !*lenp) {
+		*lenp = 0;
+		return 0;
+	}
+	if (write)
+		return -EINVAL;
+	else {
+		len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
+		if (!access_ok(VERIFY_WRITE, buffer, len))
+			return -EFAULT;
+
+		if (__copy_to_user(buffer, tmpbuf, len))
+			return -EFAULT;
+	}
+	*lenp -= len;
+	*ppos += len;
+	return 0;
+}
+
 static int
 proc_dodebug(ctl_table *table, int write, struct file *file,
 				void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -140,6 +165,12 @@ static ctl_table debug_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dodebug
 	},
+	{
+		.procname	= "transports",
+		.maxlen		= 256,
+		.mode		= 0444,
+		.proc_handler	= &proc_do_xprt,
+	},
 	{ .ctl_name = 0 }
 };
 

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 35/38] knfsd: Support adding transports by writing portlist file
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (32 preceding siblings ...)
  2007-11-29 22:41   ` [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files Tom Tucker
@ 2007-11-29 22:41   ` Tom Tucker
  2007-11-29 22:41   ` [RFC,PATCH 36/38] svc: Add svc API that queries for a transport instance Tom Tucker
                     ` (3 subsequent siblings)
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:41 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


Update the write handler for the portlist file to allow creating new
listening endpoints on a transport. The general form of the string is:

<transport_name><space><port number>

For example:

tcp 2049

This is intended to support the creation of a listening endpoint for
RDMA transports without adding #ifdef code to the nfssvc.c file.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 fs/nfsd/nfsctl.c |   16 ++++++++++++++++
 1 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 77dc989..1f998c0 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -554,6 +554,22 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
 		kfree(toclose);
 		return len;
 	}
+	/*
+	 * Add a transport listener by writing it's transport name
+	 */
+	if (isalnum(buf[0])) {
+		int err;
+		char transport[16];
+		int port;
+		if (sscanf(buf, "%15s %4d", transport, &port) == 2) {
+			err = nfsd_create_serv();
+			if (!err)
+				err = svc_create_xprt(nfsd_serv,
+						      transport, port,
+						      SVC_SOCK_ANONYMOUS);
+			return err < 0 ? err : 0;
+		}
+	}
 	return -EINVAL;
 }
 

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 36/38] svc: Add svc API that queries for a transport instance
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (33 preceding siblings ...)
  2007-11-29 22:41   ` [RFC,PATCH 35/38] knfsd: Support adding transports by writing portlist file Tom Tucker
@ 2007-11-29 22:41   ` Tom Tucker
       [not found]     ` <20071129224109.14563.34563.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
  2007-11-29 22:41   ` [RFC,PATCH 37/38] knfsd: Modify write_ports to use svc_find_xprt service Tom Tucker
                     ` (2 subsequent siblings)
  37 siblings, 1 reply; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:41 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


Add a new svc function that allows a service to query whether a 
transport instance has already been created. This is used in lockd 
to determine whether or not a transport needs to be created when
a lockd instance is brought up. 

Specifying 0 for the address family or port is effectively a wild-card,
and will result in matching the first transport in the service's list
that has a matching class name.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 fs/lockd/svc.c                  |   16 ++--------------
 include/linux/sunrpc/svc_xprt.h |    2 ++
 net/sunrpc/svc_xprt.c           |   31 +++++++++++++++++++++++++++++++
 3 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index a8e79a9..470af01 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -219,18 +219,6 @@ lockd(struct svc_rqst *rqstp)
 	module_put_and_exit(0);
 }
 
-static int find_xprt(struct svc_serv *serv, char *proto)
-{
-	struct svc_xprt *xprt;
-	int found = 0;
-	list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list)
-		if (strcmp(xprt->xpt_class->xcl_name, proto) == 0) {
-			found = 1;
-			break;
-		}
-	return found;
-}
-
 /*
  * Make any sockets that are needed but not present.
  * If nlm_udpport or nlm_tcpport were set as module
@@ -242,11 +230,11 @@ static int make_socks(struct svc_serv *serv, int proto)
 	int err = 0;
 
 	if (proto == IPPROTO_UDP || nlm_udpport)
-		if (!find_xprt(serv, "udp"))
+		if (!svc_find_xprt(serv, "udp", 0, 0))
 			err = svc_create_xprt(serv, "udp", nlm_udpport,
 					      SVC_SOCK_DEFAULTS);
 	if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport))
-		if (!find_xprt(serv, "tcp"))
+		if (!svc_find_xprt(serv, "tcp", 0, 0))
 			err = svc_create_xprt(serv, "tcp", nlm_tcpport,
 					      SVC_SOCK_DEFAULTS);
 
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index c2fa41d..30fcc82 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -80,6 +80,8 @@ void	svc_xprt_enqueue(struct svc_xprt *xprt);
 int	svc_port_is_privileged(struct sockaddr *sin);
 void	svc_delete_xprt(struct svc_xprt *xprt);
 int	svc_print_xprts(char *buf, int maxlen);
+struct svc_xprt *
+svc_find_xprt(struct svc_serv *serv, char *xprt_class, int af, int port);
 
 static inline void svc_xprt_get(struct svc_xprt *xprt)
 {
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 7416e66..247f0fb 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -945,3 +945,34 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
 	spin_unlock(&xprt->xpt_lock);
 	return dr;
 }
+
+/*
+ * Return the transport instance pointer for the endpoint accepting
+ * connections/peer traffic from the specified transport class,
+ * address family and port.
+ */
+struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
+			       int af, int port)
+{
+	struct svc_xprt *xprt;
+	struct svc_xprt *found = NULL;
+
+	/* Sanity check the args */
+	if (!serv || !xcl_name)
+		return found;
+
+	spin_lock_bh(&serv->sv_lock);
+	list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
+		if (strcmp(xprt->xpt_class->xcl_name, xcl_name))
+			continue;
+		if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
+			continue;
+		if (port && port != svc_xprt_local_port(xprt))
+			continue;
+		found = xprt;
+		break;
+	}
+	spin_unlock_bh(&serv->sv_lock);
+	return found;
+}
+EXPORT_SYMBOL_GPL(svc_find_xprt);

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 37/38] knfsd: Modify write_ports to use svc_find_xprt service
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (34 preceding siblings ...)
  2007-11-29 22:41   ` [RFC,PATCH 36/38] svc: Add svc API that queries for a transport instance Tom Tucker
@ 2007-11-29 22:41   ` Tom Tucker
  2007-11-29 22:41   ` [RFC,PATCH 38/38] svc: Add svc_xprt_names service to replace svc_sock_names Tom Tucker
  2007-11-29 23:18   ` [RFC,PATCH 00/38] SVC Transport Switch Tom Tucker
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:41 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


This patch enhances the write_ports function as follows:

- Check if a server transport instance already exists before attempting to 
  create a new one, and 

- Implement the ability to remove a previously created server transport
  instance. 

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 fs/nfsd/nfsctl.c      |   36 +++++++++++++++++++++++++++++++++---
 net/sunrpc/svc_xprt.c |    1 +
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1f998c0..e0e53e1 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -540,7 +540,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
 		}
 		return err < 0 ? err : 0;
 	}
-	if (buf[0] == '-') {
+	if (buf[0] == '-' && isdigit(buf[1])) {
 		char *toclose = kstrdup(buf+1, GFP_KERNEL);
 		int len = 0;
 		if (!toclose)
@@ -557,16 +557,46 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
 	/*
 	 * Add a transport listener by writing it's transport name
 	 */
-	if (isalnum(buf[0])) {
+	if (isalpha(buf[0])) {
 		int err;
 		char transport[16];
 		int port;
 		if (sscanf(buf, "%15s %4d", transport, &port) == 2) {
 			err = nfsd_create_serv();
-			if (!err)
+			if (!err) {
+				if (svc_find_xprt(nfsd_serv, transport,
+						  AF_UNSPEC, port))
+					return -EADDRINUSE;
+
 				err = svc_create_xprt(nfsd_serv,
 						      transport, port,
 						      SVC_SOCK_ANONYMOUS);
+			}
+			return err < 0 ? err : 0;
+		}
+	}
+	/*
+	 * Remove a transport by writing it's transport name and port number
+	 */
+	if (buf[0] == '-' && isalpha(buf[1])) {
+		struct svc_xprt *xprt;
+		int err = -EINVAL;
+		char transport[16];
+		int port;
+		if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
+			if (port == 0)
+				return -EINVAL;
+			lock_kernel();
+			if (nfsd_serv) {
+				xprt = svc_find_xprt(nfsd_serv, transport,
+						     AF_UNSPEC, port);
+				if (xprt) {
+					svc_close_xprt(xprt);
+					err = 0;
+				} else
+					err = -ENOENT;
+			}
+			unlock_kernel();
 			return err < 0 ? err : 0;
 		}
 	}
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 247f0fb..57d50db 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -810,6 +810,7 @@ void svc_close_xprt(struct svc_xprt *xprt)
 	clear_bit(XPT_BUSY, &xprt->xpt_flags);
 	svc_xprt_put(xprt);
 }
+EXPORT_SYMBOL_GPL(svc_close_xprt);
 
 void svc_close_all(struct list_head *xprt_list)
 {

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 38/38] svc: Add svc_xprt_names service to replace svc_sock_names
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (35 preceding siblings ...)
  2007-11-29 22:41   ` [RFC,PATCH 37/38] knfsd: Modify write_ports to use svc_find_xprt service Tom Tucker
@ 2007-11-29 22:41   ` Tom Tucker
  2007-11-29 23:18   ` [RFC,PATCH 00/38] SVC Transport Switch Tom Tucker
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:41 UTC (permalink / raw)
  To: bfields-ag9A2Eb6PFsgsBAKwltoeQ; +Cc: linux-nfs


Create a transport independent version of the svc_sock_names function.

The toclose capability of the svc_sock_names service can be implemented
using the svc_find_xprt and svc_close_xprt services.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 fs/nfsd/nfsctl.c                |    2 +-
 include/linux/sunrpc/svc_xprt.h |    1 +
 net/sunrpc/svc_xprt.c           |   35 +++++++++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 1 deletions(-)

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index e0e53e1..f6f68fa 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -503,7 +503,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
 		int len = 0;
 		lock_kernel();
 		if (nfsd_serv)
-			len = svc_sock_names(buf, nfsd_serv, NULL);
+			len = svc_xprt_names(nfsd_serv, buf, 0);
 		unlock_kernel();
 		return len;
 	}
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 30fcc82..d19c91e 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -80,6 +80,7 @@ void	svc_xprt_enqueue(struct svc_xprt *xprt);
 int	svc_port_is_privileged(struct sockaddr *sin);
 void	svc_delete_xprt(struct svc_xprt *xprt);
 int	svc_print_xprts(char *buf, int maxlen);
+int	svc_xprt_names(struct svc_serv *serv, char *buf, int buflen);
 struct svc_xprt *
 svc_find_xprt(struct svc_serv *serv, char *xprt_class, int af, int port);
 
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 57d50db..68ef65e 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -977,3 +977,38 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
 	return found;
 }
 EXPORT_SYMBOL_GPL(svc_find_xprt);
+
+/*
+ * Format a buffer with a list of the active transports. A zero for
+ * the buflen parameter disables target buffer overflow checking.
+ */
+int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen)
+{
+	struct svc_xprt *xprt;
+	char xprt_str[64];
+	int totlen = 0;
+	int len;
+
+	/* Sanity check args */
+	if (!serv)
+		return 0;
+
+	spin_lock_bh(&serv->sv_lock);
+	list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
+		len = snprintf(xprt_str, sizeof(xprt_str),
+			       "%s %d\n", xprt->xpt_class->xcl_name,
+			       svc_xprt_local_port(xprt));
+		/* If the string was truncated, replace with error string */
+		if (len >= sizeof(xprt_str))
+			strcpy(xprt_str, "name-too-long\n");
+		/* Don't overflow buffer */
+		len = strlen(xprt_str);
+		if (buflen && (len + totlen >= buflen))
+			break;
+		strcpy(buf+totlen, xprt_str);
+		totlen += len;
+	}
+	spin_unlock_bh(&serv->sv_lock);
+	return totlen;
+}
+EXPORT_SYMBOL_GPL(svc_xprt_names);

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files
       [not found] ` <20071129225142.15107.46200.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
@ 2007-11-29 22:54   ` Tom Tucker
  0 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:54 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs


Add a read-only file, /proc/sys/sunrpc/transports, that, when read, lists
the name and maximum payload size of each registered svc transport class.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    2 ++
 net/sunrpc/svc_xprt.c           |   28 ++++++++++++++++++++++++++++
 net/sunrpc/sysctl.c             |   31 +++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 0 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 36f8b09..c2fa41d 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -79,11 +79,13 @@ void	svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
 void	svc_xprt_enqueue(struct svc_xprt *xprt);
 int	svc_port_is_privileged(struct sockaddr *sin);
 void	svc_delete_xprt(struct svc_xprt *xprt);
+int	svc_print_xprts(char *buf, int maxlen);
 
 static inline void svc_xprt_get(struct svc_xprt *xprt)
 {
 	kref_get(&xprt->xpt_ref);
 }
+
 static inline void svc_xprt_set_local(struct svc_xprt *xprt,
 				      struct sockaddr *sa, int salen)
 {
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b31ba0e..7416e66 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -93,6 +93,34 @@ int svc_unreg_xprt_class(struct svc_xprt_class *xcl)
 }
 EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
 
+/*
+ * Format the transport list for printing
+ */
+int svc_print_xprts(char *buf, int maxlen)
+{
+	struct list_head *le;
+	char tmpstr[80];
+	int len = 0;
+	buf[0] = '\0';
+
+	spin_lock(&svc_xprt_class_lock);
+	list_for_each(le, &svc_xprt_class_list) {
+		int slen;
+		struct svc_xprt_class *xcl =
+			list_entry(le, struct svc_xprt_class, xcl_list);
+
+		sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
+		slen = strlen(tmpstr);
+		if (len + slen > maxlen)
+			break;
+		len += slen;
+		strcat(buf, tmpstr);
+	}
+	spin_unlock(&svc_xprt_class_lock);
+
+	return len;
+}
+
 static void svc_xprt_free(struct kref *kref)
 {
 	struct svc_xprt *xprt =
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 2be714e..fd7cf59 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -18,6 +18,7 @@
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/svc_xprt.h>
 
 /*
  * Declare the debug flags here
@@ -48,6 +49,30 @@ rpc_unregister_sysctl(void)
 	}
 }
 
+static int proc_do_xprt(ctl_table *table, int write, struct file *file,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	char tmpbuf[256];
+	int len;
+	if ((*ppos && !write) || !*lenp) {
+		*lenp = 0;
+		return 0;
+	}
+	if (write)
+		return -EINVAL;
+	else {
+		len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
+		if (!access_ok(VERIFY_WRITE, buffer, len))
+			return -EFAULT;
+
+		if (__copy_to_user(buffer, tmpbuf, len))
+			return -EFAULT;
+	}
+	*lenp -= len;
+	*ppos += len;
+	return 0;
+}
+
 static int
 proc_dodebug(ctl_table *table, int write, struct file *file,
 				void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -140,6 +165,12 @@ static ctl_table debug_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dodebug
 	},
+	{
+		.procname	= "transports",
+		.maxlen		= 256,
+		.mode		= 0444,
+		.proc_handler	= &proc_do_xprt,
+	},
 	{ .ctl_name = 0 }
 };
 

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files
       [not found] ` <20071129225510.15275.82660.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
@ 2007-11-29 22:56   ` Tom Tucker
  0 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 22:56 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs


Add a read-only file, /proc/sys/sunrpc/transports, that, when read, lists
the name and maximum payload size of each registered svc transport class.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---

 include/linux/sunrpc/svc_xprt.h |    2 ++
 net/sunrpc/svc_xprt.c           |   28 ++++++++++++++++++++++++++++
 net/sunrpc/sysctl.c             |   31 +++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 0 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 36f8b09..c2fa41d 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -79,11 +79,13 @@ void	svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
 void	svc_xprt_enqueue(struct svc_xprt *xprt);
 int	svc_port_is_privileged(struct sockaddr *sin);
 void	svc_delete_xprt(struct svc_xprt *xprt);
+int	svc_print_xprts(char *buf, int maxlen);
 
 static inline void svc_xprt_get(struct svc_xprt *xprt)
 {
 	kref_get(&xprt->xpt_ref);
 }
+
 static inline void svc_xprt_set_local(struct svc_xprt *xprt,
 				      struct sockaddr *sa, int salen)
 {
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b31ba0e..7416e66 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -93,6 +93,34 @@ int svc_unreg_xprt_class(struct svc_xprt_class *xcl)
 }
 EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
 
+/*
+ * Format the transport list for printing
+ */
+int svc_print_xprts(char *buf, int maxlen)
+{
+	struct list_head *le;
+	char tmpstr[80];
+	int len = 0;
+	buf[0] = '\0';
+
+	spin_lock(&svc_xprt_class_lock);
+	list_for_each(le, &svc_xprt_class_list) {
+		int slen;
+		struct svc_xprt_class *xcl =
+			list_entry(le, struct svc_xprt_class, xcl_list);
+
+		sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
+		slen = strlen(tmpstr);
+		if (len + slen > maxlen)
+			break;
+		len += slen;
+		strcat(buf, tmpstr);
+	}
+	spin_unlock(&svc_xprt_class_lock);
+
+	return len;
+}
+
 static void svc_xprt_free(struct kref *kref)
 {
 	struct svc_xprt *xprt =
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 2be714e..fd7cf59 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -18,6 +18,7 @@
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/svc_xprt.h>
 
 /*
  * Declare the debug flags here
@@ -48,6 +49,30 @@ rpc_unregister_sysctl(void)
 	}
 }
 
+static int proc_do_xprt(ctl_table *table, int write, struct file *file,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	char tmpbuf[256];
+	int len;
+	if ((*ppos && !write) || !*lenp) {
+		*lenp = 0;
+		return 0;
+	}
+	if (write)
+		return -EINVAL;
+	else {
+		len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
+		if (!access_ok(VERIFY_WRITE, buffer, len))
+			return -EFAULT;
+
+		if (__copy_to_user(buffer, tmpbuf, len))
+			return -EFAULT;
+	}
+	*lenp -= len;
+	*ppos += len;
+	return 0;
+}
+
 static int
 proc_dodebug(ctl_table *table, int write, struct file *file,
 				void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -140,6 +165,12 @@ static ctl_table debug_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dodebug
 	},
+	{
+		.procname	= "transports",
+		.maxlen		= 256,
+		.mode		= 0444,
+		.proc_handler	= &proc_do_xprt,
+	},
 	{ .ctl_name = 0 }
 };
 

^ permalink raw reply related	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 00/38] SVC Transport Switch
       [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
                     ` (36 preceding siblings ...)
  2007-11-29 22:41   ` [RFC,PATCH 38/38] svc: Add svc_xprt_names service to replace svc_sock_names Tom Tucker
@ 2007-11-29 23:18   ` Tom Tucker
  37 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-29 23:18 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs


I mistyped Bruce's address in the first posting of this series. I sent the
series a second time so that people can reply and have Bruce's correct
address in the reply.

Please forgive me for the clutter I've caused. 
Tom

On Thu, 2007-11-29 at 16:39 -0600, Tom Tucker wrote:
> The following series implements a pluggable transport switch for
> RPC servers. This patchset is a rollup of the original
> plus incremental patches.
> 
> In addition to the incremental change rollup:
> 
> - The address management logic in svc_xprt was cleaned up a
>   based on feedback from Chuck Lever,
> 
> - A race was fixed whereby UDP RPC from different clients in parallel
>   could corrupt each other's addresses.
> 
> - Changes were made to the sysctl implementation to comply 
>   with the new 2.6.24 requirements regarding sysctl ids.
> 
> The following testing was done:
> 
> - Connectathon on V3 and V4 on TCP, UDP and RDMA mounts
> 
> - Kernel build on V3 RDMA mount.
> 
> This patchset is against the 2.6.24-rc3 kernel tree.
> 


^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 04/38] svc: Add a max payload value to the transport
       [not found]     ` <20071129224002.14563.96227.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
@ 2007-11-30 20:22       ` Chuck Lever
  2007-11-30 20:51         ` Tom Tucker
  0 siblings, 1 reply; 62+ messages in thread
From: Chuck Lever @ 2007-11-30 20:22 UTC (permalink / raw)
  To: Tom Tucker; +Cc: bfields-ag9A2Eb6PFsgsBAKwltoeQ, linux-nfs

On Nov 29, 2007, at 5:40 PM, Tom Tucker wrote:
> The svc_max_payload function currently looks at the socket type
> to determine the max payload. Add a max payload value to  
> svc_xprt_class
> so it can be returned directly.
>
> Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> ---
>
>  include/linux/sunrpc/svc_xprt.h |    1 +
>  net/sunrpc/svc.c                |    4 +---
>  net/sunrpc/svcsock.c            |    2 ++
>  3 files changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/ 
> svc_xprt.h
> index a8b1da8..b4ce054 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -17,6 +17,7 @@ struct svc_xprt_class {
>  	struct module		*xcl_owner;
>  	struct svc_xprt_ops	*xcl_ops;
>  	struct list_head	xcl_list;
> +	u32			xcl_max_payload;
>  };
>
>  struct svc_xprt {
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index a4a6bf7..ce59044 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -1054,10 +1054,8 @@ err_bad:
>   */
>  u32 svc_max_payload(const struct svc_rqst *rqstp)
>  {
> -	int max = RPCSVC_MAXPAYLOAD_TCP;
> +	int max = rqstp->rq_xprt->xpt_class->xcl_max_payload;

Nit: xcl_max_payload is unsigned, as is sv_max_payload, and so is the  
return type of the svc_max_payload() function, so the automatic  
variable "max" should also be an unsigned integral type.  (Implicit  
type conversion in the comparison below prevents this from being an  
actual bug).

> -	if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM)
> -		max = RPCSVC_MAXPAYLOAD_UDP;
>  	if (rqstp->rq_server->sv_max_payload < max)
>  		max = rqstp->rq_server->sv_max_payload;
>  	return max;
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 4755467..ca9b8d8 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -906,6 +906,7 @@ static struct svc_xprt_ops svc_udp_ops = {
>  static struct svc_xprt_class svc_udp_class = {
>  	.xcl_name = "udp",
>  	.xcl_ops = &svc_udp_ops,
> +	.xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
>  };
>
>  static void
> @@ -1359,6 +1360,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
>  static struct svc_xprt_class svc_tcp_class = {
>  	.xcl_name = "tcp",
>  	.xcl_ops = &svc_tcp_ops,
> +	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
>  };
>
>  void svc_init_xprt_sock(void)

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 09/38] svc: Add a transport function that checks for write space
       [not found]     ` <20071129224012.14563.23130.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
@ 2007-11-30 20:46       ` Chuck Lever
  2007-11-30 21:39         ` Tom Tucker
  0 siblings, 1 reply; 62+ messages in thread
From: Chuck Lever @ 2007-11-30 20:46 UTC (permalink / raw)
  To: Tom Tucker; +Cc: J. Bruce Fields, linux-nfs

On Nov 29, 2007, at 5:40 PM, Tom Tucker wrote:
> In order to avoid blocking a service thread, the receive side checks
> to see if there is sufficient write space to reply to the request.
> Each transport has a different mechanism for determining if there is
> enough write space to reply.
>
> The code that checked for white space was coupled with code that

s/white space/write space/

> checked for CLOSE and CONN. These checks have been broken out into
> separate statements to make the code easier to read.
>
> Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> ---
>
>  include/linux/sunrpc/svc_xprt.h |    1 +
>  net/sunrpc/svcsock.c            |   60 ++++++++++++++++++++++++++++ 
> +++++------
>  2 files changed, 51 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/ 
> svc_xprt.h
> index 8501115..3adc8f3 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -10,6 +10,7 @@
>  #include <linux/sunrpc/svc.h>
>
>  struct svc_xprt_ops {
> +	int		(*xpo_has_wspace)(struct svc_xprt *);
>  	int		(*xpo_recvfrom)(struct svc_rqst *);
>  	void		(*xpo_prep_reply_hdr)(struct svc_rqst *);
>  	int		(*xpo_sendto)(struct svc_rqst *);
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 510ad45..b796244 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -269,22 +269,24 @@ svc_sock_enqueue(struct svc_sock *svsk)
>  	BUG_ON(svsk->sk_pool != NULL);
>  	svsk->sk_pool = pool;
>
> -	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> -	if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
> -	     > svc_sock_wspace(svsk))
> -	    && !test_bit(SK_CLOSE, &svsk->sk_flags)
> -	    && !test_bit(SK_CONN, &svsk->sk_flags)) {
> +	/* Handle pending connection */
> +	if (test_bit(SK_CONN, &svsk->sk_flags))
> +		goto process;
> +
> +	/* Handle close in-progress */
> +	if (test_bit(SK_CLOSE, &svsk->sk_flags))
> +		goto process;
> +
> +	/* Check if we have space to reply to a request */
> +	if (!svsk->sk_xprt.xpt_ops->xpo_has_wspace(&svsk->sk_xprt)) {
>  		/* Don't enqueue while not enough space for reply */
> -		dprintk("svc: socket %p  no space, %d*2 > %ld, not enqueued\n",
> -			svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg,
> -			svc_sock_wspace(svsk));
> +		dprintk("svc: no write space, socket %p  not enqueued\n", svsk);

Since you remove the only callers of svc_sock_wspace here, you can  
probably safely delete that function in this patch as well.

>  		svsk->sk_pool = NULL;
>  		clear_bit(SK_BUSY, &svsk->sk_flags);
>  		goto out_unlock;
>  	}
> -	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> -
>
> + process:
>  	if (!list_empty(&pool->sp_threads)) {
>  		rqstp = list_entry(pool->sp_threads.next,
>  				   struct svc_rqst,
> @@ -897,6 +899,24 @@ static void svc_udp_prep_reply_hdr(struct  
> svc_rqst *rqstp)
>  {
>  }
>
> +static int svc_udp_has_wspace(struct svc_xprt *xprt)
> +{
> +	struct svc_sock *svsk = container_of(xprt, struct svc_sock,  
> sk_xprt);
> +	struct svc_serv	*serv = svsk->sk_server;
> +	int required;
> +
> +	/*
> +	 * Set the SOCK_NOSPACE flag before checking the available
> +	 * sock space.
> +	 */
> +	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> +	required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;

The result of the sum is unsigned, but then we stuff it into a signed  
integer...

> +	if (required*2 > sock_wspace(svsk->sk_sk))
> +		return 0;

...and then this introduces a mixed sign comparison (harmless  
AFAICT).  Perhaps "required" should be an unsigned long.

Also, some may prefer "<< 1" to "* 2".  I'm not sure it makes much  
difference here.  Arguably, it might be slightly better documentation  
to double "required" before the if statement.

> +	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> +	return 1;
> +}
> +
>  static struct svc_xprt_ops svc_udp_ops = {
>  	.xpo_recvfrom = svc_udp_recvfrom,
>  	.xpo_sendto = svc_udp_sendto,
> @@ -904,6 +924,7 @@ static struct svc_xprt_ops svc_udp_ops = {
>  	.xpo_detach = svc_sock_detach,
>  	.xpo_free = svc_sock_free,
>  	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
> +	.xpo_has_wspace = svc_udp_has_wspace,
>  };
>
>  static struct svc_xprt_class svc_udp_class = {
> @@ -1366,6 +1387,24 @@ static void svc_tcp_prep_reply_hdr(struct  
> svc_rqst *rqstp)
>  	svc_putnl(resv, 0);
>  }
>
> +static int svc_tcp_has_wspace(struct svc_xprt *xprt)
> +{
> +	struct svc_sock *svsk = container_of(xprt, struct svc_sock,  
> sk_xprt);
> +	struct svc_serv	*serv = svsk->sk_server;
> +	int required;
> +
> +	/*
> +	 * Set the SOCK_NOSPACE flag before checking the available
> +	 * sock space.
> +	 */
> +	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> +	required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;

Ibid.

> +	if (required*2 > sk_stream_wspace(svsk->sk_sk))
> +		return 0;

Oddly, sk_stream_wspace() returns an int, but sock_wspace() returns an  
unsigned long.  Sigh...

> +	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> +	return 1;
> +}
> +
>  static struct svc_xprt_ops svc_tcp_ops = {
>  	.xpo_recvfrom = svc_tcp_recvfrom,
>  	.xpo_sendto = svc_tcp_sendto,
> @@ -1373,6 +1412,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
>  	.xpo_detach = svc_sock_detach,
>  	.xpo_free = svc_sock_free,
>  	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
> +	.xpo_has_wspace = svc_tcp_has_wspace,
>  };
>
>  static struct svc_xprt_class svc_tcp_class = {

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 04/38] svc: Add a max payload value to the transport
  2007-11-30 20:22       ` Chuck Lever
@ 2007-11-30 20:51         ` Tom Tucker
  0 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-30 20:51 UTC (permalink / raw)
  To: Chuck Lever; +Cc: bfields-ag9A2Eb6PFsgsBAKwltoeQ, linux-nfs

Thanks Chuck, I'll fix this.

On Fri, 2007-11-30 at 15:22 -0500, Chuck Lever wrote:
> On Nov 29, 2007, at 5:40 PM, Tom Tucker wrote:
> > The svc_max_payload function currently looks at the socket type
> > to determine the max payload. Add a max payload value to  
> > svc_xprt_class
> > so it can be returned directly.
> >
> > Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> > ---
> >
> >  include/linux/sunrpc/svc_xprt.h |    1 +
> >  net/sunrpc/svc.c                |    4 +---
> >  net/sunrpc/svcsock.c            |    2 ++
> >  3 files changed, 4 insertions(+), 3 deletions(-)
> >
> > diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/ 
> > svc_xprt.h
> > index a8b1da8..b4ce054 100644
> > --- a/include/linux/sunrpc/svc_xprt.h
> > +++ b/include/linux/sunrpc/svc_xprt.h
> > @@ -17,6 +17,7 @@ struct svc_xprt_class {
> >  	struct module		*xcl_owner;
> >  	struct svc_xprt_ops	*xcl_ops;
> >  	struct list_head	xcl_list;
> > +	u32			xcl_max_payload;
> >  };
> >
> >  struct svc_xprt {
> > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> > index a4a6bf7..ce59044 100644
> > --- a/net/sunrpc/svc.c
> > +++ b/net/sunrpc/svc.c
> > @@ -1054,10 +1054,8 @@ err_bad:
> >   */
> >  u32 svc_max_payload(const struct svc_rqst *rqstp)
> >  {
> > -	int max = RPCSVC_MAXPAYLOAD_TCP;
> > +	int max = rqstp->rq_xprt->xpt_class->xcl_max_payload;
> 
> Nit: xcl_max_payload is unsigned, as is sv_max_payload, and so is the  
> return type of the svc_max_payload() function, so the automatic  
> variable "max" should also be an unsigned integral type.  (Implicit  
> type conversion in the comparison below prevents this from being an  
> actual bug).
> 
> > -	if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM)
> > -		max = RPCSVC_MAXPAYLOAD_UDP;
> >  	if (rqstp->rq_server->sv_max_payload < max)
> >  		max = rqstp->rq_server->sv_max_payload;
> >  	return max;
> > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> > index 4755467..ca9b8d8 100644
> > --- a/net/sunrpc/svcsock.c
> > +++ b/net/sunrpc/svcsock.c
> > @@ -906,6 +906,7 @@ static struct svc_xprt_ops svc_udp_ops = {
> >  static struct svc_xprt_class svc_udp_class = {
> >  	.xcl_name = "udp",
> >  	.xcl_ops = &svc_udp_ops,
> > +	.xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
> >  };
> >
> >  static void
> > @@ -1359,6 +1360,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
> >  static struct svc_xprt_class svc_tcp_class = {
> >  	.xcl_name = "tcp",
> >  	.xcl_ops = &svc_tcp_ops,
> > +	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
> >  };
> >
> >  void svc_init_xprt_sock(void)
> 
> --
> Chuck Lever
> chuck[dot]lever[at]oracle[dot]com


^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 11/38] svc: Add xpo_accept transport function
       [not found]     ` <20071129224016.14563.67547.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
@ 2007-11-30 21:01       ` Chuck Lever
  2007-11-30 21:47         ` Tom Tucker
  0 siblings, 1 reply; 62+ messages in thread
From: Chuck Lever @ 2007-11-30 21:01 UTC (permalink / raw)
  To: Tom Tucker; +Cc: J. Bruce Fields, linux-nfs

The refactoring here helps clarity.  More below.

On Nov 29, 2007, at 5:40 PM, Tom Tucker wrote:
> Previously, the accept logic looked into the socket state to determine
> whether to call accept or recv when data-ready was indicated on an  
> endpoint.
> Since some transports don't use sockets, this logic was changed to  
> use a flag
> bit (SK_LISTENER) to identify listening endpoints. A transport  
> function
> (xpo_accept) was added to allow each transport to define its own  
> accept
> processing. A transport's initialization logic is reponsible for  
> setting the
> SK_LISTENER bit. I didn't see any way to do this in transport  
> independent
> logic since the passive side of a UDP connection doesn't listen and
> always recv's.
>
> In the svc_recv function, if the SK_LISTENER bit is set, the transport
> xpo_accept function is called to handle accept processing.
>
> Note that all functions are defined even if they don't make sense
> for a given transport. For example, accept doesn't mean anything for
> UDP. The fuction is defined anyway and bug checks if called. The

s/fuction/function/

:-O

> UDP transport should never set the SK_LISTENER bit.
>
> The code that poaches connections when the connection
> limit is hit was moved to a subroutine to make the accept logic path
> easier to follow. Since this is in the new connection path, it should
> not be a performance issue.
>
> Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> ---
>
>  include/linux/sunrpc/svc_xprt.h |    1
>  include/linux/sunrpc/svcsock.h  |    1
>  net/sunrpc/svcsock.c            |  127 ++++++++++++++++++++ 
> +------------------
>  3 files changed, 72 insertions(+), 57 deletions(-)
>
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/ 
> svc_xprt.h
> index 3adc8f3..1527ff1 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -10,6 +10,7 @@
>  #include <linux/sunrpc/svc.h>
>
>  struct svc_xprt_ops {
> +	struct svc_xprt	*(*xpo_accept)(struct svc_xprt *);
>  	int		(*xpo_has_wspace)(struct svc_xprt *);
>  	int		(*xpo_recvfrom)(struct svc_rqst *);
>  	void		(*xpo_prep_reply_hdr)(struct svc_rqst *);
> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/ 
> svcsock.h
> index 08e78d0..9882ce0 100644
> --- a/include/linux/sunrpc/svcsock.h
> +++ b/include/linux/sunrpc/svcsock.h
> @@ -36,6 +36,7 @@ struct svc_sock {
>  #define	SK_DEFERRED	8			/* request on sk_deferred */
>  #define	SK_OLD		9			/* used for temp socket aging mark+sweep */
>  #define	SK_DETACHED	10			/* detached from tempsocks list */
> +#define SK_LISTENER	11			/* listening endpoint */
>
>  	atomic_t    	    	sk_reserved;	/* space on outq that is reserved */
>
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 38ecdd1..661162b 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -912,6 +912,12 @@ static int svc_udp_has_wspace(struct svc_xprt  
> *xprt)
>  	return 1;
>  }
>
> +static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
> +{
> +	BUG();
> +	return NULL;
> +}
> +
>  static struct svc_xprt_ops svc_udp_ops = {
>  	.xpo_recvfrom = svc_udp_recvfrom,
>  	.xpo_sendto = svc_udp_sendto,
> @@ -920,6 +926,7 @@ static struct svc_xprt_ops svc_udp_ops = {
>  	.xpo_free = svc_sock_free,
>  	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
>  	.xpo_has_wspace = svc_udp_has_wspace,
> +	.xpo_accept = svc_udp_accept,
>  };
>
>  static struct svc_xprt_class svc_udp_class = {
> @@ -1044,9 +1051,9 @@ static inline int svc_port_is_privileged 
> (struct sockaddr *sin)
>  /*
>   * Accept a TCP connection
>   */
> -static void
> -svc_tcp_accept(struct svc_sock *svsk)
> +static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
>  {
> +	struct svc_sock *svsk = container_of(xprt, struct svc_sock,  
> sk_xprt);
>  	struct sockaddr_storage addr;
>  	struct sockaddr	*sin = (struct sockaddr *) &addr;
>  	struct svc_serv	*serv = svsk->sk_server;
> @@ -1058,7 +1065,7 @@ svc_tcp_accept(struct svc_sock *svsk)
>
>  	dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
>  	if (!sock)
> -		return;
> +		return NULL;
>
>  	clear_bit(SK_CONN, &svsk->sk_flags);
>  	err = kernel_accept(sock, &newsock, O_NONBLOCK);
> @@ -1069,7 +1076,7 @@ svc_tcp_accept(struct svc_sock *svsk)
>  		else if (err != -EAGAIN && net_ratelimit())
>  			printk(KERN_WARNING "%s: accept failed (err %d)!\n",
>  				   serv->sv_name, -err);
> -		return;
> +		return NULL;
>  	}
>
>  	set_bit(SK_CONN, &svsk->sk_flags);
> @@ -1115,59 +1122,14 @@ svc_tcp_accept(struct svc_sock *svsk)
>
>  	svc_sock_received(newsvsk);
>
> -	/* make sure that we don't have too many active connections.
> -	 * If we have, something must be dropped.
> -	 *
> -	 * There's no point in trying to do random drop here for
> -	 * DoS prevention. The NFS clients does 1 reconnect in 15
> -	 * seconds. An attacker can easily beat that.
> -	 *
> -	 * The only somewhat efficient mechanism would be if drop
> -	 * old connections from the same IP first. But right now
> -	 * we don't even record the client IP in svc_sock.
> -	 */

> -	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
> -		struct svc_sock *svsk = NULL;
> -		spin_lock_bh(&serv->sv_lock);
> -		if (!list_empty(&serv->sv_tempsocks)) {
> -			if (net_ratelimit()) {
> -				/* Try to help the admin */
> -				printk(KERN_NOTICE "%s: too many open TCP "
> -					"sockets, consider increasing the "
> -					"number of nfsd threads\n",
> -						   serv->sv_name);
> -				printk(KERN_NOTICE
> -				       "%s: last TCP connect from %s\n",
> -				       serv->sv_name, __svc_print_addr(sin,
> -							buf, sizeof(buf)));
> -			}
> -			/*
> -			 * Always select the oldest socket. It's not fair,
> -			 * but so is life
> -			 */
> -			svsk = list_entry(serv->sv_tempsocks.prev,
> -					  struct svc_sock,
> -					  sk_list);
> -			set_bit(SK_CLOSE, &svsk->sk_flags);
> -			atomic_inc(&svsk->sk_inuse);
> -		}
> -		spin_unlock_bh(&serv->sv_lock);
> -
> -		if (svsk) {
> -			svc_sock_enqueue(svsk);
> -			svc_sock_put(svsk);
> -		}
> -
> -	}
> -
>  	if (serv->sv_stats)
>  		serv->sv_stats->nettcpconn++;
>
> -	return;
> +	return &newsvsk->sk_xprt;
>
>  failed:
>  	sock_release(newsock);
> -	return;
> +	return NULL;
>  }
>
>  /*
> @@ -1192,12 +1154,6 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
>  		return svc_deferred_recv(rqstp);
>  	}
>
> -	if (svsk->sk_sk->sk_state == TCP_LISTEN) {
> -		svc_tcp_accept(svsk);
> -		svc_sock_received(svsk);
> -		return 0;
> -	}
> -
>  	if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
>  		/* sndbuf needs to have room for one request
>  		 * per thread, otherwise we can stall even when the
> @@ -1403,6 +1359,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
>  	.xpo_free = svc_sock_free,
>  	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
>  	.xpo_has_wspace = svc_tcp_has_wspace,
> +	.xpo_accept = svc_tcp_accept,
>  };
>
>  static struct svc_xprt_class svc_tcp_class = {
> @@ -1433,6 +1390,7 @@ svc_tcp_init(struct svc_sock *svsk)
>
>  	if (sk->sk_state == TCP_LISTEN) {
>  		dprintk("setting up TCP socket for listening\n");
> +		set_bit(SK_LISTENER, &svsk->sk_flags);
>  		sk->sk_data_ready = svc_tcp_listen_data_ready;
>  		set_bit(SK_CONN, &svsk->sk_flags);
>  	} else {
> @@ -1484,6 +1442,55 @@ svc_sock_update_bufs(struct svc_serv *serv)
>  	spin_unlock_bh(&serv->sv_lock);
>  }
>
> +static void
> +svc_check_conn_limits(struct svc_serv *serv)

Style police want the return value type and function declaration on  
the same line.

> +{
> +	char	buf[RPC_MAX_ADDRBUFLEN];
> +
> +	/* make sure that we don't have too many active connections.
> +	 * If we have, something must be dropped.
> +	 *
> +	 * There's no point in trying to do random drop here for
> +	 * DoS prevention. The NFS clients does 1 reconnect in 15
> +	 * seconds. An attacker can easily beat that.
> +	 *
> +	 * The only somewhat efficient mechanism would be if drop
> +	 * old connections from the same IP first. But right now
> +	 * we don't even record the client IP in svc_sock.
> +	 */

Just a personal preference: I think this would better serve as a  
block comment placed just before the function declaration above.

> +	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {

Would be nice if the naked constants were defined as macros  
somewhere.  Some rationale for these values would be nice (does  
anyone (ie, Greg!) remember why these values were chosen?).

> +		struct svc_sock *svsk = NULL;
> +		spin_lock_bh(&serv->sv_lock);
> +		if (!list_empty(&serv->sv_tempsocks)) {
> +			if (net_ratelimit()) {
> +				/* Try to help the admin */
> +				printk(KERN_NOTICE "%s: too many open TCP "
> +					"sockets, consider increasing the "
> +					"number of nfsd threads\n",
> +						   serv->sv_name);
> +				printk(KERN_NOTICE
> +				       "%s: last TCP connect from %s\n",
> +				       serv->sv_name, buf);
> +			}
> +			/*
> +			 * Always select the oldest socket. It's not fair,
> +			 * but so is life
> +			 */
> +			svsk = list_entry(serv->sv_tempsocks.prev,
> +					  struct svc_sock,
> +					  sk_list);
> +			set_bit(SK_CLOSE, &svsk->sk_flags);
> +			atomic_inc(&svsk->sk_inuse);
> +		}
> +		spin_unlock_bh(&serv->sv_lock);
> +
> +		if (svsk) {
> +			svc_sock_enqueue(svsk);
> +			svc_sock_put(svsk);
> +		}
> +	}
> +}
> +
>  /*
>   * Receive the next request on any socket.  This code is carefully
>   * organised not to touch any cachelines in the shared svc_serv
> @@ -1579,6 +1586,12 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
>  	if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
>  		dprintk("svc_recv: found SK_CLOSE\n");
>  		svc_delete_socket(svsk);
> +	} else if (test_bit(SK_LISTENER, &svsk->sk_flags)) {
> +		struct svc_xprt *newxpt;
> +		newxpt = svsk->sk_xprt.xpt_ops->xpo_accept(&svsk->sk_xprt);
> +		if (newxpt)
> +			svc_check_conn_limits(svsk->sk_server);
> +		svc_sock_received(svsk);
>  	} else {
>  		dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
>  			rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com




^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 21/38] svc: Change svc_sock_received to svc_xprt_received and export it
       [not found]     ` <20071129224037.14563.69171.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
@ 2007-11-30 21:33       ` Chuck Lever
  2007-11-30 23:17         ` Tom Tucker
  0 siblings, 1 reply; 62+ messages in thread
From: Chuck Lever @ 2007-11-30 21:33 UTC (permalink / raw)
  To: Tom Tucker; +Cc: J. Bruce Fields, linux-nfs

On Nov 29, 2007, at 5:40 PM, Tom Tucker wrote:
> All fields touched by svc_sock_received are now transport independent.
> Change it to use svc_xprt directly. This function is called from
> transport dependent code, so export it.
>
> Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> ---
>
>  include/linux/sunrpc/svc_xprt.h |    2 +-
>  net/sunrpc/svcsock.c            |   37 +++++++++++++++++ 
> +-------------------
>  2 files changed, 19 insertions(+), 20 deletions(-)
>
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/ 
> svc_xprt.h
> index d5ef902..c416d05 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -62,8 +62,8 @@ int	svc_unreg_xprt_class(struct svc_xprt_class *);
>  void	svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
>  		      struct svc_serv *);
>  int	svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
> +void	svc_xprt_received(struct svc_xprt *);
>  void	svc_xprt_put(struct svc_xprt *xprt);
> -
>  static inline void svc_xprt_get(struct svc_xprt *xprt)
>  {
>  	kref_get(&xprt->xpt_ref);
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 5666541..0015839 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -347,14 +347,14 @@ svc_sock_dequeue(struct svc_pool *pool)
>   * Note: XPT_DATA only gets cleared when a read-attempt finds
>   * no (or insufficient) data.
>   */
> -static inline void
> -svc_sock_received(struct svc_sock *svsk)
> +void
> +svc_xprt_received(struct svc_xprt *xprt)

Style police again.  I notice several of these patches add new  
functions with the return type split onto a separate line.

>  {
> -	svsk->sk_xprt.xpt_pool = NULL;
> -	clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
> -	svc_xprt_enqueue(&svsk->sk_xprt);
> +	xprt->xpt_pool = NULL;
> +	clear_bit(XPT_BUSY, &xprt->xpt_flags);
> +	svc_xprt_enqueue(xprt);
>  }
> -
> +EXPORT_SYMBOL_GPL(svc_xprt_received);

When I submitted the RPC client-side transport switch, Trond  
suggested we add the EXPORTs later when it was clear why they are  
needed.  This may be a personal preference of the server maintainer,  
but I just thought I'd mention the possibility; it seems to make  
sense here too.

>  /**
>   * svc_reserve - change the space reserved for the reply to a  
> request.
> @@ -783,7 +783,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
>  				(serv->sv_nrthreads+3) * serv->sv_max_mesg);
>
>  	if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
> -		svc_sock_received(svsk);
> +		svc_xprt_received(&svsk->sk_xprt);
>  		return svc_deferred_recv(rqstp);
>  	}
>
> @@ -800,7 +800,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
>  			dprintk("svc: recvfrom returned error %d\n", -err);
>  			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
>  		}
> -		svc_sock_received(svsk);
> +		svc_xprt_received(&svsk->sk_xprt);
>  		return -EAGAIN;
>  	}
>  	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
> @@ -815,7 +815,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
>  	/*
>  	 * Maybe more packets - kick another thread ASAP.
>  	 */
> -	svc_sock_received(svsk);
> +	svc_xprt_received(&svsk->sk_xprt);
>
>  	len  = skb->len - sizeof(struct udphdr);
>  	rqstp->rq_arg.len = len;
> @@ -1123,8 +1123,6 @@ static struct svc_xprt *svc_tcp_accept(struct  
> svc_xprt *xprt)
>  	}
>  	memcpy(&newsvsk->sk_local, sin, slen);
>
> -	svc_sock_received(newsvsk);
> -

I assume it's OK to remove svc_sock_received() here (rather than  
replacing it with svc_xprt_received()) because you are adding a call  
to svc_xprt_received() below in svc_recv().

I think this is a non-trivial change amongst a whole bunch of trivial  
ones in this patch.  Thus it would be nicer if we did this in a  
separate patch so you can document your rationale for this change.   
(Yeah, I think we went over this in e-mail some time ago, but still...)

>  	if (serv->sv_stats)
>  		serv->sv_stats->nettcpconn++;
>
> @@ -1153,7 +1151,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
>  		test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
>
>  	if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
> -		svc_sock_received(svsk);
> +		svc_xprt_received(&svsk->sk_xprt);
>  		return svc_deferred_recv(rqstp);
>  	}
>
> @@ -1193,7 +1191,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
>  		if (len < want) {
>  			dprintk("svc: short recvfrom while reading record length (%d of  
> %lu)\n",
>  				len, want);
> -			svc_sock_received(svsk);
> +			svc_xprt_received(&svsk->sk_xprt);
>  			return -EAGAIN; /* record header not complete */
>  		}
>
> @@ -1229,7 +1227,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
>  	if (len < svsk->sk_reclen) {
>  		dprintk("svc: incomplete TCP record (%d of %d)\n",
>  			len, svsk->sk_reclen);
> -		svc_sock_received(svsk);
> +		svc_xprt_received(&svsk->sk_xprt);
>  		return -EAGAIN;	/* record not complete */
>  	}
>  	len = svsk->sk_reclen;
> @@ -1269,7 +1267,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
>  	svsk->sk_reclen = 0;
>  	svsk->sk_tcplen = 0;
>
> -	svc_sock_received(svsk);
> +	svc_xprt_received(&svsk->sk_xprt);
>  	if (serv->sv_stats)
>  		serv->sv_stats->nettcpcnt++;
>
> @@ -1282,7 +1280,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
>   error:
>  	if (len == -EAGAIN) {
>  		dprintk("RPC: TCP recvfrom got EAGAIN\n");
> -		svc_sock_received(svsk);
> +		svc_xprt_received(&svsk->sk_xprt);
>  	} else {
>  		printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
>  		       svsk->sk_xprt.xpt_server->sv_name, -len);
> @@ -1607,8 +1605,9 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
>  			 */
>  			__module_get(newxpt->xpt_class->xcl_owner);
>  			svc_check_conn_limits(svsk->sk_xprt.xpt_server);
> +			svc_xprt_received(newxpt);
>  		}
> -		svc_sock_received(svsk);
> +		svc_xprt_received(&svsk->sk_xprt);
>  	} else {
>  		dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
>  			rqstp, pool->sp_id, svsk,
> @@ -1827,7 +1826,7 @@ int svc_addsock(struct svc_serv *serv,
>  	else {
>  		svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS);
>  		if (svsk) {
> -			svc_sock_received(svsk);
> +			svc_xprt_received(&svsk->sk_xprt);
>  			err = 0;
>  		}
>  	}
> @@ -1882,7 +1881,7 @@ svc_create_socket(struct svc_serv *serv, int  
> protocol,
>  	}
>
>  	if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) {
> -		svc_sock_received(svsk);
> +		svc_xprt_received(&svsk->sk_xprt);
>  		return (struct svc_xprt *)svsk;
>  	}
>

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 09/38] svc: Add a transport function that checks for write space
  2007-11-30 20:46       ` Chuck Lever
@ 2007-11-30 21:39         ` Tom Tucker
       [not found]           ` <1196458764.5432.52.camel-SMNkleLxa3ZimH42XvhXlA@public.gmane.org>
  0 siblings, 1 reply; 62+ messages in thread
From: Tom Tucker @ 2007-11-30 21:39 UTC (permalink / raw)
  To: Chuck Lever; +Cc: J. Bruce Fields, linux-nfs


On Fri, 2007-11-30 at 15:46 -0500, Chuck Lever wrote:
> On Nov 29, 2007, at 5:40 PM, Tom Tucker wrote:
> > In order to avoid blocking a service thread, the receive side checks
> > to see if there is sufficient write space to reply to the request.
> > Each transport has a different mechanism for determining if there is
> > enough write space to reply.
> >
> > The code that checked for white space was coupled with code that
> 
> s/white space/write space/

ok.

> 
> > checked for CLOSE and CONN. These checks have been broken out into
> > separate statements to make the code easier to read.
> >
> > Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> > ---
> >
> >  include/linux/sunrpc/svc_xprt.h |    1 +
> >  net/sunrpc/svcsock.c            |   60 ++++++++++++++++++++++++++++ 
> > +++++------
> >  2 files changed, 51 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/ 
> > svc_xprt.h
> > index 8501115..3adc8f3 100644
> > --- a/include/linux/sunrpc/svc_xprt.h
> > +++ b/include/linux/sunrpc/svc_xprt.h
> > @@ -10,6 +10,7 @@
> >  #include <linux/sunrpc/svc.h>
> >
> >  struct svc_xprt_ops {
> > +	int		(*xpo_has_wspace)(struct svc_xprt *);
> >  	int		(*xpo_recvfrom)(struct svc_rqst *);
> >  	void		(*xpo_prep_reply_hdr)(struct svc_rqst *);
> >  	int		(*xpo_sendto)(struct svc_rqst *);
> > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> > index 510ad45..b796244 100644
> > --- a/net/sunrpc/svcsock.c
> > +++ b/net/sunrpc/svcsock.c
> > @@ -269,22 +269,24 @@ svc_sock_enqueue(struct svc_sock *svsk)
> >  	BUG_ON(svsk->sk_pool != NULL);
> >  	svsk->sk_pool = pool;
> >
> > -	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> > -	if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
> > -	     > svc_sock_wspace(svsk))
> > -	    && !test_bit(SK_CLOSE, &svsk->sk_flags)
> > -	    && !test_bit(SK_CONN, &svsk->sk_flags)) {
> > +	/* Handle pending connection */
> > +	if (test_bit(SK_CONN, &svsk->sk_flags))
> > +		goto process;
> > +
> > +	/* Handle close in-progress */
> > +	if (test_bit(SK_CLOSE, &svsk->sk_flags))
> > +		goto process;
> > +
> > +	/* Check if we have space to reply to a request */
> > +	if (!svsk->sk_xprt.xpt_ops->xpo_has_wspace(&svsk->sk_xprt)) {
> >  		/* Don't enqueue while not enough space for reply */
> > -		dprintk("svc: socket %p  no space, %d*2 > %ld, not enqueued\n",
> > -			svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg,
> > -			svc_sock_wspace(svsk));
> > +		dprintk("svc: no write space, socket %p  not enqueued\n", svsk);
> 
> Since you remove the only callers of svc_sock_wspace here, you can  
> probably safely delete that function in this patch as well.
> 

ok.

> >  		svsk->sk_pool = NULL;
> >  		clear_bit(SK_BUSY, &svsk->sk_flags);
> >  		goto out_unlock;
> >  	}
> > -	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> > -
> >
> > + process:
> >  	if (!list_empty(&pool->sp_threads)) {
> >  		rqstp = list_entry(pool->sp_threads.next,
> >  				   struct svc_rqst,
> > @@ -897,6 +899,24 @@ static void svc_udp_prep_reply_hdr(struct  
> > svc_rqst *rqstp)
> >  {
> >  }
> >
> > +static int svc_udp_has_wspace(struct svc_xprt *xprt)
> > +{
> > +	struct svc_sock *svsk = container_of(xprt, struct svc_sock,  
> > sk_xprt);
> > +	struct svc_serv	*serv = svsk->sk_server;
> > +	int required;
> > +
> > +	/*
> > +	 * Set the SOCK_NOSPACE flag before checking the available
> > +	 * sock space.
> > +	 */
> > +	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> > +	required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
> 
> The result of the sum is unsigned, but then we stuff it into a signed  
> integer...
> 
> > +	if (required*2 > sock_wspace(svsk->sk_sk))
> > +		return 0;
> 
> ...and then this introduces a mixed sign comparison (harmless  
> AFAICT).  Perhaps "required" should be an unsigned long.
> 

So for svc_udp_has_wspace, it makes sense for required to 
be unsigned, and then demote to signed on return. Yes?

> 
> Also, some may prefer "<< 1" to "* 2".  I'm not sure it makes much  
> difference here.  Arguably, it might be slightly better documentation  
> to double "required" before the if statement.
> 
> > +	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> > +	return 1;
> > +}
> > +
> >  static struct svc_xprt_ops svc_udp_ops = {
> >  	.xpo_recvfrom = svc_udp_recvfrom,
> >  	.xpo_sendto = svc_udp_sendto,
> > @@ -904,6 +924,7 @@ static struct svc_xprt_ops svc_udp_ops = {
> >  	.xpo_detach = svc_sock_detach,
> >  	.xpo_free = svc_sock_free,
> >  	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
> > +	.xpo_has_wspace = svc_udp_has_wspace,
> >  };
> >
> >  static struct svc_xprt_class svc_udp_class = {
> > @@ -1366,6 +1387,24 @@ static void svc_tcp_prep_reply_hdr(struct  
> > svc_rqst *rqstp)
> >  	svc_putnl(resv, 0);
> >  }
> >
> > +static int svc_tcp_has_wspace(struct svc_xprt *xprt)
> > +{
> > +	struct svc_sock *svsk = container_of(xprt, struct svc_sock,  
> > sk_xprt);
> > +	struct svc_serv	*serv = svsk->sk_server;
> > +	int required;
> > +
> > +	/*
> > +	 * Set the SOCK_NOSPACE flag before checking the available
> > +	 * sock space.
> > +	 */
> > +	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> > +	required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
> 
> Ibid.
> 
> > +	if (required*2 > sk_stream_wspace(svsk->sk_sk))
> > +		return 0;
> 
> > Oddly sk_stream_wspace() returns an int, but sock_wspace() returns an  
> > unsigned long.  Sigh...

For this one, let's leave required signed and add an explicit cast to
serv->sv_max_mesg. Sound ok?

What a mess...

> 
> > +	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> > +	return 1;
> > +}
> > +
> >  static struct svc_xprt_ops svc_tcp_ops = {
> >  	.xpo_recvfrom = svc_tcp_recvfrom,
> >  	.xpo_sendto = svc_tcp_sendto,
> > @@ -1373,6 +1412,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
> >  	.xpo_detach = svc_sock_detach,
> >  	.xpo_free = svc_sock_free,
> >  	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
> > +	.xpo_has_wspace = svc_tcp_has_wspace,
> >  };
> >
> >  static struct svc_xprt_class svc_tcp_class = {
> 
> --
> Chuck Lever
> chuck[dot]lever[at]oracle[dot]com


^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 11/38] svc: Add xpo_accept transport function
  2007-11-30 21:01       ` Chuck Lever
@ 2007-11-30 21:47         ` Tom Tucker
  0 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-11-30 21:47 UTC (permalink / raw)
  To: Chuck Lever; +Cc: J. Bruce Fields, linux-nfs


On Fri, 2007-11-30 at 16:01 -0500, Chuck Lever wrote:
> The refactoring here helps clarity.  More below.
> 
> On Nov 29, 2007, at 5:40 PM, Tom Tucker wrote:
> > Previously, the accept logic looked into the socket state to determine
> > whether to call accept or recv when data-ready was indicated on an  
> > endpoint.
> > Since some transports don't use sockets, this logic was changed to  
> > use a flag
> > bit (SK_LISTENER) to identify listening endpoints. A transport  
> > function
> > (xpo_accept) was added to allow each transport to define its own  
> > accept
> > processing. A transport's initialization logic is responsible for  
> > setting the
> > SK_LISTENER bit. I didn't see any way to do this in transport  
> > independent
> > logic since the passive side of a UDP connection doesn't listen and
> > always recv's.
> >
> > In the svc_recv function, if the SK_LISTENER bit is set, the transport
> > xpo_accept function is called to handle accept processing.
> >
> > Note that all functions are defined even if they don't make sense
> > for a given transport. For example, accept doesn't mean anything for
> > UDP. The fuction is defined anyway and bug checks if called. The
> 
> s/fuction/function/
> 
> :-O

ok.

> 
> > UDP transport should never set the SK_LISTENER bit.
> >
> > The code that poaches connections when the connection
> > limit is hit was moved to a subroutine to make the accept logic path
> > easier to follow. Since this is in the new connection path, it should
> > not be a performance issue.
> >
> > Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> > ---
> >
> >  include/linux/sunrpc/svc_xprt.h |    1
> >  include/linux/sunrpc/svcsock.h  |    1
> >  net/sunrpc/svcsock.c            |  127 ++++++++++++++++++++ 
> > +------------------
> >  3 files changed, 72 insertions(+), 57 deletions(-)
> >
> > diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/ 
> > svc_xprt.h
> > index 3adc8f3..1527ff1 100644
> > --- a/include/linux/sunrpc/svc_xprt.h
> > +++ b/include/linux/sunrpc/svc_xprt.h
> > @@ -10,6 +10,7 @@
> >  #include <linux/sunrpc/svc.h>
> >
> >  struct svc_xprt_ops {
> > +	struct svc_xprt	*(*xpo_accept)(struct svc_xprt *);
> >  	int		(*xpo_has_wspace)(struct svc_xprt *);
> >  	int		(*xpo_recvfrom)(struct svc_rqst *);
> >  	void		(*xpo_prep_reply_hdr)(struct svc_rqst *);
> > diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/ 
> > svcsock.h
> > index 08e78d0..9882ce0 100644
> > --- a/include/linux/sunrpc/svcsock.h
> > +++ b/include/linux/sunrpc/svcsock.h
> > @@ -36,6 +36,7 @@ struct svc_sock {
> >  #define	SK_DEFERRED	8			/* request on sk_deferred */
> >  #define	SK_OLD		9			/* used for temp socket aging mark+sweep */
> >  #define	SK_DETACHED	10			/* detached from tempsocks list */
> > +#define SK_LISTENER	11			/* listening endpoint */
> >
> >  	atomic_t    	    	sk_reserved;	/* space on outq that is reserved */
> >
> > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> > index 38ecdd1..661162b 100644
> > --- a/net/sunrpc/svcsock.c
> > +++ b/net/sunrpc/svcsock.c
> > @@ -912,6 +912,12 @@ static int svc_udp_has_wspace(struct svc_xprt  
> > *xprt)
> >  	return 1;
> >  }
> >
> > +static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
> > +{
> > +	BUG();
> > +	return NULL;
> > +}
> > +
> >  static struct svc_xprt_ops svc_udp_ops = {
> >  	.xpo_recvfrom = svc_udp_recvfrom,
> >  	.xpo_sendto = svc_udp_sendto,
> > @@ -920,6 +926,7 @@ static struct svc_xprt_ops svc_udp_ops = {
> >  	.xpo_free = svc_sock_free,
> >  	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
> >  	.xpo_has_wspace = svc_udp_has_wspace,
> > +	.xpo_accept = svc_udp_accept,
> >  };
> >
> >  static struct svc_xprt_class svc_udp_class = {
> > @@ -1044,9 +1051,9 @@ static inline int svc_port_is_privileged 
> > (struct sockaddr *sin)
> >  /*
> >   * Accept a TCP connection
> >   */
> > -static void
> > -svc_tcp_accept(struct svc_sock *svsk)
> > +static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
> >  {
> > +	struct svc_sock *svsk = container_of(xprt, struct svc_sock,  
> > sk_xprt);
> >  	struct sockaddr_storage addr;
> >  	struct sockaddr	*sin = (struct sockaddr *) &addr;
> >  	struct svc_serv	*serv = svsk->sk_server;
> > @@ -1058,7 +1065,7 @@ svc_tcp_accept(struct svc_sock *svsk)
> >
> >  	dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
> >  	if (!sock)
> > -		return;
> > +		return NULL;
> >
> >  	clear_bit(SK_CONN, &svsk->sk_flags);
> >  	err = kernel_accept(sock, &newsock, O_NONBLOCK);
> > @@ -1069,7 +1076,7 @@ svc_tcp_accept(struct svc_sock *svsk)
> >  		else if (err != -EAGAIN && net_ratelimit())
> >  			printk(KERN_WARNING "%s: accept failed (err %d)!\n",
> >  				   serv->sv_name, -err);
> > -		return;
> > +		return NULL;
> >  	}
> >
> >  	set_bit(SK_CONN, &svsk->sk_flags);
> > @@ -1115,59 +1122,14 @@ svc_tcp_accept(struct svc_sock *svsk)
> >
> >  	svc_sock_received(newsvsk);
> >
> > -	/* make sure that we don't have too many active connections.
> > -	 * If we have, something must be dropped.
> > -	 *
> > -	 * There's no point in trying to do random drop here for
> > -	 * DoS prevention. The NFS clients does 1 reconnect in 15
> > -	 * seconds. An attacker can easily beat that.
> > -	 *
> > -	 * The only somewhat efficient mechanism would be if drop
> > -	 * old connections from the same IP first. But right now
> > -	 * we don't even record the client IP in svc_sock.
> > -	 */
> 
> > -	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
> > -		struct svc_sock *svsk = NULL;
> > -		spin_lock_bh(&serv->sv_lock);
> > -		if (!list_empty(&serv->sv_tempsocks)) {
> > -			if (net_ratelimit()) {
> > -				/* Try to help the admin */
> > -				printk(KERN_NOTICE "%s: too many open TCP "
> > -					"sockets, consider increasing the "
> > -					"number of nfsd threads\n",
> > -						   serv->sv_name);
> > -				printk(KERN_NOTICE
> > -				       "%s: last TCP connect from %s\n",
> > -				       serv->sv_name, __svc_print_addr(sin,
> > -							buf, sizeof(buf)));
> > -			}
> > -			/*
> > -			 * Always select the oldest socket. It's not fair,
> > -			 * but so is life
> > -			 */
> > -			svsk = list_entry(serv->sv_tempsocks.prev,
> > -					  struct svc_sock,
> > -					  sk_list);
> > -			set_bit(SK_CLOSE, &svsk->sk_flags);
> > -			atomic_inc(&svsk->sk_inuse);
> > -		}
> > -		spin_unlock_bh(&serv->sv_lock);
> > -
> > -		if (svsk) {
> > -			svc_sock_enqueue(svsk);
> > -			svc_sock_put(svsk);
> > -		}
> > -
> > -	}
> > -
> >  	if (serv->sv_stats)
> >  		serv->sv_stats->nettcpconn++;
> >
> > -	return;
> > +	return &newsvsk->sk_xprt;
> >
> >  failed:
> >  	sock_release(newsock);
> > -	return;
> > +	return NULL;
> >  }
> >
> >  /*
> > @@ -1192,12 +1154,6 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
> >  		return svc_deferred_recv(rqstp);
> >  	}
> >
> > -	if (svsk->sk_sk->sk_state == TCP_LISTEN) {
> > -		svc_tcp_accept(svsk);
> > -		svc_sock_received(svsk);
> > -		return 0;
> > -	}
> > -
> >  	if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
> >  		/* sndbuf needs to have room for one request
> >  		 * per thread, otherwise we can stall even when the
> > @@ -1403,6 +1359,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
> >  	.xpo_free = svc_sock_free,
> >  	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
> >  	.xpo_has_wspace = svc_tcp_has_wspace,
> > +	.xpo_accept = svc_tcp_accept,
> >  };
> >
> >  static struct svc_xprt_class svc_tcp_class = {
> > @@ -1433,6 +1390,7 @@ svc_tcp_init(struct svc_sock *svsk)
> >
> >  	if (sk->sk_state == TCP_LISTEN) {
> >  		dprintk("setting up TCP socket for listening\n");
> > +		set_bit(SK_LISTENER, &svsk->sk_flags);
> >  		sk->sk_data_ready = svc_tcp_listen_data_ready;
> >  		set_bit(SK_CONN, &svsk->sk_flags);
> >  	} else {
> > @@ -1484,6 +1442,55 @@ svc_sock_update_bufs(struct svc_serv *serv)
> >  	spin_unlock_bh(&serv->sv_lock);
> >  }
> >
> > +static void
> > +svc_check_conn_limits(struct svc_serv *serv)
> 
> Style police want the return value type and function declaration on  
> the same line.
> 

yes.

> > +{
> > +	char	buf[RPC_MAX_ADDRBUFLEN];
> > +
> > +	/* make sure that we don't have too many active connections.
> > +	 * If we have, something must be dropped.
> > +	 *
> > +	 * There's no point in trying to do random drop here for
> > +	 * DoS prevention. The NFS clients does 1 reconnect in 15
> > +	 * seconds. An attacker can easily beat that.
> > +	 *
> > +	 * The only somewhat efficient mechanism would be if drop
> > +	 * old connections from the same IP first. But right now
> > +	 * we don't even record the client IP in svc_sock.
> > +	 */
> 
> Just a personal preference: I think this would better serve as a  
> block comment placed just before the function declaration above.
> 

agreed.


> > +	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
> 
> Would be nice if the naked constants were defined as macros  
> somewhere.  Some rationale for these values would be nice (does  
> anyone (ie, Greg!) remember why these values were chosen?).
> 

I've noodled on this before and produced nothing but heat. I'm afraid
the best I can come up with is #define THREE and #define TWENTY :-)

> > +		struct svc_sock *svsk = NULL;
> > +		spin_lock_bh(&serv->sv_lock);
> > +		if (!list_empty(&serv->sv_tempsocks)) {
> > +			if (net_ratelimit()) {
> > +				/* Try to help the admin */
> > +				printk(KERN_NOTICE "%s: too many open TCP "
> > +					"sockets, consider increasing the "
> > +					"number of nfsd threads\n",
> > +						   serv->sv_name);
> > +				printk(KERN_NOTICE
> > +				       "%s: last TCP connect from %s\n",
> > +				       serv->sv_name, buf);
> > +			}
> > +			/*
> > +			 * Always select the oldest socket. It's not fair,
> > +			 * but so is life
> > +			 */
> > +			svsk = list_entry(serv->sv_tempsocks.prev,
> > +					  struct svc_sock,
> > +					  sk_list);
> > +			set_bit(SK_CLOSE, &svsk->sk_flags);
> > +			atomic_inc(&svsk->sk_inuse);
> > +		}
> > +		spin_unlock_bh(&serv->sv_lock);
> > +
> > +		if (svsk) {
> > +			svc_sock_enqueue(svsk);
> > +			svc_sock_put(svsk);
> > +		}
> > +	}
> > +}
> > +
> >  /*
> >   * Receive the next request on any socket.  This code is carefully
> >   * organised not to touch any cachelines in the shared svc_serv
> > @@ -1579,6 +1586,12 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
> >  	if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
> >  		dprintk("svc_recv: found SK_CLOSE\n");
> >  		svc_delete_socket(svsk);
> > +	} else if (test_bit(SK_LISTENER, &svsk->sk_flags)) {
> > +		struct svc_xprt *newxpt;
> > +		newxpt = svsk->sk_xprt.xpt_ops->xpo_accept(&svsk->sk_xprt);
> > +		if (newxpt)
> > +			svc_check_conn_limits(svsk->sk_server);
> > +		svc_sock_received(svsk);
> >  	} else {
> >  		dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
> >  			rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
> 
> --
> Chuck Lever
> chuck[dot]lever[at]oracle[dot]com
> 
> 
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 09/38] svc: Add a transport function that checks for write space
       [not found]           ` <1196458764.5432.52.camel-SMNkleLxa3ZimH42XvhXlA@public.gmane.org>
@ 2007-11-30 22:43             ` Chuck Lever
  2007-12-10 20:43               ` Tom Tucker
  0 siblings, 1 reply; 62+ messages in thread
From: Chuck Lever @ 2007-11-30 22:43 UTC (permalink / raw)
  To: Tom Tucker; +Cc: J. Bruce Fields, Neil Brown, linux-nfs

On Nov 30, 2007, at 4:39 PM, Tom Tucker wrote:
> On Fri, 2007-11-30 at 15:46 -0500, Chuck Lever wrote:
>> On Nov 29, 2007, at 5:40 PM, Tom Tucker wrote:
>>> +static int svc_udp_has_wspace(struct svc_xprt *xprt)
>>> +{
>>> +	struct svc_sock *svsk = container_of(xprt, struct svc_sock,
>>> sk_xprt);
>>> +	struct svc_serv	*serv = svsk->sk_server;
>>> +	int required;
>>> +
>>> +	/*
>>> +	 * Set the SOCK_NOSPACE flag before checking the available
>>> +	 * sock space.
>>> +	 */
>>> +	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>>> +	required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
>>
>> The result of the sum is unsigned, but then we stuff it into a signed
>> integer...
>>
>>> +	if (required*2 > sock_wspace(svsk->sk_sk))
>>> +		return 0;
>>
>> ...and then this introduces a mixed sign comparison (harmless
>> AFAICT).  Perhaps "required" should be an unsigned long.
>>
>
> So for svc_udp_has_wspace, it makes sense for required to
> be unsigned, and then demote to signed on return. Yes?
>
>>
>> Also, some may prefer "<< 1" to "* 2".  I'm not sure it makes much
>> difference here.  Arguably, it might be slightly better documentation
>> to double "required" before the if statement.
>>
>>> +	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>>> +	return 1;
>>> +}

As far as I can tell you never return "required."  The result of  
svc_udp_has_wspace() is really a boolean, right?  Or did I miss your  
point?

>>> +
>>>  static struct svc_xprt_ops svc_udp_ops = {
>>>  	.xpo_recvfrom = svc_udp_recvfrom,
>>>  	.xpo_sendto = svc_udp_sendto,
>>> @@ -904,6 +924,7 @@ static struct svc_xprt_ops svc_udp_ops = {
>>>  	.xpo_detach = svc_sock_detach,
>>>  	.xpo_free = svc_sock_free,
>>>  	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
>>> +	.xpo_has_wspace = svc_udp_has_wspace,
>>>  };
>>>
>>>  static struct svc_xprt_class svc_udp_class = {
>>> @@ -1366,6 +1387,24 @@ static void svc_tcp_prep_reply_hdr(struct
>>> svc_rqst *rqstp)
>>>  	svc_putnl(resv, 0);
>>>  }
>>>
>>> +static int svc_tcp_has_wspace(struct svc_xprt *xprt)
>>> +{
>>> +	struct svc_sock *svsk = container_of(xprt, struct svc_sock,
>>> sk_xprt);
>>> +	struct svc_serv	*serv = svsk->sk_server;
>>> +	int required;
>>> +
>>> +	/*
>>> +	 * Set the SOCK_NOSPACE flag before checking the available
>>> +	 * sock space.
>>> +	 */
>>> +	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>>> +	required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
>>
>> Ibid.
>>
>>> +	if (required*2 > sk_stream_wspace(svsk->sk_sk))
>>> +		return 0;
>>
>> Oddly sk_stream_wspace() returns an int, but sock_wspace() returns an
>> unsigned long.  Sigh...
>
> For this one, let's leave required signed and add an explicit cast to
> serv->sv_max_mesg. Sound ok?

If sk_reserved goes negative, it will be converted to unsigned, and  
become a very large positive number.  The result of the sum will be  
recast back to an int when it's assigned to "required," and we  
probably get a reasonable result.  I doubt an explicit cast will  
change things at all.

Instead, perhaps we should add an explicit check to ensure  
sk_reserved is a reasonable positive value before doing any other  
checks.  (Likewise in the UDP case as well).

I wonder if this is really the correct write space check to use for  
TCP, though.  I remember fixing a similar issue in the RPC client a  
long time ago -- both UDP and TCP used the same wspace check.  It  
resulted in the sk_write_space callback hammering on the RPC client,  
and forward progress on TCP socket writes would slow to a crawl.

You probably want something like this instead:

	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);

	required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
	wspace = sk_stream_wspace(svsk->sk_sk);

	if (wspace < sk_stream_min_wspace(svsk->sk_sk))
		return 0;
	if (required * 2 > wspace)
		return 0;

	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
	return 1;

The first test mimics sk_stream_write_space() and  
xs_tcp_write_space().  I'm still unsure what to do about the  
possibility of one of these signed integers going negative on us.

Bruce?  Neil?  What sayest thou?

>>> +	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>>> +	return 1;
>>> +}
>>> +
>>>  static struct svc_xprt_ops svc_tcp_ops = {
>>>  	.xpo_recvfrom = svc_tcp_recvfrom,
>>>  	.xpo_sendto = svc_tcp_sendto,
>>> @@ -1373,6 +1412,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
>>>  	.xpo_detach = svc_sock_detach,
>>>  	.xpo_free = svc_sock_free,
>>>  	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
>>> +	.xpo_has_wspace = svc_tcp_has_wspace,
>>>  };
>>>
>>>  static struct svc_xprt_class svc_tcp_class = {

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 21/38] svc: Change svc_sock_received to svc_xprt_received and export it
  2007-11-30 21:33       ` Chuck Lever
@ 2007-11-30 23:17         ` Tom Tucker
       [not found]           ` <1196464634.5432.68.camel-SMNkleLxa3ZimH42XvhXlA@public.gmane.org>
  0 siblings, 1 reply; 62+ messages in thread
From: Tom Tucker @ 2007-11-30 23:17 UTC (permalink / raw)
  To: Chuck Lever; +Cc: J. Bruce Fields, linux-nfs


On Fri, 2007-11-30 at 16:33 -0500, Chuck Lever wrote:
> On Nov 29, 2007, at 5:40 PM, Tom Tucker wrote:
> > All fields touched by svc_sock_received are now transport independent.
> > Change it to use svc_xprt directly. This function is called from
> > transport dependent code, so export it.
> >
> > Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> > ---
> >
> >  include/linux/sunrpc/svc_xprt.h |    2 +-
> >  net/sunrpc/svcsock.c            |   37 +++++++++++++++++ 
> > +-------------------
> >  2 files changed, 19 insertions(+), 20 deletions(-)
> >
> > diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/ 
> > svc_xprt.h
> > index d5ef902..c416d05 100644
> > --- a/include/linux/sunrpc/svc_xprt.h
> > +++ b/include/linux/sunrpc/svc_xprt.h
> > @@ -62,8 +62,8 @@ int	svc_unreg_xprt_class(struct svc_xprt_class *);
> >  void	svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
> >  		      struct svc_serv *);
> >  int	svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
> > +void	svc_xprt_received(struct svc_xprt *);
> >  void	svc_xprt_put(struct svc_xprt *xprt);
> > -
> >  static inline void svc_xprt_get(struct svc_xprt *xprt)
> >  {
> >  	kref_get(&xprt->xpt_ref);
> > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> > index 5666541..0015839 100644
> > --- a/net/sunrpc/svcsock.c
> > +++ b/net/sunrpc/svcsock.c
> > @@ -347,14 +347,14 @@ svc_sock_dequeue(struct svc_pool *pool)
> >   * Note: XPT_DATA only gets cleared when a read-attempt finds
> >   * no (or insufficient) data.
> >   */
> > -static inline void
> > -svc_sock_received(struct svc_sock *svsk)
> > +void
> > +svc_xprt_received(struct svc_xprt *xprt)
> 
> Style police again.  I notice several of these patches add new  
> functions with the return value split onto a separate line.

The policy I used was if I didn't change the function signature, I left
it like it was. If I copied it to svc_xprt, I fixed the formatting of
the signature to conform. I didn't do that for this one. I'll check for
others.

> 
> >  {
> > -	svsk->sk_xprt.xpt_pool = NULL;
> > -	clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
> > -	svc_xprt_enqueue(&svsk->sk_xprt);
> > +	xprt->xpt_pool = NULL;
> > +	clear_bit(XPT_BUSY, &xprt->xpt_flags);
> > +	svc_xprt_enqueue(xprt);
> >  }
> > -
> > +EXPORT_SYMBOL_GPL(svc_xprt_received);
> 
> When I submitted the RPC client-side transport switch, Trond  
> suggested we add the EXPORTs later when it was clear why they are  
> needed.  This may be a personal preference of the server maintainer,  
> but I just thought I'd mention the possibility; it seems to make  
> sense here too.

Sure, but we already have a server side provider that helps accelerate
the proving process. The svcrdma module won't build without this being
exported.

> 
> >  /**
> >   * svc_reserve - change the space reserved for the reply to a  
> > request.
> > @@ -783,7 +783,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
> >  				(serv->sv_nrthreads+3) * serv->sv_max_mesg);
> >
> >  	if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
> > -		svc_sock_received(svsk);
> > +		svc_xprt_received(&svsk->sk_xprt);
> >  		return svc_deferred_recv(rqstp);
> >  	}
> >
> > @@ -800,7 +800,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
> >  			dprintk("svc: recvfrom returned error %d\n", -err);
> >  			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
> >  		}
> > -		svc_sock_received(svsk);
> > +		svc_xprt_received(&svsk->sk_xprt);
> >  		return -EAGAIN;
> >  	}
> >  	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
> > @@ -815,7 +815,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
> >  	/*
> >  	 * Maybe more packets - kick another thread ASAP.
> >  	 */
> > -	svc_sock_received(svsk);
> > +	svc_xprt_received(&svsk->sk_xprt);
> >
> >  	len  = skb->len - sizeof(struct udphdr);
> >  	rqstp->rq_arg.len = len;
> > @@ -1123,8 +1123,6 @@ static struct svc_xprt *svc_tcp_accept(struct  
> > svc_xprt *xprt)
> >  	}
> >  	memcpy(&newsvsk->sk_local, sin, slen);
> >
> > -	svc_sock_received(newsvsk);
> > -
> 
> I assume it's OK to remove svc_sock_received() here (rather than
> replacing it with svc_xprt_received()) because you are adding a call
> to svc_xprt_received() below in svc_recv().
> 
> I think this is a non-trivial change amongst a whole bunch of trivial  
> ones in this patch.  Thus it would be nicer if we did this in a  
> separate patch so you can document your rationale for this change.   
> (Yeah, I think we went over this in e-mail some time ago, but still...)
> 

Yeah, this is probably a good idea. I toyed with removing
svc_xprt_received from the provider altogether and putting it in the
common logic, which would remove 10s of calls to svc_xprt_received and
avoid potential races caused by calling it without the BUSY bit held.
The reason I didn't do it was because it means that the xpo_recvfrom
function must complete before queuing the transport back for more I/O,
and this is suboptimal from an MP perspective since some transports can
release the transport early (e.g. RDMA) while it parses the transport
header and does other business.

> >  	if (serv->sv_stats)
> >  		serv->sv_stats->nettcpconn++;
> >
> > @@ -1153,7 +1151,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
> >  		test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
> >
> >  	if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
> > -		svc_sock_received(svsk);
> > +		svc_xprt_received(&svsk->sk_xprt);
> >  		return svc_deferred_recv(rqstp);
> >  	}
> >
> > @@ -1193,7 +1191,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
> >  		if (len < want) {
> >  			dprintk("svc: short recvfrom while reading record length (%d of  
> > %lu)\n",
> >  				len, want);
> > -			svc_sock_received(svsk);
> > +			svc_xprt_received(&svsk->sk_xprt);
> >  			return -EAGAIN; /* record header not complete */
> >  		}
> >
> > @@ -1229,7 +1227,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
> >  	if (len < svsk->sk_reclen) {
> >  		dprintk("svc: incomplete TCP record (%d of %d)\n",
> >  			len, svsk->sk_reclen);
> > -		svc_sock_received(svsk);
> > +		svc_xprt_received(&svsk->sk_xprt);
> >  		return -EAGAIN;	/* record not complete */
> >  	}
> >  	len = svsk->sk_reclen;
> > @@ -1269,7 +1267,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
> >  	svsk->sk_reclen = 0;
> >  	svsk->sk_tcplen = 0;
> >
> > -	svc_sock_received(svsk);
> > +	svc_xprt_received(&svsk->sk_xprt);
> >  	if (serv->sv_stats)
> >  		serv->sv_stats->nettcpcnt++;
> >
> > @@ -1282,7 +1280,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
> >   error:
> >  	if (len == -EAGAIN) {
> >  		dprintk("RPC: TCP recvfrom got EAGAIN\n");
> > -		svc_sock_received(svsk);
> > +		svc_xprt_received(&svsk->sk_xprt);
> >  	} else {
> >  		printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
> >  		       svsk->sk_xprt.xpt_server->sv_name, -len);
> > @@ -1607,8 +1605,9 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
> >  			 */
> >  			__module_get(newxpt->xpt_class->xcl_owner);
> >  			svc_check_conn_limits(svsk->sk_xprt.xpt_server);
> > +			svc_xprt_received(newxpt);
> >  		}
> > -		svc_sock_received(svsk);
> > +		svc_xprt_received(&svsk->sk_xprt);
> >  	} else {
> >  		dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
> >  			rqstp, pool->sp_id, svsk,
> > @@ -1827,7 +1826,7 @@ int svc_addsock(struct svc_serv *serv,
> >  	else {
> >  		svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS);
> >  		if (svsk) {
> > -			svc_sock_received(svsk);
> > +			svc_xprt_received(&svsk->sk_xprt);
> >  			err = 0;
> >  		}
> >  	}
> > @@ -1882,7 +1881,7 @@ svc_create_socket(struct svc_serv *serv, int  
> > protocol,
> >  	}
> >
> >  	if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) {
> > -		svc_sock_received(svsk);
> > +		svc_xprt_received(&svsk->sk_xprt);
> >  		return (struct svc_xprt *)svsk;
> >  	}
> >
> 
> --
> Chuck Lever
> chuck[dot]lever[at]oracle[dot]com
> -
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 25/38] svc: Move the sockaddr information to svc_xprt
       [not found]     ` <20071129224046.14563.59353.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
@ 2007-11-30 23:20       ` Chuck Lever
  0 siblings, 0 replies; 62+ messages in thread
From: Chuck Lever @ 2007-11-30 23:20 UTC (permalink / raw)
  To: Tom Tucker; +Cc: J. Bruce Fields, linux-nfs

A few minor quibbles below.

On Nov 29, 2007, at 5:40 PM, Tom Tucker wrote:
> This patch moves the transport sockaddr to the svc_xprt
> structure.  Convenience functions are added to set and
> get the local and remote addresses of a transport from
> the transport provider as well as determine the length
> of a sockaddr.
>
> A transport is responsible for setting the xpt_local
> and xpt_remote addresses in the svc_xprt structure as
> part of transport creation and xpo_accept processing. This
> cannot be done in a generic way and in fact varies
> between TCP, UDP and RDMA. A set of xpo_ functions
> (e.g. getlocalname, getremotename) could have been
> added but this would have resulted in additional
> caching and copying of the addresses around.  Note that
> the xpt_local address should also be set on listening
> endpoints; for TCP/RDMA this is done as part of
> endpoint creation.
>
> For connected transports like TCP and RDMA, the addresses
> never change and can be set once and copied into the
> rqstp structure for each request. For UDP, however, the
> local and remote addresses may change for each request. In
> this case, the address information is obtained from the
> UDP recvmsg info and copied into the rqstp structure from
> there.
>
> A svc_xprt_local_port function was also added that returns
> the local port given a transport. This is used by
> svc_create_xprt when returning the port associated with
> a newly created transport, and later when creating a
> generic find transport service to check if a service is
> already listening on a given port.
>
> Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> ---
>
>  include/linux/sunrpc/svc_xprt.h |   51 ++++++++++++++++++++++++++++ 
> ++++++++
>  include/linux/sunrpc/svcsock.h  |    4 ---
>  net/sunrpc/svc_xprt.c           |   31 ++++++++++++++++++++--
>  net/sunrpc/svcsock.c            |   56 ++++++++++++++++++++ 
> +------------------
>  4 files changed, 110 insertions(+), 32 deletions(-)
>
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/ 
> svc_xprt.h
> index d93ae27..60bdffc 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -61,6 +61,10 @@ struct svc_xprt {
>  	void			*xpt_auth_cache;/* auth cache */
>  	struct list_head	xpt_deferred;	/* deferred requests that need
>  						 * to be revisted */
> +	struct sockaddr_storage	xpt_local;	/* local address */
> +	int			xpt_locallen;	/* length of address */
> +	struct sockaddr_storage	xpt_remote;	/* remote peer's address */
> +	int			xpt_remotelen;	/* length of address */
>  };
>
>  int	svc_reg_xprt_class(struct svc_xprt_class *);
> @@ -70,9 +74,56 @@ void	svc_xprt_init(struct svc_xprt_class *,  
> struct svc_xprt *,
>  int	svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
>  void	svc_xprt_received(struct svc_xprt *);
>  void	svc_xprt_put(struct svc_xprt *xprt);
> +void	svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt  
> *xprt);
>  static inline void svc_xprt_get(struct svc_xprt *xprt)
>  {
>  	kref_get(&xprt->xpt_ref);
>  }
> +static inline void svc_xprt_set_local(struct svc_xprt *xprt,
> +				      struct sockaddr *sa, int salen)
> +{
> +	memcpy(&xprt->xpt_local, sa, salen);
> +	xprt->xpt_locallen = salen;
> +}
> +static inline void svc_xprt_set_remote(struct svc_xprt *xprt,
> +				       struct sockaddr *sa, int salen)
> +{
> +	memcpy(&xprt->xpt_remote, sa, salen);
> +	xprt->xpt_remotelen = salen;
> +}
> +static inline int svc_addr_port(struct sockaddr *sa)
> +{
> +	int ret = -1;
> +	switch (sa->sa_family) {
> +	case AF_INET:
> +		ret = ntohs(((struct sockaddr_in *)sa)->sin_port);
> +		break;
> +	case AF_INET6:
> +		ret = ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
> +		break;
> +	}
> +	return ret;
> +}
> +
> +static inline int svc_addr_len(struct sockaddr *sa)
> +{
> +	switch (sa->sa_family) {
> +	case AF_INET:
> +		return sizeof(struct sockaddr_in);
> +	case AF_INET6:
> +		return sizeof(struct sockaddr_in6);
> +	}
> +	return -ENOTSUPP;
> +}

Address lengths should be size_t.  I shouldn't have used "int".

For unrecognized address families, you could return zero here instead  
of a negative number.

> +
> +static inline int svc_xprt_local_port(struct svc_xprt *xprt)
> +{
> +	return svc_addr_port((struct sockaddr *)&xprt->xpt_local);
> +}
> +
> +static inline int svc_xprt_remote_port(struct svc_xprt *xprt)
> +{
> +	return svc_addr_port((struct sockaddr *)&xprt->xpt_remote);
> +}
>
>  #endif /* SUNRPC_SVC_XPRT_H */

Ports in native endianness should be "unsigned short."

> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/ 
> svcsock.h
> index 96a229e..206f092 100644
> --- a/include/linux/sunrpc/svcsock.h
> +++ b/include/linux/sunrpc/svcsock.h
> @@ -28,10 +28,6 @@ struct svc_sock {
>  	/* private TCP part */
>  	int			sk_reclen;	/* length of record */
>  	int			sk_tcplen;	/* current read length */
> -
> -	struct sockaddr_storage	sk_local;	/* local address */
> -	struct sockaddr_storage	sk_remote;	/* remote peer's address */
> -	int			sk_remotelen;	/* length of address */
>  };
>
>  /*
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index fdf0d8c..d0cbfe0 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -138,7 +138,6 @@ int svc_create_xprt(struct svc_serv *serv, char  
> *xprt_name, unsigned short port,
>  			spin_unlock(&svc_xprt_class_lock);
>  			if (try_module_get(xcl->xcl_owner)) {
>  				struct svc_xprt *newxprt;
> -				ret = 0;
>  				newxprt = xcl->xcl_ops->xpo_create
>  					(serv,
>  					 (struct sockaddr *)&sin, sizeof(sin),
> @@ -146,7 +145,8 @@ int svc_create_xprt(struct svc_serv *serv, char  
> *xprt_name, unsigned short port,
>  				if (IS_ERR(newxprt)) {
>  					module_put(xcl->xcl_owner);
>  					ret = PTR_ERR(newxprt);
> -				}
> +				} else
> +					ret = svc_xprt_local_port(newxprt);
>  			}
>  			goto out;
>  		}
> @@ -157,3 +157,30 @@ int svc_create_xprt(struct svc_serv *serv,  
> char *xprt_name, unsigned short port,
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(svc_create_xprt);
> +
> +/*
> + * Copy the local and remote xprt addresses to the rqstp structure
> + */
> +void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt  
> *xprt)
> +{
> +	struct sockaddr *sin;
> +
> +	memcpy(&rqstp->rq_addr, &xprt->xpt_remote, xprt->xpt_remotelen);
> +	rqstp->rq_addrlen = xprt->xpt_remotelen;
> +
> +	/*
> +	 * Destination address in request is needed for binding the
> +	 * source address in RPC replies/callbacks later.
> +	 */
> +	sin = (struct sockaddr *)&xprt->xpt_local;
> +	switch (sin->sa_family) {
> +	case AF_INET:
> +		rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr;
> +		break;
> +	case AF_INET6:
> +		rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr;
> +		break;
> +	}
> +}
> +EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs);
> +
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 62b5225..e8cfeeb 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -638,33 +638,13 @@ svc_recvfrom(struct svc_rqst *rqstp, struct  
> kvec *iov, int nr, int buflen)
>  	struct msghdr msg = {
>  		.msg_flags	= MSG_DONTWAIT,
>  	};
> -	struct sockaddr *sin;
>  	int len;
>
>  	len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
>  				msg.msg_flags);
>
> -	/* sock_recvmsg doesn't fill in the name/namelen, so we must..
> -	 */
> -	memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen);
> -	rqstp->rq_addrlen = svsk->sk_remotelen;
> -
> -	/* Destination address in request is needed for binding the
> -	 * source address in RPC callbacks later.
> -	 */
> -	sin = (struct sockaddr *)&svsk->sk_local;
> -	switch (sin->sa_family) {
> -	case AF_INET:
> -		rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr;
> -		break;
> -	case AF_INET6:
> -		rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr;
> -		break;
> -	}
> -
>  	dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
>  		svsk, iov[0].iov_base, iov[0].iov_len, len);
> -
>  	return len;
>  }
>
> @@ -734,8 +714,15 @@ svc_write_space(struct sock *sk)
>  	}
>  }
>
> -static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp,
> -					    struct cmsghdr *cmh)
> +/*
> + * Copy the UDP datagram's destination address to the rqstp  
> structure.
> + * The 'destination' address in this case is the address to which the
> + * peer sent the datagram, i.e. our local address. For multihomed
> + * hosts, this can change from msg to msg. Note that only the IP
> + * address changes, the port number should remain the same.
> + */
> +static void svc_udp_get_dest_address(struct svc_rqst *rqstp,
> +				     struct cmsghdr *cmh)
>  {
>  	switch (rqstp->rq_sock->sk_sk->sk_family) {
>  	case AF_INET: {
> @@ -802,7 +789,10 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
>  		svc_xprt_received(&svsk->sk_xprt);
>  		return -EAGAIN;
>  	}
> -	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
> +	len = svc_addr_len(svc_addr(rqstp));
> +	if (len < 0)
> +		return len;
> +	rqstp->rq_addrlen = len;
>  	if (skb->tstamp.tv64 == 0) {
>  		skb->tstamp = ktime_get_real();
>  		/* Don't enable netstamp, sunrpc doesn't
> @@ -1114,14 +1104,13 @@ static struct svc_xprt *svc_tcp_accept 
> (struct svc_xprt *xprt)
>  	if (!(newsvsk = svc_setup_socket(serv, newsock, &err,
>  				 (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY))))
>  		goto failed;
> -	memcpy(&newsvsk->sk_remote, sin, slen);
> -	newsvsk->sk_remotelen = slen;
> +	svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
>  	err = kernel_getsockname(newsock, sin, &slen);
>  	if (unlikely(err < 0)) {
>  		dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
>  		slen = offsetof(struct sockaddr, sa_data);
>  	}
> -	memcpy(&newsvsk->sk_local, sin, slen);
> +	svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
>
>  	if (serv->sv_stats)
>  		serv->sv_stats->nettcpconn++;
> @@ -1262,6 +1251,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
>  	svsk->sk_reclen = 0;
>  	svsk->sk_tcplen = 0;
>
> +	svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
>  	svc_xprt_received(&svsk->sk_xprt);
>  	if (serv->sv_stats)
>  		serv->sv_stats->nettcpcnt++;
> @@ -1821,6 +1811,11 @@ int svc_addsock(struct svc_serv *serv,
>  	else {
>  		svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS);
>  		if (svsk) {
> +			struct sockaddr_storage addr;
> +			struct sockaddr *sin = (struct sockaddr *)&addr;
> +			int salen;
> +			if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0)
> +				svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
>  			svc_xprt_received(&svsk->sk_xprt);
>  			err = 0;
>  		}
> @@ -1846,6 +1841,9 @@ svc_create_socket(struct svc_serv *serv, int  
> protocol,
>  	int		error;
>  	int		type;
>  	char		buf[RPC_MAX_ADDRBUFLEN];
> +	struct sockaddr_storage addr;
> +	struct sockaddr *newsin = (struct sockaddr *)&addr;
> +	int		newlen;
>
>  	dprintk("svc: svc_create_socket(%s, %d, %s)\n",
>  			serv->sv_program->pg_name, protocol,
> @@ -1870,12 +1868,18 @@ svc_create_socket(struct svc_serv *serv,  
> int protocol,
>  	if (error < 0)
>  		goto bummer;
>
> +	newlen = len;
> +	error = kernel_getsockname(sock, newsin, &newlen);
> +	if (error < 0)
> +		goto bummer;
> +
>  	if (protocol == IPPROTO_TCP) {
>  		if ((error = kernel_listen(sock, 64)) < 0)
>  			goto bummer;
>  	}
>
>  	if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) {
> +		svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen);
>  		svc_xprt_received(&svsk->sk_xprt);
>  		return (struct svc_xprt *)svsk;
>  	}


--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 21/38] svc: Change svc_sock_received to svc_xprt_received and export it
       [not found]           ` <1196464634.5432.68.camel-SMNkleLxa3ZimH42XvhXlA@public.gmane.org>
@ 2007-11-30 23:23             ` Chuck Lever
  0 siblings, 0 replies; 62+ messages in thread
From: Chuck Lever @ 2007-11-30 23:23 UTC (permalink / raw)
  To: Tom Tucker; +Cc: J. Bruce Fields, linux-nfs

On Nov 30, 2007, at 6:17 PM, Tom Tucker wrote:
> On Fri, 2007-11-30 at 16:33 -0500, Chuck Lever wrote:
>> On Nov 29, 2007, at 5:40 PM, Tom Tucker wrote:
>>> {
>>> -	svsk->sk_xprt.xpt_pool = NULL;
>>> -	clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
>>> -	svc_xprt_enqueue(&svsk->sk_xprt);
>>> +	xprt->xpt_pool = NULL;
>>> +	clear_bit(XPT_BUSY, &xprt->xpt_flags);
>>> +	svc_xprt_enqueue(xprt);
>>>  }
>>> -
>>> +EXPORT_SYMBOL_GPL(svc_xprt_received);
>>
>> When I submitted the RPC client-side transport switch, Trond
>> suggested we add the EXPORTs later when it was clear why they are
>> needed.  This may be a personal preference of the server maintainer,
>> but I just thought I'd mention the possibility; it seems to make
>> sense here too.
>
> Sure, but we already have a server side provider that helps accelerate
> the proving process. The svcrdma module won't build without this being
> exported.

That's fine.  You just add the EXPORTs when you introduce the svcrdma  
module.

No biggie.

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 33/38] svc: Add transport hdr size for defer/revisit
       [not found]     ` <20071129224103.14563.72780.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
@ 2007-11-30 23:51       ` Chuck Lever
  0 siblings, 0 replies; 62+ messages in thread
From: Chuck Lever @ 2007-11-30 23:51 UTC (permalink / raw)
  To: Tom Tucker; +Cc: J. Bruce Fields, linux-nfs

On Nov 29, 2007, at 5:41 PM, Tom Tucker wrote:
> Some transports have a header in front of the RPC header. The current
> defer/revisit processing considers only the iov_len and arg_len to
> determine how much to back up when saving the original request
> to revisit. Add a field to the rqstp structure to save the size
> of the transport header so svc_defer can correctly compute
> the start of a request.
>
> Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> ---
>
>  include/linux/sunrpc/svc.h |    2 ++
>  net/sunrpc/svc_xprt.c      |   36 ++++++++++++++++++++++++++ 
> +---------
>  net/sunrpc/svcsock.c       |    2 ++
>  3 files changed, 31 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 04eb20e..f2ada2a 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -217,6 +217,7 @@ struct svc_rqst {
>  	void *			rq_xprt_ctxt;	/* transport specific context ptr */
>  	struct svc_deferred_req*rq_deferred;	/* deferred request we are  
> replaying */
>
> +	size_t			rq_xprt_hlen;	/* xprt header len */
>  	struct xdr_buf		rq_arg;
>  	struct xdr_buf		rq_res;
>  	struct page *		rq_pages[RPCSVC_MAXPAGES];
> @@ -322,6 +323,7 @@ struct svc_deferred_req {
>  	size_t			addrlen;
>  	union svc_addr_u	daddr;	/* where reply must come from */
>  	struct cache_deferred_req handle;
> +	int			xprt_hlen;
>  	int			argslen;
>  	__be32			args[0];
>  };

Why is xprt_hlen an int if rq_xprt_hlen is a size_t?  Shouldn't they  
both be size_t?

I don't see xprt_hlen going negative, but it is used in a bunch of  
computations involving unsigned ints and size_ts.  Since size_t can  
be wider than 32 bits on non-i386, I think we should be defensive  
about not using ints as temporary variables when doing computations  
with iov_base and iov_len.

> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index 56204e9..b31ba0e 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -29,7 +29,6 @@
>  #include <linux/sunrpc/types.h>
>  #include <linux/sunrpc/clnt.h>
>  #include <linux/sunrpc/xdr.h>
> -#include <linux/sunrpc/svcsock.h>
>  #include <linux/sunrpc/stats.h>
>  #include <linux/sunrpc/svc_xprt.h>
>
> @@ -827,10 +826,18 @@ static void svc_revisit(struct  
> cache_deferred_req *dreq, int too_many)
>  	svc_xprt_put(xprt);
>  }
>
> +/*
> + * Save the request off for later processing. The request buffer  
> looks
> + * like this:
> + *
> + * <xprt-header><rpc-header><rpc-pagelist><rpc-tail>
> + *
> + * This code can only handle requests that consist of an xprt-header
> + * and rpc-header.
> + */
>  static struct cache_deferred_req *svc_defer(struct cache_req *req)
>  {
>  	struct svc_rqst *rqstp = container_of(req, struct svc_rqst,  
> rq_chandle);
> -	int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len);
>  	struct svc_deferred_req *dr;
>
>  	if (rqstp->rq_arg.page_len)
> @@ -839,8 +846,10 @@ static struct cache_deferred_req *svc_defer 
> (struct cache_req *req)
>  		dr = rqstp->rq_deferred;
>  		rqstp->rq_deferred = NULL;
>  	} else {
> -		int skip  = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
> +		int skip;
> +		int size;

How about:

	size_t size, skip;

instead?

>  		/* FIXME maybe discard if size too large */
> +		size = sizeof(struct svc_deferred_req) + rqstp->rq_arg.len;
>  		dr = kmalloc(size, GFP_KERNEL);
>  		if (dr == NULL)
>  			return NULL;
> @@ -851,8 +860,12 @@ static struct cache_deferred_req *svc_defer 
> (struct cache_req *req)
>  		dr->addrlen = rqstp->rq_addrlen;
>  		dr->daddr = rqstp->rq_daddr;
>  		dr->argslen = rqstp->rq_arg.len >> 2;
> -		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip,
> -		       dr->argslen<<2);
> +		dr->xprt_hlen = rqstp->rq_xprt_hlen;
> +
> +		/* back up head to the start of the buffer and copy */
> +		skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
> +		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
> +		       dr->argslen << 2);
>  	}
>  	svc_xprt_get(rqstp->rq_xprt);
>  	dr->xprt = rqstp->rq_xprt;
> @@ -868,16 +881,21 @@ static int svc_deferred_recv(struct svc_rqst  
> *rqstp)
>  {
>  	struct svc_deferred_req *dr = rqstp->rq_deferred;
>
> -	rqstp->rq_arg.head[0].iov_base = dr->args;
> -	rqstp->rq_arg.head[0].iov_len = dr->argslen<<2;
> +	/* setup iov_base past transport header */
> +	rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2);
> +	/* The iov_len does not include the transport header bytes */
> +	rqstp->rq_arg.head[0].iov_len = (dr->argslen<<2) - dr->xprt_hlen;
>  	rqstp->rq_arg.page_len = 0;
> -	rqstp->rq_arg.len = dr->argslen<<2;
> +	/* The rq_arg.len includes the transport header bytes */
> +	rqstp->rq_arg.len     = dr->argslen<<2;
>  	rqstp->rq_prot        = dr->prot;
>  	memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
>  	rqstp->rq_addrlen     = dr->addrlen;
> +	/* Save off transport header len in case we get deferred again */
> +	rqstp->rq_xprt_hlen   = dr->xprt_hlen;
>  	rqstp->rq_daddr       = dr->daddr;
>  	rqstp->rq_respages    = rqstp->rq_pages;
> -	return dr->argslen<<2;
> +	return (dr->argslen<<2) - dr->xprt_hlen;
>  }
>
>
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 23a2ab6..03207c9 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -397,6 +397,8 @@ svc_recvfrom(struct svc_rqst *rqstp, struct  
> kvec *iov, int nr, int buflen)
>  	};
>  	int len;
>
> +	rqstp->rq_xprt_hlen = 0;
> +
>  	len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
>  				msg.msg_flags);
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux- 
> nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com




^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 36/38] svc: Add svc API that queries for a transport instance
       [not found]     ` <20071129224109.14563.34563.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
@ 2007-12-01  0:00       ` Chuck Lever
  0 siblings, 0 replies; 62+ messages in thread
From: Chuck Lever @ 2007-12-01  0:00 UTC (permalink / raw)
  To: Tom Tucker; +Cc: J. Bruce Fields, linux-nfs

On Nov 29, 2007, at 5:41 PM, Tom Tucker wrote:
> Add a new svc function that allows a service to query whether a
> transport instance has already been created. This is used in lockd
> to determine whether or not a transport needs to be created when
> a lockd instance is brought up.
>
> Specifying 0 for the address family or port is effectively a wildcard,
> and will result in matching the first transport in the service's list
> that has a matching class name.

Suggestion: this paragraph ^^^ would be good documentation to add to  
the block comment in front of svc_find_xprt().

> Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> ---
>
>  fs/lockd/svc.c                  |   16 ++--------------
>  include/linux/sunrpc/svc_xprt.h |    2 ++
>  net/sunrpc/svc_xprt.c           |   31 ++++++++++++++++++++++++++++ 
> +++
>  3 files changed, 35 insertions(+), 14 deletions(-)
>
> diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
> index a8e79a9..470af01 100644
> --- a/fs/lockd/svc.c
> +++ b/fs/lockd/svc.c
> @@ -219,18 +219,6 @@ lockd(struct svc_rqst *rqstp)
>  	module_put_and_exit(0);
>  }
>
> -static int find_xprt(struct svc_serv *serv, char *proto)
> -{
> -	struct svc_xprt *xprt;
> -	int found = 0;
> -	list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list)
> -		if (strcmp(xprt->xpt_class->xcl_name, proto) == 0) {
> -			found = 1;
> -			break;
> -		}
> -	return found;
> -}
> -
>  /*
>   * Make any sockets that are needed but not present.
>   * If nlm_udpport or nlm_tcpport were set as module
> @@ -242,11 +230,11 @@ static int make_socks(struct svc_serv *serv,  
> int proto)
>  	int err = 0;
>
>  	if (proto == IPPROTO_UDP || nlm_udpport)
> -		if (!find_xprt(serv, "udp"))
> +		if (!svc_find_xprt(serv, "udp", 0, 0))
>  			err = svc_create_xprt(serv, "udp", nlm_udpport,
>  					      SVC_SOCK_DEFAULTS);
>  	if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport))
> -		if (!find_xprt(serv, "tcp"))
> +		if (!svc_find_xprt(serv, "tcp", 0, 0))
>  			err = svc_create_xprt(serv, "tcp", nlm_tcpport,
>  					      SVC_SOCK_DEFAULTS);
>
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/ 
> svc_xprt.h
> index c2fa41d..30fcc82 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -80,6 +80,8 @@ void	svc_xprt_enqueue(struct svc_xprt *xprt);
>  int	svc_port_is_privileged(struct sockaddr *sin);
>  void	svc_delete_xprt(struct svc_xprt *xprt);
>  int	svc_print_xprts(char *buf, int maxlen);
> +struct svc_xprt *
> +svc_find_xprt(struct svc_serv *serv, char *xprt_class, int af, int  
> port);

Nit: usually

struct svc_xprt *svc_find_xprt(struct svc_serv *serv,
							char *xprt_class, int af, int port);

is preferred instead for forward declarations.

>  static inline void svc_xprt_get(struct svc_xprt *xprt)
>  {
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index 7416e66..247f0fb 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -945,3 +945,34 @@ static struct svc_deferred_req  
> *svc_deferred_dequeue(struct svc_xprt *xprt)
>  	spin_unlock(&xprt->xpt_lock);
>  	return dr;
>  }
> +
> +/*
> + * Return the transport instance pointer for the endpoint accepting
> + * connections/peer traffic from the specified transport class,
> + * address family and port.
> + */
> +struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
> +			       int af, int port)
> +{
> +	struct svc_xprt *xprt;
> +	struct svc_xprt *found = NULL;
> +
> +	/* Sanity check the args */
> +	if (!serv || !xcl_name)
> +		return found;
> +
> +	spin_lock_bh(&serv->sv_lock);
> +	list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
> +		if (strcmp(xprt->xpt_class->xcl_name, xcl_name))
> +			continue;
> +		if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
> +			continue;
> +		if (port && port != svc_xprt_local_port(xprt))
> +			continue;
> +		found = xprt;
> +		break;
> +	}
> +	spin_unlock_bh(&serv->sv_lock);
> +	return found;
> +}
> +EXPORT_SYMBOL_GPL(svc_find_xprt);

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files
       [not found]     ` <20071129224105.14563.48684.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
@ 2007-12-03 16:44       ` Chuck Lever
  2007-12-03 16:58         ` J. Bruce Fields
  0 siblings, 1 reply; 62+ messages in thread
From: Chuck Lever @ 2007-12-03 16:44 UTC (permalink / raw)
  To: Tom Tucker; +Cc: J. Bruce Fields, linux-nfs

On Nov 29, 2007, at 5:41 PM, Tom Tucker wrote:
> Add a file that when read lists the set of registered svc
> transports.
>
> Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> ---
>
>  include/linux/sunrpc/svc_xprt.h |    2 ++
>  net/sunrpc/svc_xprt.c           |   28 ++++++++++++++++++++++++++++
>  net/sunrpc/sysctl.c             |   31 ++++++++++++++++++++++++++++ 
> +++
>  3 files changed, 61 insertions(+), 0 deletions(-)
>
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/ 
> svc_xprt.h
> index 36f8b09..c2fa41d 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -79,11 +79,13 @@ void	svc_xprt_copy_addrs(struct svc_rqst  
> *rqstp, struct svc_xprt *xprt);
>  void	svc_xprt_enqueue(struct svc_xprt *xprt);
>  int	svc_port_is_privileged(struct sockaddr *sin);
>  void	svc_delete_xprt(struct svc_xprt *xprt);
> +int	svc_print_xprts(char *buf, int maxlen);
>
>  static inline void svc_xprt_get(struct svc_xprt *xprt)
>  {
>  	kref_get(&xprt->xpt_ref);
>  }
> +
>  static inline void svc_xprt_set_local(struct svc_xprt *xprt,
>  				      struct sockaddr *sa, int salen)
>  {
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index b31ba0e..7416e66 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -93,6 +93,34 @@ int svc_unreg_xprt_class(struct svc_xprt_class  
> *xcl)
>  }
>  EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
>
> +/*
> + * Format the transport list for printing
> + */
> +int svc_print_xprts(char *buf, int maxlen)

One last one, then I will crawl back into my hole.

Arguments and variables that handle string lengths should be of type  
size_t (or at the very least, should be unsigned).

Otherwise, I don't see any architectural problems with the patch series.

> +{
> +	struct list_head *le;
> +	char tmpstr[80];
> +	int len = 0;
> +	buf[0] = '\0';
> +
> +	spin_lock(&svc_xprt_class_lock);
> +	list_for_each(le, &svc_xprt_class_list) {
> +		int slen;
> +		struct svc_xprt_class *xcl =
> +			list_entry(le, struct svc_xprt_class, xcl_list);
> +
> +		sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
> +		slen = strlen(tmpstr);
> +		if (len + slen > maxlen)
> +			break;
> +		len += slen;
> +		strcat(buf, tmpstr);
> +	}
> +	spin_unlock(&svc_xprt_class_lock);
> +
> +	return len;
> +}
> +
>  static void svc_xprt_free(struct kref *kref)
>  {
>  	struct svc_xprt *xprt =
> diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
> index 2be714e..fd7cf59 100644
> --- a/net/sunrpc/sysctl.c
> +++ b/net/sunrpc/sysctl.c
> @@ -18,6 +18,7 @@
>  #include <linux/sunrpc/types.h>
>  #include <linux/sunrpc/sched.h>
>  #include <linux/sunrpc/stats.h>
> +#include <linux/sunrpc/svc_xprt.h>
>
>  /*
>   * Declare the debug flags here
> @@ -48,6 +49,30 @@ rpc_unregister_sysctl(void)
>  	}
>  }
>
> +static int proc_do_xprt(ctl_table *table, int write, struct file  
> *file,
> +			void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> +	char tmpbuf[256];
> +	int len;
> +	if ((*ppos && !write) || !*lenp) {
> +		*lenp = 0;
> +		return 0;
> +	}
> +	if (write)
> +		return -EINVAL;
> +	else {
> +		len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
> +		if (!access_ok(VERIFY_WRITE, buffer, len))
> +			return -EFAULT;
> +
> +		if (__copy_to_user(buffer, tmpbuf, len))
> +			return -EFAULT;
> +	}
> +	*lenp -= len;
> +	*ppos += len;
> +	return 0;
> +}
> +
>  static int
>  proc_dodebug(ctl_table *table, int write, struct file *file,
>  				void __user *buffer, size_t *lenp, loff_t *ppos)
> @@ -140,6 +165,12 @@ static ctl_table debug_table[] = {
>  		.mode		= 0644,
>  		.proc_handler	= &proc_dodebug
>  	},
> +	{
> +		.procname	= "transports",
> +		.maxlen		= 256,
> +		.mode		= 0444,
> +		.proc_handler	= &proc_do_xprt,
> +	},
>  	{ .ctl_name = 0 }
>  };


--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files
  2007-12-03 16:44       ` Chuck Lever
@ 2007-12-03 16:58         ` J. Bruce Fields
  2007-12-03 18:30           ` Chuck Lever
  2007-12-05  8:44           ` Greg Banks
  0 siblings, 2 replies; 62+ messages in thread
From: J. Bruce Fields @ 2007-12-03 16:58 UTC (permalink / raw)
  To: Chuck Lever; +Cc: Tom Tucker, linux-nfs, Neil Brown, Chuck Lever, Greg Banks

On Mon, Dec 03, 2007 at 11:44:15AM -0500, Chuck Lever wrote:
> One last one, then I will crawl back into my hole.

By the way, based on who I recall making a lot of comments, I'm planning
on just adding:

	Acked-by: Neil Brown <neilb@suse.de>
	Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
	Reviewed-by: Greg Banks <gnb@sgi.com>

to all of the server transport-switch and rdma patches.  Does that sound
right?

And I'm assuming there are no objections to submitting this come the
next merge window.

--b.

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files
  2007-12-03 16:58         ` J. Bruce Fields
@ 2007-12-03 18:30           ` Chuck Lever
  2007-12-03 19:10             ` Tom Tucker
  2007-12-05  8:44           ` Greg Banks
  1 sibling, 1 reply; 62+ messages in thread
From: Chuck Lever @ 2007-12-03 18:30 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Tom Tucker, linux-nfs, Neil Brown, Greg Banks

On Dec 3, 2007, at 11:58 AM, J. Bruce Fields wrote:
> On Mon, Dec 03, 2007 at 11:44:15AM -0500, Chuck Lever wrote:
>> One last one, then I will crawl back into my hole.
>
> By the way, based on who I recall making a lot of comments, I'm  
> planning
> on just adding:
>
> 	Acked-by: Neil Brown <neilb@suse.de>
> 	Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
> 	Reviewed-by: Greg Banks <gnb@sgi.com>
>
> to all of the server transport-switch and rdma patches.  Does that  
> sound
> right?

OK for the transport switch.  I'm not qualified to review the bulk of  
the RDMA work, so I would prefer that you don't add Reviewed-by "me"  
for those patches.  My eyes glazed over a few lines after the GPL  
boilerplate.

> And I'm assuming there are no objections to submitting this come the
> next merge window.


Refactoring the server-side write space logic has exposed some  
problems that really shouldn't be allowed to stand.  I think we  
should resolve the issues in the TCP write space callback before  
merging.  Realistically those problems are non-architectural and  
should be fixable before the window opens.

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files
  2007-12-03 18:30           ` Chuck Lever
@ 2007-12-03 19:10             ` Tom Tucker
       [not found]               ` <1196709058.5811.21.camel-SMNkleLxa3ZimH42XvhXlA@public.gmane.org>
  0 siblings, 1 reply; 62+ messages in thread
From: Tom Tucker @ 2007-12-03 19:10 UTC (permalink / raw)
  To: Chuck Lever; +Cc: J. Bruce Fields, linux-nfs, Neil Brown, Greg Banks


On Mon, 2007-12-03 at 13:30 -0500, Chuck Lever wrote:
> On Dec 3, 2007, at 11:58 AM, J. Bruce Fields wrote:
> > On Mon, Dec 03, 2007 at 11:44:15AM -0500, Chuck Lever wrote:
> >> One last one, then I will crawl back into my hole.
> >
> > By the way, based on who I recall making a lot of comments, I'm  
> > planning
> > on just adding:
> >
> > 	Acked-by: Neil Brown <neilb@suse.de>
> > 	Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
> > 	Reviewed-by: Greg Banks <gnb@sgi.com>
> >
> > to all of the server transport-switch and rdma patches.  Does that  
> > sound
> > right?
> 
> OK for the transport switch.  I'm not qualified to review the bulk of  
> the RDMA work, so I would prefer that you don't add Reviewed-by "me"  
> for those patches.  My eyes glazed over a few lines after the GPL  
> boilerplate.
> 
> > And I'm assuming there are no objections to submitting this come the
> > next merge window.
> 
> 
> Refactoring the server-side write space logic has exposed some  
> problems that really shouldn't be allowed to stand.  I think we  
> should resolve the issues in the TCP write space callback before  
> merging.  Realistically those problems are non-architectural and  
> should be fixable before the window opens.
> 

Could you be more specific on what these problems are?

> --
> Chuck Lever
> chuck[dot]lever[at]oracle[dot]com


^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files
       [not found]               ` <1196709058.5811.21.camel-SMNkleLxa3ZimH42XvhXlA@public.gmane.org>
@ 2007-12-03 20:36                 ` Chuck Lever
  2007-12-04  0:45                   ` Tom Tucker
  0 siblings, 1 reply; 62+ messages in thread
From: Chuck Lever @ 2007-12-03 20:36 UTC (permalink / raw)
  To: Tom Tucker, J. Bruce Fields, Neil Brown; +Cc: linux-nfs, Greg Banks

On Dec 3, 2007, at 2:10 PM, Tom Tucker wrote:
> On Mon, 2007-12-03 at 13:30 -0500, Chuck Lever wrote:
>> On Dec 3, 2007, at 11:58 AM, J. Bruce Fields wrote:
>>> On Mon, Dec 03, 2007 at 11:44:15AM -0500, Chuck Lever wrote:
>>>> One last one, then I will crawl back into my hole.
>>>
>>> By the way, based on who I recall making a lot of comments, I'm
>>> planning
>>> on just adding:
>>>
>>> 	Acked-by: Neil Brown <neilb@suse.de>
>>> 	Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
>>> 	Reviewed-by: Greg Banks <gnb@sgi.com>
>>>
>>> to all of the server transport-switch and rdma patches.  Does that
>>> sound
>>> right?
>>
>> OK for the transport switch.  I'm not qualified to review the bulk of
>> the RDMA work, so I would prefer that you don't add Reviewed-by "me"
>> for those patches.  My eyes glazed over a few lines after the GPL
>> boilerplate.
>>
>>> And I'm assuming there are no objections to submitting this come the
>>> next merge window.
>>
>> Refactoring the server-side write space logic has exposed some
>> problems that really shouldn't be allowed to stand.  I think we
>> should resolve the issues in the TCP write space callback before
>> merging.  Realistically those problems are non-architectural and
>> should be fixable before the window opens.
>
> Could you be more specific on what these problems are?

The problems we discussed on Friday about svc_tcp_has_wspace(),  
introduced in 9/38.

The mess is hidden by implicit type casts in the write space check,  
replaced by 9/38, in svc_sock_enqueue().   The return type of  
svc_sock_wspace() is unsigned long, which means the other side of the  
comparison in svc_sock_enqueue() (the sum that becomes "required" in  
your patch) is implicitly promoted to unsigned long before the  
comparison is done.

At the very least, your patch loses that implicit promotion by making
"required" an int in the new callback functions.  "required" should be
an unsigned long in both the UDP and TCP callbacks to preserve the
semantics of the existing logic.

It's also the case that sk_stream_wspace() can return a negative  
value if sk_wmem_queued somehow becomes larger than sk_sndbuf.   
However, in the existing svc_sock_enqueue() logic, a negative result  
from sk_stream_wspace() is converted to an unsigned long, making it a  
large positive number.  The server then thinks it may continue  
writing to a socket whose buffer is already full.

I'm also worried about whether sk_reserved, which is an int and is  
incremented and decremented without a check to see if it has gone  
negative, can go negative during normal operation -- and if it does,  
what does that do to the value of "required" and the result of the  
write space check, in either the UDP or TCP case?

In addition, the server's TCP write space check is missing a  
comparison that every other TCP write space callback in the kernel  
has (comparing sk_stream_wspace and sk_stream_min_wspace).  If we  
don't include it here, then we need some testing to validate that it  
isn't needed, and a comment to explain why this write space callback  
is different from the others.

I was hoping for some comment from Neil or Bruce.

 From Friday's post:
> If sk_reserved goes negative, it will be converted to unsigned, and  
> become a very large positive number.  The result of the sum will be  
> recast back to an int when it's assigned to "required," and we  
> probably get a reasonable result.  I doubt an explicit cast will  
> change things at all.
>
> Instead, perhaps we should add an explicit check to ensure  
> sk_reserved is a reasonable positive value before doing any other  
> checks.  (Likewise in the UDP case as well).
>
> I wonder if this is really the correct write space check to use for  
> TCP, though.  I remember fixing a similar issue in the RPC client a  
> long time ago -- both UDP and TCP used the same wspace check.  It  
> resulted in the sk_write_space callback hammering on the RPC  
> client, and forward progress on TCP socket writes would slow to a  
> crawl.
>
> You probably want something like this instead:
>
> 	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>
> 	required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
> 	wspace = sk_stream_wspace(svsk->sk_sk);
>
> 	if (wspace < sk_stream_min_wspace(svsk->sk_sk))
> 		return 0;
> 	if (required * 2 > wspace)
> 		return 0;
>
> 	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> 	return 1;
>
> The first test mimics sk_stream_write_space() and
> xs_tcp_write_space().  I'm still unsure what to do about the
> possibility of one of these signed integers going negative on us.


--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files
  2007-12-03 20:36                 ` Chuck Lever
@ 2007-12-04  0:45                   ` Tom Tucker
  0 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-12-04  0:45 UTC (permalink / raw)
  To: Chuck Lever, J. Bruce Fields, NeilBrown; +Cc: linux-nfs, Greg Banks




On 12/3/07 2:36 PM, "Chuck Lever" <chuck.lever@oracle.com> wrote:

> On Dec 3, 2007, at 2:10 PM, Tom Tucker wrote:
>> On Mon, 2007-12-03 at 13:30 -0500, Chuck Lever wrote:
>>> On Dec 3, 2007, at 11:58 AM, J. Bruce Fields wrote:
>>>> On Mon, Dec 03, 2007 at 11:44:15AM -0500, Chuck Lever wrote:
>> 
>> Could you be more specific on what these problems are?

[...snip...]

> 
> The problems we discussed on Friday about svc_tcp_has_wspace(),
> introduced in 9/38.
> 
> The mess is hidden by implicit type casts in the write space check,
> replaced by 9/38, in svc_sock_enqueue().   The return type of
> svc_sock_wspace() is unsigned long, which means the other side of the
> comparison in svc_sock_enqueue() (the sum that becomes "required" in
> your patch) is implicitly promoted to unsigned long before the
> comparison is done.
> 
> Your patch misses the implicit cast at least by making "required" an
> int in the new callback functions.  "required" should be an unsigned
> long in both the UDP and TCP callback to preserve the semantics of
> the existing logic.
> 
> It's also the case that sk_stream_wspace() can return a negative
> value if sk_wmem_queued somehow becomes larger than sk_sndbuf.
> However, in the existing svc_sock_enqueue() logic, a negative result
> from sk_stream_wspace() is converted to an unsigned long, making it a
> large positive number.  The server then thinks it may continue
> writing to a socket whose buffer is already full.
> 
> I'm also worried about whether sk_reserved, which is an int and is
> incremented and decremented without a check to see if it has gone
> negative, can go negative during normal operation -- and if it does,
> what does that do to the value of "required" and the result of the
> write space check, in either the UDP or TCP case?
> 
> In addition, the server's TCP write space check is missing a
> comparison that every other TCP write space callback in the kernel
> has (comparing sk_stream_wspace and sk_stream_min_wspace).  If we
> don't include it here, then we need some testing to validate that it
> isn't needed, and a comment to explain why this write space callback
> is different from the others.
> 
> I was hoping for some comment from Neil or Bruce.

Ok, cool. I just didn't make the connection from "refactoring" to this.
These changes are straightforward. I have already modified the patch to
resolve the type promotion issue and added a check for negative values.
I planned on reposting the whole series with the updates after we get
the build issues hammered out.

Thanks,
Tom

> 
>  From Friday's post:
>> If sk_reserved goes negative, it will be converted to unsigned, and
>> become a very large positive number.  The result of the sum will be
>> recast back to an int when it's assigned to "required," and we
>> probably get a reasonable result.  I doubt an explicit cast will
>> change things at all.
>> 
>> Instead, perhaps we should add an explicit check to ensure
>> sk_reserved is a reasonable positive value before doing any other
>> checks.  (Likewise in the UDP case as well).
>> 
>> I wonder if this is really the correct write space check to use for
>> TCP, though.  I remember fixing a similar issue in the RPC client a
>> long time ago -- both UDP and TCP used the same wspace check.  It
>> resulted in the sk_write_space callback hammering on the RPC
>> client, and forward progress on TCP socket writes would slow to a
>> crawl.
>> 
>> You probably want something like this instead:
>> 
>> set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>> 
>> required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
>> wspace = sk_stream_wspace(svsk->sk_sk);
>> 
>> if (wspace < sk_stream_min_wspace(svsk->sk_sk))
>> return 0;
>> if (required * 2 > wspace)
>> return 0;
>> 
>> clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>> return 1;
>> 
>> The first test mimics sk_stream_write_space() and
>> xs_tcp_write_space().  I'm still unsure what to do about the
>> possibility of one of these signed integers going negative on us.
> 
> 
> --
> Chuck Lever
> chuck[dot]lever[at]oracle[dot]com



^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files
  2007-12-03 16:58         ` J. Bruce Fields
  2007-12-03 18:30           ` Chuck Lever
@ 2007-12-05  8:44           ` Greg Banks
  1 sibling, 0 replies; 62+ messages in thread
From: Greg Banks @ 2007-12-05  8:44 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Chuck Lever, Tom Tucker, linux-nfs, Neil Brown

On Mon, Dec 03, 2007 at 11:58:53AM -0500, J. Bruce Fields wrote:
> On Mon, Dec 03, 2007 at 11:44:15AM -0500, Chuck Lever wrote:
> > One last one, then I will crawl back into my hole.
> 
> By the way, based on who I recall making a lot of comments, I'm planning
> on just adding:
> 
> 	Acked-by: Neil Brown <neilb@suse.de>
> 	Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
> 	Reviewed-by: Greg Banks <gnb@sgi.com>
> 
> to all of the server transport-switch and rdma patches.  Does that sound
> right?

For the server transport switch patches: yes please.

For the server RDMA patches: I've not had a chance to read the latest
versions (a long-running drama has been sucking up all my time for some
weeks now), but after all the discussion I'm not expecting any nasty
surprises.  I'll get back to you in a few days.

> And I'm assuming there are no objections to submitting this come the
> next merge window.

Agreed.

Greg.
-- 
Greg Banks, R&D Software Engineer, SGI Australian Software Group.
Apparently, I'm Bedevere.  Which MPHG character are you?
I don't speak for SGI.

^ permalink raw reply	[flat|nested] 62+ messages in thread

* Re: [RFC,PATCH 09/38] svc: Add a transport function that checks for write space
  2007-11-30 22:43             ` Chuck Lever
@ 2007-12-10 20:43               ` Tom Tucker
  0 siblings, 0 replies; 62+ messages in thread
From: Tom Tucker @ 2007-12-10 20:43 UTC (permalink / raw)
  To: Chuck Lever; +Cc: J. Bruce Fields, Neil Brown, linux-nfs


On Fri, 2007-11-30 at 17:43 -0500, Chuck Lever wrote:
> On Nov 30, 2007, at 4:39 PM, Tom Tucker wrote:

[...snip...]

> >
> > For this one, let's leave required signed and add an explicit cast to
> > serv->sv_max_mesg. Sound ok?
> 
> If sk_reserved goes negative, it will be converted to unsigned, and  
> become a very large positive number.  The result of the sum will be  
> recast back to an int when it's assigned to "required," and we  
> probably get a reasonable result.  I doubt an explicit cast will  
> change things at all.
> 
> Instead, perhaps we should add an explicit check to ensure  
> sk_reserved is a reasonable positive value before doing any other  
> checks.  (Likewise in the UDP case as well).
> 
> I wonder if this is really the correct write space check to use for  
> TCP, though.  I remember fixing a similar issue in the RPC client a  
> long time ago -- both UDP and TCP used the same wspace check.  It  
> resulted in the sk_write_space callback hammering on the RPC client,  
> and forward progress on TCP socket writes would slow to a crawl.

For the server, the callback function is svc_write_space. Does the
sk_stream_wspace() < sk_stream_min_wspace() check belong in there as
well? I think this callback gets invoked whenever productive acks are
received for the connection. In other words, I don't see how the check
avoids "hammering" the callback; rather, it avoids fruitlessly waking up
(i.e. calling svc_xprt_enqueue on) a transport.

I'm going to optimistically add it. Please let me know if there are
objections.

> 
> You probably want something like this instead:
> 
> 	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> 
> 	required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
> 	wspace = sk_stream_wspace(svsk->sk_sk);
> 
> 	if (wspace < sk_stream_min_wspace(svsk->sk_sk))
> 		return 0;
> 	if (required * 2 > wspace)
> 		return 0;
> 
> 	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> 	return 1;
> 

I think this is good stuff and will add it unless someone has an
objection.

> The first test mimics sk_stream_write_space() and
> xs_tcp_write_space().  I'm still unsure what to do about the
> possibility of one of these signed integers going negative on us.
> 
> Bruce?  Neil?  What sayest thou?
> 
> >>> +	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> >>> +	return 1;
> >>> +}
> >>> +
> >>>  static struct svc_xprt_ops svc_tcp_ops = {
> >>>  	.xpo_recvfrom = svc_tcp_recvfrom,
> >>>  	.xpo_sendto = svc_tcp_sendto,
> >>> @@ -1373,6 +1412,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
> >>>  	.xpo_detach = svc_sock_detach,
> >>>  	.xpo_free = svc_sock_free,
> >>>  	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
> >>> +	.xpo_has_wspace = svc_tcp_has_wspace,
> >>>  };
> >>>
> >>>  static struct svc_xprt_class svc_tcp_class = {
> 
> --
> Chuck Lever
> chuck[dot]lever[at]oracle[dot]com
> -
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 62+ messages in thread

end of thread, other threads:[~2007-12-10 20:39 UTC | newest]

Thread overview: 62+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-11-29 22:39 [RFC,PATCH 00/38] SVC Transport Switch Tom Tucker
     [not found] ` <20071129223917.14563.77633.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
2007-11-29 22:39   ` [RFC,PATCH 01/38] svc: Add an svc transport class Tom Tucker
2007-11-29 22:39   ` [RFC,PATCH 02/38] svc: Make svc_sock the tcp/udp transport Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 03/38] svc: Change the svc_sock in the rqstp structure to a transport Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 04/38] svc: Add a max payload value to the transport Tom Tucker
     [not found]     ` <20071129224002.14563.96227.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
2007-11-30 20:22       ` Chuck Lever
2007-11-30 20:51         ` Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 05/38] svc: Move sk_sendto and sk_recvfrom to svc_xprt_class Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 06/38] svc: Add transport specific xpo_release function Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 07/38] svc: Add per-transport delete functions Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 08/38] svc: Add xpo_prep_reply_hdr Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 09/38] svc: Add a transport function that checks for write space Tom Tucker
     [not found]     ` <20071129224012.14563.23130.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
2007-11-30 20:46       ` Chuck Lever
2007-11-30 21:39         ` Tom Tucker
     [not found]           ` <1196458764.5432.52.camel-SMNkleLxa3ZimH42XvhXlA@public.gmane.org>
2007-11-30 22:43             ` Chuck Lever
2007-12-10 20:43               ` Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 10/38] svc: Move close processing to a single place Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 11/38] svc: Add xpo_accept transport function Tom Tucker
     [not found]     ` <20071129224016.14563.67547.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
2007-11-30 21:01       ` Chuck Lever
2007-11-30 21:47         ` Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 12/38] svc: Add a generic transport svc_create_xprt function Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 13/38] svc: Change services to use new svc_create_xprt service Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 14/38] svc: Change sk_inuse to a kref Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 15/38] svc: Move sk_flags to the svc_xprt structure Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 16/38] svc: Move sk_server and sk_pool to svc_xprt Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 17/38] svc: Make close transport independent Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 18/38] svc: Move sk_reserved to svc_xprt Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 19/38] svc: Make the enqueue service transport neutral and export it Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 20/38] svc: Make svc_send transport neutral Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 21/38] svc: Change svc_sock_received to svc_xprt_received and export it Tom Tucker
     [not found]     ` <20071129224037.14563.69171.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
2007-11-30 21:33       ` Chuck Lever
2007-11-30 23:17         ` Tom Tucker
     [not found]           ` <1196464634.5432.68.camel-SMNkleLxa3ZimH42XvhXlA@public.gmane.org>
2007-11-30 23:23             ` Chuck Lever
2007-11-29 22:40   ` [RFC,PATCH 22/38] svc: Remove sk_lastrecv Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 24/38] svc: Make deferral processing xprt independent Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 25/38] svc: Move the sockaddr information to svc_xprt Tom Tucker
     [not found]     ` <20071129224046.14563.59353.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
2007-11-30 23:20       ` Chuck Lever
2007-11-29 22:40   ` [RFC,PATCH 26/38] svc: Make svc_sock_release svc_xprt_release Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 27/38] svc: Make svc_recv transport neutral Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 28/38] svc: Make svc_age_temp_sockets svc_age_temp_transports Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 29/38] svc: Move common create logic to common code Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 30/38] svc: Removing remaining references to rq_sock in rqstp Tom Tucker
2007-11-29 22:40   ` [RFC,PATCH 31/38] svc: Make svc_check_conn_limits xprt independent Tom Tucker
2007-11-29 22:41   ` [RFC,PATCH 32/38] svc: Move the xprt independent code to the svc_xprt.c file Tom Tucker
2007-11-29 22:41   ` [RFC,PATCH 33/38] svc: Add transport hdr size for defer/revisit Tom Tucker
     [not found]     ` <20071129224103.14563.72780.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
2007-11-30 23:51       ` Chuck Lever
2007-11-29 22:41   ` [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files Tom Tucker
     [not found]     ` <20071129224105.14563.48684.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
2007-12-03 16:44       ` Chuck Lever
2007-12-03 16:58         ` J. Bruce Fields
2007-12-03 18:30           ` Chuck Lever
2007-12-03 19:10             ` Tom Tucker
     [not found]               ` <1196709058.5811.21.camel-SMNkleLxa3ZimH42XvhXlA@public.gmane.org>
2007-12-03 20:36                 ` Chuck Lever
2007-12-04  0:45                   ` Tom Tucker
2007-12-05  8:44           ` Greg Banks
2007-11-29 22:41   ` [RFC,PATCH 35/38] knfsd: Support adding transports by writing portlist file Tom Tucker
2007-11-29 22:41   ` [RFC,PATCH 36/38] svc: Add svc API that queries for a transport instance Tom Tucker
     [not found]     ` <20071129224109.14563.34563.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
2007-12-01  0:00       ` Chuck Lever
2007-11-29 22:41   ` [RFC,PATCH 37/38] knfsd: Modify write_ports to use svc_find_xprt service Tom Tucker
2007-11-29 22:41   ` [RFC,PATCH 38/38] svc: Add svc_xprt_names service to replace svc_sock_names Tom Tucker
2007-11-29 23:18   ` [RFC,PATCH 00/38] SVC Transport Switch Tom Tucker
  -- strict thread matches above, loose matches on Subject: below --
2007-11-29 22:51 Tom Tucker
     [not found] ` <20071129225142.15107.46200.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
2007-11-29 22:54   ` [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files Tom Tucker
2007-11-29 22:55 [RFC,PATCH 00/38] RPC Transport Switch Tom Tucker
     [not found] ` <20071129225510.15275.82660.stgit-gUwIgmpLGaKNDNWfRnPdfg@public.gmane.org>
2007-11-29 22:56   ` [RFC,PATCH 34/38] svc: Add /proc/sys/sunrpc/transport files Tom Tucker

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox