From: Ben Myers <bpm@sgi.com>
To: "J. Bruce Fields" <bfields@fieldses.org>
Cc: linux-nfs@vger.kernel.org
Subject: sunrpc: socket buffer size tuneable
Date: Thu, 24 Jan 2013 18:59:30 -0600 [thread overview]
Message-ID: <20130125005930.GC30652@sgi.com> (raw)
At 1020 threads the send buffer size wraps and becomes negative, causing
the nfs server to grind to a halt. Rather than setting bufsize based
upon the number of nfsd threads, make the buffer sizes tuneable via
module parameters.
Set the buffer sizes in terms of the number of rpcs you want to fit into
the buffer.
Signed-off-by: Ben Myers <bpm@sgi.com>
---
v2: moved tuneables to net/sunrpc/svcsock.c, allow user to lock buffer
size
net/sunrpc/sunrpc_syms.c | 39 +++++++++++++++++++++++++++++++++++
net/sunrpc/svcsock.c | 52 ++++++++++++++++++++++++++---------------------
2 files changed, 68 insertions(+), 23 deletions(-)
Index: xfs/net/sunrpc/svcsock.c
===================================================================
--- xfs.orig/net/sunrpc/svcsock.c
+++ xfs/net/sunrpc/svcsock.c
@@ -57,6 +57,47 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
+/*
+ * Size each socket's send and receive buffers based upon the number of rpcs
+ * we'd like to be able to fit into them. The TCP rcvbuf is enough for just a
+ * few requests. TCP sndbuf is the same as the tcp_slot_table_entries default,
+ * the maximum number of in-flight rpcs for a client. UDP sndbuf and rcvbuf
+ * defaults are the same as the udp_slot_table_entries default.
+ *
+ * Setting these too small can cause nfsd threads to block when sending.
+ */
+static int tcp_rcvbuf_nrpc = 8;
+static int tcp_sndbuf_nrpc = 8;
+
+static int udp_rcvbuf_nrpc = 6;
+static int udp_sndbuf_nrpc = 6;
+
+static bool lock_bufsize = false;
+
+/*
+ * Minimum for bufsize settings is enough space for 6 rpcs, maximum is enough
+ * space for 128 rpcs.
+ */
+static int param_set_bufsize_nrpc(const char *val, struct kernel_param *kp)
+{
+ char *endp;
+ int num = simple_strtol(val, &endp, 0);
+
+ if (endp == val || (*endp && *endp != '\n') || num < 6 || num > 128)
+ return -EINVAL;
+ *((int *)kp->arg) = num;
+ return 0;
+}
+
+module_param_call(tcp_rcvbuf_nrpc, param_set_bufsize_nrpc, param_get_int,
+ &tcp_rcvbuf_nrpc, 0644);
+module_param_call(tcp_sndbuf_nrpc, param_set_bufsize_nrpc, param_get_int,
+ &tcp_sndbuf_nrpc, 0644);
+module_param_call(udp_rcvbuf_nrpc, param_set_bufsize_nrpc, param_get_int,
+ &udp_rcvbuf_nrpc, 0644);
+module_param_call(udp_sndbuf_nrpc, param_set_bufsize_nrpc, param_get_int,
+ &udp_sndbuf_nrpc, 0644);
+module_param(lock_bufsize, bool, 0644);
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
int flags);
@@ -375,8 +416,8 @@ static int svc_partial_recvfrom(struct s
/*
* Set socket snd and rcv buffer lengths
*/
-static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
- unsigned int rcv)
+static void svc_sock_setbufsize(struct socket *sock, int snd, int rcv,
+ bool lock)
{
#if 0
mm_segment_t oldfs;
@@ -392,8 +433,17 @@ static void svc_sock_setbufsize(struct s
* DaveM said I could!
*/
lock_sock(sock->sk);
- sock->sk->sk_sndbuf = snd * 2;
- sock->sk->sk_rcvbuf = rcv * 2;
+ sock->sk->sk_sndbuf = snd;
+ sock->sk->sk_rcvbuf = rcv;
+
+ if (lock) {
+ /*
+ * The socket buffer can be resized by the networking code
+ * unless you specify that this is not to be done.
+ */
+ sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
+ }
+
sock->sk->sk_write_space(sock->sk);
release_sock(sock->sk);
#endif
@@ -525,18 +575,17 @@ static int svc_udp_recvfrom(struct svc_r
size_t len;
int err;
- if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
- /* udp sockets need large rcvbuf as all pending
- * requests are still in that buffer. sndbuf must
- * also be large enough that there is enough space
- * for one reply per thread. We count all threads
- * rather than threads in a particular pool, which
- * provides an upper bound on the number of threads
- * which will access the socket.
- */
- svc_sock_setbufsize(svsk->sk_sock,
- (serv->sv_nrthreads+3) * serv->sv_max_mesg,
- (serv->sv_nrthreads+3) * serv->sv_max_mesg);
+ if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) {
+ /*
+ * UDP sockets need a large rcvbuf as all pending requests are
+ * still in that buffer. sndbuf must also be large enough to
+ * fit a reply for each request the server could be processing.
+ */
+ svc_sock_setbufsize(svsk->sk_sock,
+ udp_sndbuf_nrpc * serv->sv_max_mesg,
+ udp_rcvbuf_nrpc * serv->sv_max_mesg,
+ lock_bufsize);
+ }
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
skb = NULL;
@@ -695,13 +744,14 @@ static void svc_udp_init(struct svc_sock
svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
svsk->sk_sk->sk_write_space = svc_write_space;
- /* initialise setting must have enough space to
- * receive and respond to one request.
- * svc_udp_recvfrom will re-adjust if necessary
+ /*
+ * Initial setting must have enough space to receive and respond to one
+ * request. svc_udp_recvfrom will readjust if necessary.
*/
svc_sock_setbufsize(svsk->sk_sock,
- 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
- 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
+ udp_sndbuf_nrpc * svsk->sk_xprt.xpt_server->sv_max_mesg,
+ udp_rcvbuf_nrpc * svsk->sk_xprt.xpt_server->sv_max_mesg,
+ lock_bufsize);
/* data might have come in before data_ready set up */
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
@@ -1377,8 +1427,10 @@ static struct svc_sock *svc_setup_socket
/* initialise setting must have enough space to
* receive and respond to one request.
*/
- svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
- 4 * serv->sv_max_mesg);
+ svc_sock_setbufsize(svsk->sk_sock,
+ tcp_sndbuf_nrpc * serv->sv_max_mesg,
+ tcp_rcvbuf_nrpc * serv->sv_max_mesg,
+ lock_bufsize);
svc_tcp_init(svsk, serv);
}
next reply other threads:[~2013-01-25 0:59 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-01-25 0:59 Ben Myers [this message]
2013-01-25 18:45 ` sunrpc: socket buffer size tuneable J. Bruce Fields
2013-01-25 19:10 ` Ben Myers
2013-01-25 18:57 ` J. Bruce Fields
2013-01-25 19:16 ` Jim Rees
2013-01-25 19:29 ` Ben Myers
2013-01-25 20:21 ` J. Bruce Fields
2013-01-25 20:35 ` Ben Myers
2013-01-25 21:12 ` Myklebust, Trond
2013-01-25 21:21 ` J. Bruce Fields
2013-01-25 21:29 ` Myklebust, Trond
2013-01-25 21:35 ` J. Bruce Fields
2013-01-25 21:45 ` Myklebust, Trond
2013-01-25 21:57 ` J. Bruce Fields
2013-01-25 22:02 ` Ben Myers
2013-01-25 22:20 ` Myklebust, Trond
2013-01-25 22:34 ` J. Bruce Fields
2013-01-25 23:00 ` Myklebust, Trond
2013-01-25 20:35 ` J. Bruce Fields
2013-01-25 20:51 ` J. Bruce Fields
2013-01-25 21:13 ` Ben Myers
2013-01-25 21:02 ` J. Bruce Fields
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130125005930.GC30652@sgi.com \
--to=bpm@sgi.com \
--cc=bfields@fieldses.org \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).