linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Chuck Lever <chuck.lever@oracle.com>
To: linux-rdma@vger.kernel.org, linux-nfs@vger.kernel.org
Subject: [PATCH v1 2/5] xprtrdma: Per-connection pad optimization
Date: Fri, 13 Jan 2017 12:43:06 -0500	[thread overview]
Message-ID: <20170113174305.32692.50131.stgit@manet.1015granger.net> (raw)
In-Reply-To: <20170113173023.32692.30661.stgit@manet.1015granger.net>

The pad optimization setting is changed by echoing a value into
/proc/sys/sunrpc/rdma_pad_optimize. This is a global setting,
affecting all RPC-over-RDMA connections to all servers.

The marshaling code picks up that value and uses it for decisions
about how to construct each RPC-over-RDMA frame. Having it change
suddenly in mid-operation can result in unexpected failures. Also,
some of the servers a client mounts may need pads, while others don't.

So instead, copy the setting into each connection's rpcrdma_ia at
mount time, and use the copy, which can't change during the life of
the connection.

This also removes a hack: rpcrdma_convert_iovs was using
the remote-invalidation-expected flag to predict when it could leave
out Write chunk padding. That heuristic worked only because the Linux
server handles implicit XDR padding on Write chunks correctly, and
only Linux servers can set the connection's
remote-invalidation-expected flag.

It's more sensible to use the pad optimization setting instead.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xprtrdma/rpc_rdma.c  |   28 ++++++++++++++--------------
 net/sunrpc/xprtrdma/verbs.c     |    1 +
 net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index a524d3c..4909758 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -186,9 +186,9 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
  */
 
 static int
-rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
-	enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg,
-	bool reminv_expected)
+rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
+		     unsigned int pos, enum rpcrdma_chunktype type,
+		     struct rpcrdma_mr_seg *seg)
 {
 	int len, n, p, page_base;
 	struct page **ppages;
@@ -229,14 +229,15 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
 	/* When encoding a Read chunk, the tail iovec contains an
 	 * XDR pad and may be omitted.
 	 */
-	if (type == rpcrdma_readch && xprt_rdma_pad_optimize)
+	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_padding)
 		return n;
 
-	/* When encoding the Write list, some servers need to see an extra
-	 * segment for odd-length Write chunks. The upper layer provides
-	 * space in the tail iovec for this purpose.
+	/* When encoding a Write chunk, some servers need to see an
+	 * extra segment for non-XDR-aligned Write chunks. The upper
+	 * layer provides space in the tail iovec that may be used
+	 * for this purpose.
 	 */
-	if (type == rpcrdma_writech && reminv_expected)
+	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_padding)
 		return n;
 
 	if (xdrbuf->tail[0].iov_len) {
@@ -291,7 +292,8 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
 	if (rtype == rpcrdma_areadch)
 		pos = 0;
 	seg = req->rl_segments;
-	nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false);
+	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
+				     rtype, seg);
 	if (nsegs < 0)
 		return ERR_PTR(nsegs);
 
@@ -353,10 +355,9 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
 	}
 
 	seg = req->rl_segments;
-	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf,
+	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
 				     rqst->rq_rcv_buf.head[0].iov_len,
-				     wtype, seg,
-				     r_xprt->rx_ia.ri_reminv_expected);
+				     wtype, seg);
 	if (nsegs < 0)
 		return ERR_PTR(nsegs);
 
@@ -421,8 +422,7 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
 	}
 
 	seg = req->rl_segments;
-	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg,
-				     r_xprt->rx_ia.ri_reminv_expected);
+	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
 	if (nsegs < 0)
 		return ERR_PTR(nsegs);
 
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 11d0774..890cb3a 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -208,6 +208,7 @@
 
 	/* Default settings for RPC-over-RDMA Version One */
 	r_xprt->rx_ia.ri_reminv_expected = false;
+	r_xprt->rx_ia.ri_implicit_padding = xprt_rdma_pad_optimize;
 	rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
 	wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
 
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index e35efd4..f495df0c 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -75,6 +75,7 @@ struct rpcrdma_ia {
 	unsigned int		ri_max_inline_write;
 	unsigned int		ri_max_inline_read;
 	bool			ri_reminv_expected;
+	bool			ri_implicit_padding;
 	enum ib_mr_type		ri_mrtype;
 	struct ib_qp_attr	ri_qp_attr;
 	struct ib_qp_init_attr	ri_qp_init_attr;


  parent reply	other threads:[~2017-01-13 17:43 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-13 17:42 [PATCH v1 0/5] Fix "support large inline thresholds" Chuck Lever
2017-01-13 17:42 ` [PATCH v1 1/5] xprtrdma: Fix Read chunk padding Chuck Lever
2017-01-13 17:43 ` Chuck Lever [this message]
2017-01-13 17:43 ` [PATCH v1 3/5] xprtrdma: Disable pad optimization by default Chuck Lever
2017-01-13 17:43 ` [PATCH v1 4/5] xprtrdma: Reduce required number of send SGEs Chuck Lever
2017-01-13 18:01   ` Parav Pandit
2017-01-13 18:30     ` Chuck Lever
2017-01-13 19:14       ` Parav Pandit
2017-01-13 17:43 ` [PATCH v1 5/5] xprtrdma: Shrink send SGEs array Chuck Lever
2017-01-20 17:30 ` [PATCH v1 0/5] Fix "support large inline thresholds" Steve Wise
2017-01-20 18:17   ` Chuck Lever

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170113174305.32692.50131.stgit@manet.1015granger.net \
    --to=chuck.lever@oracle.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).