From: Steve Wise <swise@opengridcomputing.com>
To: trond.myklebust@primarydata.com
Cc: linux-nfs@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: [PATCH] xprtrdma: mind the device's max fast register page list depth
Date: Thu, 10 Apr 2014 10:13:25 -0500 [thread overview]
Message-ID: <20140410151325.28724.92004.stgit@build.ogc.int> (raw)
Some rdma devices don't support a fast register page list depth of
at least RPCRDMA_MAX_DATA_SEGS. So xprtrdma needs to chunk its fast
register regions according to the minimum of the device max supported
depth or RPCRDMA_MAX_DATA_SEGS.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
---
net/sunrpc/xprtrdma/rpc_rdma.c | 2 -
net/sunrpc/xprtrdma/verbs.c | 64 ++++++++++++++++++++++++++++-----------
net/sunrpc/xprtrdma/xprt_rdma.h | 1 +
3 files changed, 47 insertions(+), 20 deletions(-)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 96ead52..14d7634 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -249,8 +249,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
req->rl_nchunks = nchunks;
BUG_ON(nchunks == 0);
- BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
- && (nchunks > 3));
/*
* finish off header. If write, marshal discrim and nchunks.
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 93726560..c08f193 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -539,6 +539,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
__func__);
memreg = RPCRDMA_REGISTER;
#endif
+ } else {
+ /* Mind the ia limit on FRMR page list depth */
+ ia->ri_max_frmr_depth = min_t(unsigned int,
+ RPCRDMA_MAX_DATA_SEGS,
+ devattr.max_fast_reg_page_list_len);
}
break;
}
@@ -659,24 +664,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
+ case RPCRDMA_FRMR: {
+ int depth = 7;
+
/* Add room for frmr register and invalidate WRs.
* 1. FRMR reg WR for head
* 2. FRMR invalidate WR for head
- * 3. FRMR reg WR for pagelist
- * 4. FRMR invalidate WR for pagelist
+ * 3. N FRMR reg WRs for pagelist
+ * 4. N FRMR invalidate WRs for pagelist
* 5. FRMR reg WR for tail
* 6. FRMR invalidate WR for tail
* 7. The RDMA_SEND WR
*/
- ep->rep_attr.cap.max_send_wr *= 7;
+
+ /* Calculate N if the device max FRMR depth is smaller than
+ * RPCRDMA_MAX_DATA_SEGS.
+ */
+ if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
+ int delta = RPCRDMA_MAX_DATA_SEGS -
+ ia->ri_max_frmr_depth;
+
+ do {
+ depth += 2; /* FRMR reg + invalidate */
+ delta -= ia->ri_max_frmr_depth;
+ } while (delta > 0);
+
+ }
+ ep->rep_attr.cap.max_send_wr *= depth;
if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
- cdata->max_requests = devattr.max_qp_wr / 7;
+ cdata->max_requests = devattr.max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
- ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
+ ep->rep_attr.cap.max_send_wr = cdata->max_requests *
+ depth;
}
break;
+ }
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
/* Add room for mw_binds+unbinds - overkill! */
@@ -1043,16 +1066,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
case RPCRDMA_FRMR:
for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
- RPCRDMA_MAX_SEGS);
+ ia->ri_max_frmr_depth);
if (IS_ERR(r->r.frmr.fr_mr)) {
rc = PTR_ERR(r->r.frmr.fr_mr);
dprintk("RPC: %s: ib_alloc_fast_reg_mr"
" failed %i\n", __func__, rc);
goto out;
}
- r->r.frmr.fr_pgl =
- ib_alloc_fast_reg_page_list(ia->ri_id->device,
- RPCRDMA_MAX_SEGS);
+ r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
+ ia->ri_id->device,
+ ia->ri_max_frmr_depth);
if (IS_ERR(r->r.frmr.fr_pgl)) {
rc = PTR_ERR(r->r.frmr.fr_pgl);
dprintk("RPC: %s: "
@@ -1498,8 +1521,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
seg1->mr_offset -= pageoff; /* start of page */
seg1->mr_len += pageoff;
len = -pageoff;
- if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
- *nsegs = RPCRDMA_MAX_DATA_SEGS;
+ if (*nsegs > ia->ri_max_frmr_depth)
+ *nsegs = ia->ri_max_frmr_depth;
for (page_no = i = 0; i < *nsegs;) {
rpcrdma_map_one(ia, seg, writing);
pa = seg->mr_dma;
@@ -1796,6 +1819,7 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int rc = 0;
+ int tmp_nsegs = nsegs;
switch (ia->ri_memreg_strategy) {
@@ -1805,35 +1829,39 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
seg->mr_rkey = ia->ri_bind_mem->rkey;
seg->mr_base = seg->mr_dma;
seg->mr_nsegs = 1;
- nsegs = 1;
+ tmp_nsegs = 1;
break;
#endif
/* Registration using frmr registration */
case RPCRDMA_FRMR:
- rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
+ rc = rpcrdma_register_frmr_external(seg, &tmp_nsegs, writing,
+ ia, r_xprt);
break;
/* Registration using fmr memory registration */
case RPCRDMA_MTHCAFMR:
- rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
+ rc = rpcrdma_register_fmr_external(seg, &tmp_nsegs, writing,
+ ia);
break;
/* Registration using memory windows */
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
- rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt);
+ rc = rpcrdma_register_memwin_external(seg, &tmp_nsegs, writing,
+ ia, r_xprt);
break;
/* Default registration each time */
default:
- rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
+ rc = rpcrdma_register_default_external(seg, &tmp_nsegs, writing,
+ ia);
break;
}
if (rc)
return -1;
- return nsegs;
+ return tmp_nsegs;
}
int
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index cc1445d..98340a3 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -66,6 +66,7 @@ struct rpcrdma_ia {
struct completion ri_done;
int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy;
+ unsigned int ri_max_frmr_depth;
};
/*
WARNING: multiple messages have this Message-ID (diff)
From: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
To: trond.myklebust-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org
Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: [PATCH] xprtrdma: mind the device's max fast register page list depth
Date: Thu, 10 Apr 2014 10:13:25 -0500 [thread overview]
Message-ID: <20140410151325.28724.92004.stgit@build.ogc.int> (raw)
Some rdma devices don't support a fast register page list depth of
at least RPCRDMA_MAX_DATA_SEGS. So xprtrdma needs to chunk its fast
register regions according to the minimum of the device max supported
depth or RPCRDMA_MAX_DATA_SEGS.
Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
---
net/sunrpc/xprtrdma/rpc_rdma.c | 2 -
net/sunrpc/xprtrdma/verbs.c | 64 ++++++++++++++++++++++++++++-----------
net/sunrpc/xprtrdma/xprt_rdma.h | 1 +
3 files changed, 47 insertions(+), 20 deletions(-)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 96ead52..14d7634 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -249,8 +249,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
req->rl_nchunks = nchunks;
BUG_ON(nchunks == 0);
- BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
- && (nchunks > 3));
/*
* finish off header. If write, marshal discrim and nchunks.
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 93726560..c08f193 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -539,6 +539,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
__func__);
memreg = RPCRDMA_REGISTER;
#endif
+ } else {
+ /* Mind the ia limit on FRMR page list depth */
+ ia->ri_max_frmr_depth = min_t(unsigned int,
+ RPCRDMA_MAX_DATA_SEGS,
+ devattr.max_fast_reg_page_list_len);
}
break;
}
@@ -659,24 +664,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
+ case RPCRDMA_FRMR: {
+ int depth = 7;
+
/* Add room for frmr register and invalidate WRs.
* 1. FRMR reg WR for head
* 2. FRMR invalidate WR for head
- * 3. FRMR reg WR for pagelist
- * 4. FRMR invalidate WR for pagelist
+ * 3. N FRMR reg WRs for pagelist
+ * 4. N FRMR invalidate WRs for pagelist
* 5. FRMR reg WR for tail
* 6. FRMR invalidate WR for tail
* 7. The RDMA_SEND WR
*/
- ep->rep_attr.cap.max_send_wr *= 7;
+
+ /* Calculate N if the device max FRMR depth is smaller than
+ * RPCRDMA_MAX_DATA_SEGS.
+ */
+ if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
+ int delta = RPCRDMA_MAX_DATA_SEGS -
+ ia->ri_max_frmr_depth;
+
+ do {
+ depth += 2; /* FRMR reg + invalidate */
+ delta -= ia->ri_max_frmr_depth;
+ } while (delta > 0);
+
+ }
+ ep->rep_attr.cap.max_send_wr *= depth;
if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
- cdata->max_requests = devattr.max_qp_wr / 7;
+ cdata->max_requests = devattr.max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
- ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
+ ep->rep_attr.cap.max_send_wr = cdata->max_requests *
+ depth;
}
break;
+ }
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
/* Add room for mw_binds+unbinds - overkill! */
@@ -1043,16 +1066,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
case RPCRDMA_FRMR:
for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
- RPCRDMA_MAX_SEGS);
+ ia->ri_max_frmr_depth);
if (IS_ERR(r->r.frmr.fr_mr)) {
rc = PTR_ERR(r->r.frmr.fr_mr);
dprintk("RPC: %s: ib_alloc_fast_reg_mr"
" failed %i\n", __func__, rc);
goto out;
}
- r->r.frmr.fr_pgl =
- ib_alloc_fast_reg_page_list(ia->ri_id->device,
- RPCRDMA_MAX_SEGS);
+ r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
+ ia->ri_id->device,
+ ia->ri_max_frmr_depth);
if (IS_ERR(r->r.frmr.fr_pgl)) {
rc = PTR_ERR(r->r.frmr.fr_pgl);
dprintk("RPC: %s: "
@@ -1498,8 +1521,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
seg1->mr_offset -= pageoff; /* start of page */
seg1->mr_len += pageoff;
len = -pageoff;
- if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
- *nsegs = RPCRDMA_MAX_DATA_SEGS;
+ if (*nsegs > ia->ri_max_frmr_depth)
+ *nsegs = ia->ri_max_frmr_depth;
for (page_no = i = 0; i < *nsegs;) {
rpcrdma_map_one(ia, seg, writing);
pa = seg->mr_dma;
@@ -1796,6 +1819,7 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int rc = 0;
+ int tmp_nsegs = nsegs;
switch (ia->ri_memreg_strategy) {
@@ -1805,35 +1829,39 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
seg->mr_rkey = ia->ri_bind_mem->rkey;
seg->mr_base = seg->mr_dma;
seg->mr_nsegs = 1;
- nsegs = 1;
+ tmp_nsegs = 1;
break;
#endif
/* Registration using frmr registration */
case RPCRDMA_FRMR:
- rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
+ rc = rpcrdma_register_frmr_external(seg, &tmp_nsegs, writing,
+ ia, r_xprt);
break;
/* Registration using fmr memory registration */
case RPCRDMA_MTHCAFMR:
- rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
+ rc = rpcrdma_register_fmr_external(seg, &tmp_nsegs, writing,
+ ia);
break;
/* Registration using memory windows */
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
- rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt);
+ rc = rpcrdma_register_memwin_external(seg, &tmp_nsegs, writing,
+ ia, r_xprt);
break;
/* Default registration each time */
default:
- rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
+ rc = rpcrdma_register_default_external(seg, &tmp_nsegs, writing,
+ ia);
break;
}
if (rc)
return -1;
- return nsegs;
+ return tmp_nsegs;
}
int
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index cc1445d..98340a3 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -66,6 +66,7 @@ struct rpcrdma_ia {
struct completion ri_done;
int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy;
+ unsigned int ri_max_frmr_depth;
};
/*
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next reply other threads:[~2014-04-10 15:13 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-04-10 15:13 Steve Wise [this message]
2014-04-10 15:13 ` [PATCH] xprtrdma: mind the device's max fast register page list depth Steve Wise
2014-04-11 18:34 ` Chuck Lever
2014-04-11 18:34 ` Chuck Lever
2014-04-11 19:11 ` Steve Wise
2014-04-11 19:11 ` Steve Wise
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140410151325.28724.92004.stgit@build.ogc.int \
--to=swise@opengridcomputing.com \
--cc=linux-nfs@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=trond.myklebust@primarydata.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.