From: Chuck Lever <chuck.lever@oracle.com>
To: linux-rdma@vger.kernel.org, linux-nfs@vger.kernel.org
Subject: [PATCH v2 12/24] xprtrdma: Honor ->send_request API contract
Date: Tue, 14 Jun 2016 23:16:54 -0400 [thread overview]
Message-ID: <20160615031654.14794.1227.stgit@manet.1015granger.net> (raw)
In-Reply-To: <20160615030626.14794.43805.stgit@manet.1015granger.net>
Commit c93c62231cf5 ("xprtrdma: Disconnect on registration failure")
added a disconnect for some RPC marshaling failures. A disconnect is
needed in only a handful of cases, but it was also being triggered by
benign conditions such as temporary resource shortages. Restrict the
disconnect to the cases that actually need it.
Fix up the lower layers so they don't return -ENOMEM or other error
codes that the RPC client's FSM doesn't explicitly recognize.
Also fix up the places in the send_request path that do want a
disconnect. For example, when ib_post_send or ib_post_recv fails,
it indicates that send or receive queue resources were
miscalculated. That should be rare, and it means there is a software
bug. But xprtrdma can recover: disconnect to reset the transport and
start over.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
net/sunrpc/xprtrdma/fmr_ops.c | 6 +++---
net/sunrpc/xprtrdma/frwr_ops.c | 13 +++++++------
net/sunrpc/xprtrdma/rpc_rdma.c | 2 +-
net/sunrpc/xprtrdma/transport.c | 20 +++++++++++++++-----
net/sunrpc/xprtrdma/verbs.c | 22 +++++++++++++---------
5 files changed, 39 insertions(+), 24 deletions(-)
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index eb42d7f..1ee2b10 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -209,7 +209,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
rpcrdma_defer_mr_recovery(mw);
mw = rpcrdma_get_mw(r_xprt);
if (!mw)
- return -ENOMEM;
+ return -ENOBUFS;
pageoff = offset_in_page(seg1->mr_offset);
seg1->mr_offset -= pageoff; /* start of page */
@@ -259,14 +259,14 @@ out_dmamap_err:
pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
mw->mw_sg, mw->mw_nents);
rpcrdma_defer_mr_recovery(mw);
- return -ENOMEM;
+ return -EIO;
out_maperr:
pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
len, (unsigned long long)dma_pages[0],
pageoff, mw->mw_nents, rc);
rpcrdma_defer_mr_recovery(mw);
- return rc;
+ return -EIO;
}
/* Invalidate all memory regions that were registered for "req".
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index c9ead2b..e77e40a 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -365,7 +365,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
rpcrdma_defer_mr_recovery(mw);
mw = rpcrdma_get_mw(r_xprt);
if (!mw)
- return -ENOMEM;
+ return -ENOBUFS;
} while (mw->frmr.fr_state != FRMR_IS_INVALID);
frmr = &mw->frmr;
frmr->fr_state = FRMR_IS_VALID;
@@ -439,18 +439,18 @@ out_dmamap_err:
pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
mw->mw_sg, mw->mw_nents);
rpcrdma_defer_mr_recovery(mw);
- return -ENOMEM;
+ return -EIO;
out_mapmr_err:
pr_err("rpcrdma: failed to map mr %p (%u/%u)\n",
frmr->fr_mr, n, mw->mw_nents);
- rc = n < 0 ? n : -EIO;
rpcrdma_defer_mr_recovery(mw);
- return rc;
+ return -EIO;
out_senderr:
+ pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc);
rpcrdma_defer_mr_recovery(mw);
- return rc;
+ return -ENOTCONN;
}
static struct ib_send_wr *
@@ -552,7 +552,8 @@ unmap:
return;
reset_mrs:
- pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
+ pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
+ rdma_disconnect(ia->ri_id);
/* Find and reset the MRs in the LOCAL_INV WRs that did not
* get posted. This is synchronous, and slow.
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 35a8109..77e002f 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -251,7 +251,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
/* alloc the pagelist for receiving buffer */
ppages[p] = alloc_page(GFP_ATOMIC);
if (!ppages[p])
- return -ENOMEM;
+ return -EAGAIN;
}
seg[n].mr_page = ppages[p];
seg[n].mr_offset = (void *)(unsigned long) page_base;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 4c8e7f1..be4dd2c 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -558,7 +558,6 @@ out_sendbuf:
out_fail:
rpcrdma_buffer_put(req);
- r_xprt->rx_stats.failed_marshal_count++;
return NULL;
}
@@ -590,8 +589,19 @@ xprt_rdma_free(void *buffer)
rpcrdma_buffer_put(req);
}
-/*
+/**
+ * xprt_rdma_send_request - marshal and send an RPC request
+ * @task: RPC task with an RPC message in rq_snd_buf
+ *
+ * Return values:
+ * 0: The request has been sent
+ * -ENOTCONN: Caller needs to invoke connect logic then call again
+ * -ENOBUFS: Call again later to send the request
+ * -EIO: A permanent error occurred. The request was not sent,
+ * and must not be retried
+ *
* send_request invokes the meat of RPC RDMA. It must do the following:
+ *
* 1. Marshal the RPC request into an RPC RDMA request, which means
* putting a header in front of data, and creating IOVs for RDMA
* from those in the request.
@@ -600,7 +610,6 @@ xprt_rdma_free(void *buffer)
* the request (rpcrdma_ep_post).
* 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
*/
-
static int
xprt_rdma_send_request(struct rpc_task *task)
{
@@ -630,11 +639,12 @@ xprt_rdma_send_request(struct rpc_task *task)
return 0;
failed_marshal:
- r_xprt->rx_stats.failed_marshal_count++;
dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n",
__func__, rc);
if (rc == -EIO)
- return -EIO;
+ r_xprt->rx_stats.failed_marshal_count++;
+ if (rc != -ENOTCONN)
+ return rc;
drop_connection:
xprt_disconnect_done(xprt);
return -ENOTCONN; /* implies disconnect */
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 8b8abd6..35f2176 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1166,7 +1166,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
if (rep) {
rc = rpcrdma_ep_post_recv(ia, ep, rep);
if (rc)
- goto out;
+ return rc;
req->rl_reply = NULL;
}
@@ -1191,10 +1191,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
if (rc)
- dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
- rc);
-out:
- return rc;
+ goto out_postsend_err;
+ return 0;
+
+out_postsend_err:
+ pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc);
+ return -ENOTCONN;
}
/*
@@ -1219,11 +1221,13 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
DMA_BIDIRECTIONAL);
rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
-
if (rc)
- dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
- rc);
- return rc;
+ goto out_postrecv;
+ return 0;
+
+out_postrecv:
+ pr_err("rpcrdma: ib_post_recv returned %i\n", rc);
+ return -ENOTCONN;
}
/**
next prev parent reply other threads:[~2016-06-15 3:16 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-06-15 3:15 [PATCH v2 00/24] NFS/RDMA client patches proposed for v4.8 Chuck Lever
2016-06-15 3:15 ` [PATCH v2 01/24] mlx4-ib: Use coherent memory for priv pages Chuck Lever
2016-06-15 4:28 ` Leon Romanovsky
2016-06-15 16:40 ` Chuck Lever
2016-06-16 14:35 ` Leon Romanovsky
2016-06-16 21:10 ` Sagi Grimberg
2016-06-16 21:58 ` Chuck Lever
2016-06-17 9:20 ` Leon Romanovsky
2016-06-17 19:55 ` Chuck Lever
2016-06-18 10:56 ` Leon Romanovsky
2016-06-18 20:08 ` Chuck Lever
2016-06-19 10:04 ` Sagi Grimberg
2016-06-19 19:38 ` Or Gerlitz
2016-06-19 19:43 ` Or Gerlitz
2016-06-19 20:02 ` Chuck Lever
2016-06-20 5:44 ` Leon Romanovsky
2016-06-20 6:34 ` Sagi Grimberg
2016-06-20 7:01 ` Leon Romanovsky
2016-06-20 8:35 ` Sagi Grimberg
2016-06-20 13:41 ` Yishai Hadas
2016-06-21 13:56 ` Sagi Grimberg
2016-06-21 14:35 ` Laurence Oberman
2016-06-19 9:58 ` Sagi Grimberg
2016-06-19 9:48 ` Sagi Grimberg
2016-06-17 9:05 ` Leon Romanovsky
2016-06-19 7:05 ` Sagi Grimberg
2016-06-15 3:15 ` [PATCH v2 02/24] xprtrdma: Remove FMRs from the unmap list after unmapping Chuck Lever
2016-06-15 3:15 ` [PATCH v2 03/24] xprtrdma: Create common scatterlist fields in rpcrdma_mw Chuck Lever
2016-06-15 3:15 ` [PATCH v2 04/24] xprtrdma: Move init and release helpers Chuck Lever
2016-06-15 3:15 ` [PATCH v2 05/24] xprtrdma: Rename fields in rpcrdma_fmr Chuck Lever
2016-06-15 3:16 ` [PATCH v2 06/24] xprtrdma: Use scatterlist for DMA mapping and unmapping under FMR Chuck Lever
2016-06-15 3:16 ` [PATCH v2 07/24] xprtrdma: Refactor MR recovery work queues Chuck Lever
2016-06-15 3:16 ` [PATCH v2 08/24] xprtrdma: Do not leak an MW during a DMA map failure Chuck Lever
2016-06-15 3:16 ` [PATCH v2 09/24] xprtrdma: Remove ALLPHYSICAL memory registration mode Chuck Lever
2016-06-15 3:16 ` [PATCH v2 10/24] xprtrdma: Remove rpcrdma_map_one() and friends Chuck Lever
2016-06-15 3:16 ` [PATCH v2 11/24] xprtrdma: Reply buffer exhaustion can be catastrophic Chuck Lever
2016-06-15 3:16 ` Chuck Lever [this message]
2016-06-15 3:17 ` [PATCH v2 13/24] xprtrdma: Chunk list encoders must not return zero Chuck Lever
2016-06-15 3:17 ` [PATCH v2 14/24] xprtrdma: Allocate MRs on demand Chuck Lever
2016-06-15 3:17 ` [PATCH v2 15/24] xprtrdma: Release orphaned MRs immediately Chuck Lever
2016-06-15 3:17 ` [PATCH v2 16/24] xprtrdma: Place registered MWs on a per-req list Chuck Lever
2016-06-15 3:17 ` [PATCH v2 17/24] xprtrdma: Chunk list encoders no longer share one rl_segments array Chuck Lever
2016-06-15 3:17 ` [PATCH v2 18/24] xprtrdma: rpcrdma_inline_fixup() overruns the receive page list Chuck Lever
2016-06-15 3:17 ` [PATCH v2 19/24] xprtrdma: Do not update {head, tail}.iov_len in rpcrdma_inline_fixup() Chuck Lever
2016-06-15 3:18 ` [PATCH v2 20/24] xprtrdma: Update only specific fields in private receive buffer Chuck Lever
2016-06-15 3:18 ` [PATCH v2 21/24] xprtrdma: Clean up fixup_copy_count accounting Chuck Lever
2016-06-15 3:18 ` [PATCH v2 22/24] xprtrdma: No direct data placement with krb5i and krb5p Chuck Lever
2016-06-15 3:18 ` [PATCH v2 23/24] svc: Avoid garbage replies when pc_func() returns rpc_drop_reply Chuck Lever
2016-06-15 3:18 ` [PATCH v2 24/24] NFS: Don't drop CB requests with invalid principals Chuck Lever
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160615031654.14794.1227.stgit@manet.1015granger.net \
--to=chuck.lever@oracle.com \
--cc=linux-nfs@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).