* [PATCH v1 0/2] NFS/RDMA fixes for v4.17-rc
@ 2018-05-01 15:37 Chuck Lever
2018-05-01 15:37 ` [PATCH v1 1/2] xprtrdma: Fix list corruption / DMAR errors during MR recovery Chuck Lever
2018-05-01 15:37 ` [PATCH v1 2/2] sunrpc: Fix latency trace point crashes Chuck Lever
0 siblings, 2 replies; 3+ messages in thread
From: Chuck Lever @ 2018-05-01 15:37 UTC (permalink / raw)
To: anna.schumaker; +Cc: linux-rdma, linux-nfs
Hi Anna-
These are fixes that might be appropriate for v4.17-rc. If not,
consider them the first items in my for-v4.18 series.
---
Chuck Lever (2):
xprtrdma: Fix list corruption / DMAR errors during MR recovery
sunrpc: Fix latency trace point crashes
include/trace/events/sunrpc.h | 16 ++++++----------
net/sunrpc/xprtrdma/fmr_ops.c | 5 +----
net/sunrpc/xprtrdma/frwr_ops.c | 9 +++------
net/sunrpc/xprtrdma/verbs.c | 5 +++++
net/sunrpc/xprtrdma/xprt_rdma.h | 2 +-
5 files changed, 16 insertions(+), 21 deletions(-)
--
Chuck Lever
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH v1 1/2] xprtrdma: Fix list corruption / DMAR errors during MR recovery
2018-05-01 15:37 [PATCH v1 0/2] NFS/RDMA fixes for v4.17-rc Chuck Lever
@ 2018-05-01 15:37 ` Chuck Lever
2018-05-01 15:37 ` [PATCH v1 2/2] sunrpc: Fix latency trace point crashes Chuck Lever
1 sibling, 0 replies; 3+ messages in thread
From: Chuck Lever @ 2018-05-01 15:37 UTC (permalink / raw)
To: anna.schumaker; +Cc: linux-rdma, linux-nfs
The ro_release_mr methods check whether mr->mr_list is empty.
Therefore, be sure to always use list_del_init when removing an MR
linked into a list using that field. Otherwise, when recovering from
transport failures or device removal, list corruption can result, or
MRs can get mapped or unmapped an odd number of times, resulting in
IOMMU-related failures.
In general this fix is appropriate back to v4.8. However, code
changes since then make it impossible to apply this patch directly
to stable kernels. The fix would have to be applied by hand or
reworked for kernels earlier than v4.16.
Backport guidance -- there are several cases:
- When creating an MR, initialize mr_list so that using list_empty
on an as-yet-unused MR is safe.
- When an MR is being handled by the remote invalidation path,
ensure that mr_list is reinitialized when it is removed from
rl_registered.
- When an MR is being handled by rpcrdma_destroy_mrs, it is removed
from mr_all, but it may still be on an rl_registered list. In
that case, the MR needs to be removed from that list before being
released.
- Other cases are covered by using list_del_init in rpcrdma_mr_pop.
Fixes: 9d6b04097882 ('xprtrdma: Place registered MWs on a ... ')
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
net/sunrpc/xprtrdma/fmr_ops.c | 5 +----
net/sunrpc/xprtrdma/frwr_ops.c | 9 +++------
net/sunrpc/xprtrdma/verbs.c | 5 +++++
net/sunrpc/xprtrdma/xprt_rdma.h | 2 +-
4 files changed, 10 insertions(+), 11 deletions(-)
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 5cc68a8..f2f6395 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -72,6 +72,7 @@ enum {
if (IS_ERR(mr->fmr.fm_mr))
goto out_fmr_err;
+ INIT_LIST_HEAD(&mr->mr_list);
return 0;
out_fmr_err:
@@ -102,10 +103,6 @@ enum {
LIST_HEAD(unmap_list);
int rc;
- /* Ensure MW is not on any rl_registered list */
- if (!list_empty(&mr->mr_list))
- list_del(&mr->mr_list);
-
kfree(mr->fmr.fm_physaddrs);
kfree(mr->mr_sg);
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index c5743a0..c59c5c7 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -110,6 +110,7 @@
if (!mr->mr_sg)
goto out_list_err;
+ INIT_LIST_HEAD(&mr->mr_list);
sg_init_table(mr->mr_sg, depth);
init_completion(&frwr->fr_linv_done);
return 0;
@@ -133,10 +134,6 @@
{
int rc;
- /* Ensure MR is not on any rl_registered list */
- if (!list_empty(&mr->mr_list))
- list_del(&mr->mr_list);
-
rc = ib_dereg_mr(mr->frwr.fr_mr);
if (rc)
pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
@@ -195,7 +192,7 @@
return;
out_release:
- pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr);
+ pr_err("rpcrdma: FRWR reset failed %d, %p released\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++;
spin_lock(&r_xprt->rx_buf.rb_mrlock);
@@ -476,7 +473,7 @@
list_for_each_entry(mr, mrs, mr_list)
if (mr->mr_handle == rep->rr_inv_rkey) {
- list_del(&mr->mr_list);
+ list_del_init(&mr->mr_list);
trace_xprtrdma_remoteinv(mr);
mr->frwr.fr_state = FRWR_IS_INVALID;
rpcrdma_mr_unmap_and_put(mr);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index fe5eaca..c345d36 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1254,6 +1254,11 @@ struct rpcrdma_req *
list_del(&mr->mr_all);
spin_unlock(&buf->rb_mrlock);
+
+ /* Ensure MW is not on any rl_registered list */
+ if (!list_empty(&mr->mr_list))
+ list_del(&mr->mr_list);
+
ia->ri_ops->ro_release_mr(mr);
count++;
spin_lock(&buf->rb_mrlock);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 3d3b423..cb41b12 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -380,7 +380,7 @@ enum {
struct rpcrdma_mr *mr;
mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
- list_del(&mr->mr_list);
+ list_del_init(&mr->mr_list);
return mr;
}
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH v1 2/2] sunrpc: Fix latency trace point crashes
2018-05-01 15:37 [PATCH v1 0/2] NFS/RDMA fixes for v4.17-rc Chuck Lever
2018-05-01 15:37 ` [PATCH v1 1/2] xprtrdma: Fix list corruption / DMAR errors during MR recovery Chuck Lever
@ 2018-05-01 15:37 ` Chuck Lever
1 sibling, 0 replies; 3+ messages in thread
From: Chuck Lever @ 2018-05-01 15:37 UTC (permalink / raw)
To: anna.schumaker; +Cc: linux-rdma, linux-nfs
If the rpc_task survived longer than the transport, task->tk_xprt
points to freed memory by the time rpc_count_iostats_metrics runs.
Replace the references to task->tk_xprt with references to the
task's tk_client.
Reported-by: syzbot+27db1f90e2b972a5f2d3@syzkaller.appspotmail.com
Fixes: 40bf7eb304b5 ('sunrpc: Add static trace point to report ...')
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
include/trace/events/sunrpc.h | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 335d872..bbb08a3 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -224,6 +224,8 @@
TP_ARGS(task, backlog, rtt, execute),
TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
__field(u32, xid)
__field(int, version)
__string(progname, task->tk_client->cl_program->name)
@@ -231,13 +233,11 @@
__field(unsigned long, backlog)
__field(unsigned long, rtt)
__field(unsigned long, execute)
- __string(addr,
- task->tk_xprt->address_strings[RPC_DISPLAY_ADDR])
- __string(port,
- task->tk_xprt->address_strings[RPC_DISPLAY_PORT])
),
TP_fast_assign(
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->task_id = task->tk_pid;
__entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
__entry->version = task->tk_client->cl_vers;
__assign_str(progname, task->tk_client->cl_program->name)
@@ -245,14 +245,10 @@
__entry->backlog = ktime_to_us(backlog);
__entry->rtt = ktime_to_us(rtt);
__entry->execute = ktime_to_us(execute);
- __assign_str(addr,
- task->tk_xprt->address_strings[RPC_DISPLAY_ADDR]);
- __assign_str(port,
- task->tk_xprt->address_strings[RPC_DISPLAY_PORT]);
),
- TP_printk("peer=[%s]:%s xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu",
- __get_str(addr), __get_str(port), __entry->xid,
+ TP_printk("task:%u@%d xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu",
+ __entry->task_id, __entry->client_id, __entry->xid,
__get_str(progname), __entry->version, __get_str(procname),
__entry->backlog, __entry->rtt, __entry->execute)
);
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2018-05-01 15:37 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-05-01 15:37 [PATCH v1 0/2] NFS/RDMA fixes for v4.17-rc Chuck Lever
2018-05-01 15:37 ` [PATCH v1 1/2] xprtrdma: Fix list corruption / DMAR errors during MR recovery Chuck Lever
2018-05-01 15:37 ` [PATCH v1 2/2] sunrpc: Fix latency trace point crashes Chuck Lever
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).