From: cel@kernel.org
To: Trond Myklebust <trond.myklebust@hammerspace.com>,
Anna Schumaker <anna@kernel.org>
Cc: <linux-nfs@vger.kernel.org>, Chuck Lever <chuck.lever@oracle.com>,
Sagi Grimberg <sagi@grimberg.me>
Subject: [PATCH 2/5] rpcrdma: Implement generic device removal
Date: Tue, 4 Jun 2024 15:45:24 -0400 [thread overview]
Message-ID: <20240604194522.10390-7-cel@kernel.org> (raw)
In-Reply-To: <20240604194522.10390-6-cel@kernel.org>
From: Chuck Lever <chuck.lever@oracle.com>
Commit e87a911fed07 ("nvme-rdma: use ib_client API to detect device
removal") explains the benefits of handling device removal outside
of the CM event handler.
Sketch in an IB device removal notification mechanism that can be
used by both the client and server side RPC-over-RDMA transport
implementations.
Suggested-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
include/linux/sunrpc/rdma_rn.h | 27 +++++
include/trace/events/rpcrdma.h | 34 ++++++
net/sunrpc/xprtrdma/Makefile | 2 +-
net/sunrpc/xprtrdma/ib_client.c | 181 ++++++++++++++++++++++++++++++++
net/sunrpc/xprtrdma/module.c | 18 +++-
5 files changed, 258 insertions(+), 4 deletions(-)
create mode 100644 include/linux/sunrpc/rdma_rn.h
create mode 100644 net/sunrpc/xprtrdma/ib_client.c
diff --git a/include/linux/sunrpc/rdma_rn.h b/include/linux/sunrpc/rdma_rn.h
new file mode 100644
index 000000000000..7d032ca057af
--- /dev/null
+++ b/include/linux/sunrpc/rdma_rn.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates.
+ */
+
+#ifndef _LINUX_SUNRPC_RDMA_RN_H
+#define _LINUX_SUNRPC_RDMA_RN_H
+
+#include <rdma/ib_verbs.h>
+
+/**
+ * struct rpcrdma_notification - request removal notification
+ * @rn_done: callback invoked with this object when the monitored
+ *           device is removed; set by rpcrdma_rn_register()
+ * @rn_index: index under which this object is stored in the
+ *            per-device xarray; assigned by rpcrdma_rn_register()
+ */
+struct rpcrdma_notification {
+ void (*rn_done)(struct rpcrdma_notification *rn);
+ u32 rn_index;
+};
+
+int rpcrdma_rn_register(struct ib_device *device,
+ struct rpcrdma_notification *rn,
+ void (*done)(struct rpcrdma_notification *rn));
+void rpcrdma_rn_unregister(struct ib_device *device,
+ struct rpcrdma_notification *rn);
+int rpcrdma_ib_client_register(void);
+void rpcrdma_ib_client_unregister(void);
+
+#endif /* _LINUX_SUNRPC_RDMA_RN_H */
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 14392652273a..ecdaf088219d 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -2220,6 +2220,40 @@ TRACE_EVENT(svcrdma_sq_post_err,
)
);
+/*
+ * Event class for the rpcrdma ib_client trace points: each event
+ * records the name of the ib_device it was raised for and prints
+ * it as "device=<name>".
+ */
+DECLARE_EVENT_CLASS(rpcrdma_client_device_class,
+ TP_PROTO(
+ const struct ib_device *device
+ ),
+
+ TP_ARGS(device),
+
+ TP_STRUCT__entry(
+ __string(name, device->name)
+ ),
+
+ TP_fast_assign(
+ __assign_str(name);
+ ),
+
+ TP_printk("device=%s",
+ __get_str(name)
+ )
+);
+
+/*
+ * Define a trace event called @name that belongs to
+ * rpcrdma_client_device_class and takes a single ib_device argument.
+ */
+#define DEFINE_CLIENT_DEVICE_EVENT(name) \
+ DEFINE_EVENT(rpcrdma_client_device_class, name, \
+ TP_PROTO( \
+ const struct ib_device *device \
+ ), \
+ TP_ARGS(device) \
+ )
+
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_completion);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_add_one);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_wait_on);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one_done);
+
#endif /* _TRACE_RPCRDMA_H */
#include <trace/define_trace.h>
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 55b21bae866d..3232aa23cdb4 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
-rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \
+rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o ib_client.o \
svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
svc_rdma_pcl.o module.o
diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c
new file mode 100644
index 000000000000..a938c19c3490
--- /dev/null
+++ b/net/sunrpc/xprtrdma/ib_client.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (c) 2024 Oracle. All rights reserved.
+ */
+
+/* #include <linux/module.h>
+#include <linux/slab.h> */
+#include <linux/xarray.h>
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <linux/completion.h>
+
+#include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/rdma_rn.h>
+
+#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
+
+/*
+ * Per-ib_device private data for rpcrdma
+ *
+ * rd_kref:   starts with one reference in rpcrdma_add_one();
+ *            rpcrdma_rn_register() takes a reference and
+ *            rpcrdma_rn_unregister() drops one
+ * rd_flags:  bit flags; RPCRDMA_RD_F_REMOVING is set by
+ *            rpcrdma_remove_one()
+ * rd_device: the ib_device this object was allocated for
+ * rd_xa:     registered rpcrdma_notification objects, stored at
+ *            their rn_index
+ * rd_done:   completed when the last reference on rd_kref is
+ *            released; rpcrdma_remove_one() may wait on it
+ */
+struct rpcrdma_device {
+ struct kref rd_kref;
+ unsigned long rd_flags;
+ struct ib_device *rd_device;
+ struct xarray rd_xa;
+ struct completion rd_done;
+};
+
+/* Bit number in rd_flags: removal of the device has begun */
+#define RPCRDMA_RD_F_REMOVING (0)
+
+static struct ib_client rpcrdma_ib_client;
+
+/*
+ * Fetch rpcrdma's private data for @device. A listener is not bound
+ * to any device and is therefore never registered, so @device can be
+ * NULL here. ib_get_client_data() does not check for a NULL @device,
+ * so that case is filtered out before calling it.
+ */
+static struct rpcrdma_device *rpcrdma_get_client_data(struct ib_device *device)
+{
+	return device ? ib_get_client_data(device, &rpcrdma_ib_client) : NULL;
+}
+
+/* Defined further down; needed here to give back a reference on failure. */
+static void rpcrdma_rn_release(struct kref *kref);
+
+/**
+ * rpcrdma_rn_register - register to get device removal notifications
+ * @device: device to monitor
+ * @rn: notification object that wishes to be notified
+ * @done: callback to notify caller of device removal
+ *
+ * Returns zero on success. The callback in rn_done is guaranteed
+ * to be invoked when the device is removed, unless this notification
+ * is unregistered first.
+ *
+ * On failure, a negative errno is returned: -ENETUNREACH if @device
+ * has no rpcrdma private data or is already being removed, -ENOMEM
+ * if @rn could not be stored in the device's xarray. A failed
+ * registration holds no reference on the device's private data, so
+ * the caller must not call rpcrdma_rn_unregister() for it.
+ */
+int rpcrdma_rn_register(struct ib_device *device,
+			struct rpcrdma_notification *rn,
+			void (*done)(struct rpcrdma_notification *rn))
+{
+	struct rpcrdma_device *rd = rpcrdma_get_client_data(device);
+
+	if (!rd || test_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags))
+		return -ENETUNREACH;
+
+	/*
+	 * Install the callback before @rn becomes visible in rd_xa, so
+	 * that rpcrdma_remove_one() can never find an entry whose
+	 * rn_done has not been set yet.
+	 */
+	rn->rn_done = done;
+
+	/*
+	 * Take the reference before publishing @rn: once @rn is in
+	 * rd_xa it can be notified and unregistered, which drops the
+	 * reference.
+	 */
+	kref_get(&rd->rd_kref);
+	if (xa_alloc(&rd->rd_xa, &rn->rn_index, rn, xa_limit_32b,
+		     GFP_KERNEL) < 0) {
+		/*
+		 * Nothing was registered, so nothing will ever drop this
+		 * reference for us. Leaking it would leave
+		 * rpcrdma_remove_one() waiting on rd_done forever.
+		 */
+		kref_put(&rd->rd_kref, rpcrdma_rn_release);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * Release function for rd_kref: the last reference has been dropped,
+ * so signal rd_done for anyone waiting on it.
+ */
+static void rpcrdma_rn_release(struct kref *kref)
+{
+	struct rpcrdma_device *rd;
+
+	rd = container_of(kref, struct rpcrdma_device, rd_kref);
+
+	trace_rpcrdma_client_completion(rd->rd_device);
+	complete(&rd->rd_done);
+}
+
+/**
+ * rpcrdma_rn_unregister - stop device removal notifications
+ * @device: monitored device
+ * @rn: notification object that no longer wishes to be notified
+ *
+ * Removes @rn from the device's set of registrants and drops the
+ * reference that registration took. Does nothing if @device has no
+ * rpcrdma private data.
+ */
+void rpcrdma_rn_unregister(struct ib_device *device,
+			   struct rpcrdma_notification *rn)
+{
+	struct rpcrdma_device *rd;
+
+	rd = rpcrdma_get_client_data(device);
+	if (rd) {
+		xa_erase(&rd->rd_xa, rn->rn_index);
+		kref_put(&rd->rd_kref, rpcrdma_rn_release);
+	}
+}
+
+/**
+ * rpcrdma_add_one - ib_client device insertion callback
+ * @device: device about to be inserted
+ *
+ * Allocates and initializes xprtrdma's private data for @device,
+ * then attaches it to @device as this client's data.
+ *
+ * Returns zero on success, or -ENOMEM if the private data could not
+ * be allocated.
+ */
+static int rpcrdma_add_one(struct ib_device *device)
+{
+	struct rpcrdma_device *rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+
+	if (!rd)
+		return -ENOMEM;
+
+	rd->rd_device = device;
+	init_completion(&rd->rd_done);
+	xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC1);
+	kref_init(&rd->rd_kref);
+
+	/* Attach the private data only once it is fully set up */
+	ib_set_client_data(device, &rpcrdma_ib_client, rd);
+
+	trace_rpcrdma_client_add_one(device);
+	return 0;
+}
+
+/**
+ * rpcrdma_remove_one - ib_client device removal callback
+ * @device: device about to be removed
+ * @client_data: this module's private per-device data
+ *
+ * Marks the device as being removed, invokes the rn_done callback of
+ * every registered notification, waits until the last registrant has
+ * dropped its reference, and then frees the private data.
+ *
+ * Upon return, all transports associated with @device have divested
+ * themselves from IB hardware resources.
+ */
+static void rpcrdma_remove_one(struct ib_device *device,
+ void *client_data)
+{
+ struct rpcrdma_device *rd = client_data;
+ struct rpcrdma_notification *rn;
+ unsigned long index;
+
+ trace_rpcrdma_client_remove_one(device);
+
+ /* rpcrdma_rn_register() refuses new registrants once this is set */
+ set_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags);
+ /* Tell every current registrant that the device is going away */
+ xa_for_each(&rd->rd_xa, index, rn)
+ rn->rn_done(rn);
+
+ /*
+ * Wait only if there are still outstanding notification
+ * registrants for this device.
+ *
+ * This drops the reference that kref_init() set up in
+ * rpcrdma_add_one(). The refcount is decremented directly rather
+ * than via kref_put(), so rpcrdma_rn_release() is not run from
+ * here. If other references remain, rpcrdma_rn_release()
+ * completes rd_done when the last of them is put.
+ */
+ if (!refcount_dec_and_test(&rd->rd_kref.refcount)) {
+ trace_rpcrdma_client_wait_on(device);
+ wait_for_completion(&rd->rd_done);
+ }
+
+ trace_rpcrdma_client_remove_one_done(device);
+ kfree(rd);
+}
+
+/*
+ * ib_client descriptor for rpcrdma: rpcrdma_add_one() and
+ * rpcrdma_remove_one() are the device insertion and removal callbacks.
+ */
+static struct ib_client rpcrdma_ib_client = {
+ .name = "rpcrdma",
+ .add = rpcrdma_add_one,
+ .remove = rpcrdma_remove_one,
+};
+
+/**
+ * rpcrdma_ib_client_unregister - unregister ib_client for xprtrdma
+ *
+ * Passes rpcrdma's ib_client to ib_unregister_client().
+ *
+ * TODO(cel): watch for orphaned rpcrdma_device objects on module unload
+ */
+void rpcrdma_ib_client_unregister(void)
+{
+ ib_unregister_client(&rpcrdma_ib_client);
+}
+
+/**
+ * rpcrdma_ib_client_register - register ib_client for rpcrdma
+ *
+ * Passes rpcrdma's ib_client to ib_register_client() and hands its
+ * return value back to the caller.
+ *
+ * Returns zero on success, or a negative errno.
+ */
+int rpcrdma_ib_client_register(void)
+{
+ return ib_register_client(&rpcrdma_ib_client);
+}
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index 45c5b41ac8dc..697f571d4c01 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/rdma_rn.h>
#include <asm/swab.h>
@@ -30,21 +31,32 @@ static void __exit rpc_rdma_cleanup(void)
{
xprt_rdma_cleanup();
svc_rdma_cleanup();
+ rpcrdma_ib_client_unregister();
}
static int __init rpc_rdma_init(void)
{
int rc;
+ rc = rpcrdma_ib_client_register();
+ if (rc)
+ goto out_rc;
+
rc = svc_rdma_init();
if (rc)
- goto out;
+ goto out_ib_client;
rc = xprt_rdma_init();
if (rc)
- svc_rdma_cleanup();
+ goto out_svc_rdma;
-out:
+ return 0;
+
+out_svc_rdma:
+ svc_rdma_cleanup();
+out_ib_client:
+ rpcrdma_ib_client_unregister();
+out_rc:
return rc;
}
--
2.45.1
next prev parent reply other threads:[~2024-06-04 19:45 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-04 19:45 [PATCH 1/5] xprtrdma: Fix rpcrdma_reqs_reset() cel
2024-06-04 19:45 ` cel [this message]
2024-06-05 8:43 ` [PATCH 2/5] rpcrdma: Implement generic device removal Sagi Grimberg
2024-06-04 19:45 ` [PATCH 3/5] xprtrdma: Handle device removal outside of the CM event handler cel
2024-06-05 8:44 ` Sagi Grimberg
2024-06-04 19:45 ` [PATCH 4/5] xprtrdma: Clean up synopsis of frwr_mr_unmap() cel
2024-06-05 8:45 ` Sagi Grimberg
2024-06-04 19:45 ` [PATCH 5/5] xprtrdma: Remove temp allocation of rpcrdma_rep objects cel
2024-06-05 8:52 ` Sagi Grimberg
2024-06-05 8:42 ` [PATCH 1/5] xprtrdma: Fix rpcrdma_reqs_reset() Sagi Grimberg
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240604194522.10390-7-cel@kernel.org \
--to=cel@kernel.org \
--cc=anna@kernel.org \
--cc=chuck.lever@oracle.com \
--cc=linux-nfs@vger.kernel.org \
--cc=sagi@grimberg.me \
--cc=trond.myklebust@hammerspace.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox