From: Daisuke Matsuda <dskmtsd@gmail.com>
To: linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org,
leon@kernel.org, jgg@ziepe.ca, zyjzyj2000@gmail.com
Cc: Daisuke Matsuda <dskmtsd@gmail.com>
Subject: [PATCH for-next v2 2/2] RDMA/rxe: Enable asynchronous prefetch for ODP MRs
Date: Sat, 3 May 2025 13:42:24 +0000 [thread overview]
Message-ID: <20250503134224.4867-3-dskmtsd@gmail.com> (raw)
In-Reply-To: <20250503134224.4867-1-dskmtsd@gmail.com>
Calling ibv_advise_mr(3) without IBV_ADVISE_MR_FLAG_FLUSH issues an
asynchronous prefetch request. Such a request is best-effort, so it can
safely be deferred to the system-wide unbound workqueue.
Signed-off-by: Daisuke Matsuda <dskmtsd@gmail.com>
---
drivers/infiniband/sw/rxe/rxe_odp.c | 81 ++++++++++++++++++++++++++++-
1 file changed, 80 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/sw/rxe/rxe_odp.c b/drivers/infiniband/sw/rxe/rxe_odp.c
index e5c60b061d7e..d98b385a18ce 100644
--- a/drivers/infiniband/sw/rxe/rxe_odp.c
+++ b/drivers/infiniband/sw/rxe/rxe_odp.c
@@ -425,6 +425,73 @@ enum resp_states rxe_odp_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
return RESPST_NONE;
}
+struct prefetch_mr_work { /* one deferred (asynchronous) prefetch request */
+ struct work_struct work; /* work item; the handler recovers this struct via container_of() */
+ u32 pf_flags; /* flags handed to rxe_odp_do_pagefault_and_lock() for every fragment */
+ u32 num_sge; /* number of valid entries in frags[] */
+ struct {
+ u64 io_virt; /* start address, copied from ib_sge.addr */
+ struct rxe_mr *mr; /* MR found by looking up ib_sge.lkey */
+ size_t length; /* length, copied from ib_sge.length */
+ } frags[]; /* flexible array, one entry per scatter/gather element */
+};
+
+/*
+ * Workqueue handler for an asynchronous prefetch request.
+ *
+ * Faults in the pages of every fragment recorded in the work item. The
+ * request is best-effort: a fragment that fails is logged and skipped.
+ * Each fragment's MR reference, taken in rxe_init_prefetch_work(), is
+ * dropped here once the fragment has been handled, and the work item
+ * itself is freed at the end.
+ */
+static void rxe_ib_prefetch_mr_work(struct work_struct *w)
+{
+	struct prefetch_mr_work *work =
+		container_of(w, struct prefetch_mr_work, work);
+	int ret;
+	u32 i;
+
+	/* We rely on IB/core that work is executed if we have num_sge != 0 only. */
+	WARN_ON(!work->num_sge);
+	for (i = 0; i < work->num_sge; ++i) {
+		struct rxe_mr *mr = work->frags[i].mr;
+
+		ret = rxe_odp_do_pagefault_and_lock(mr, work->frags[i].io_virt,
+						    work->frags[i].length,
+						    work->pf_flags);
+		if (ret < 0)
+			rxe_dbg_mr(mr, "failed to prefetch the mr\n");
+		else
+			/* On success the helper returns with umem_mutex held. */
+			mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+
+		/* Release the reference that kept the MR alive for this work. */
+		rxe_put(mr);
+	}
+
+	kvfree(work);
+}
+
+/*
+ * Fill @work from @sg_list so it can be queued for asynchronous prefetch.
+ *
+ * Every MR found via lookup_mr() stays referenced in work->frags[] so it
+ * cannot be destroyed before the deferred work runs; the references are
+ * dropped by rxe_ib_prefetch_mr_work().
+ *
+ * Return: 0 on success. On failure returns -EINVAL with all references
+ * taken so far already released, so the caller only has to free @work
+ * and must not queue it.
+ */
+static int rxe_init_prefetch_work(struct ib_pd *ibpd,
+				  enum ib_uverbs_advise_mr_advice advice,
+				  u32 pf_flags, struct prefetch_mr_work *work,
+				  struct ib_sge *sg_list, u32 num_sge)
+{
+	struct rxe_pd *pd = container_of(ibpd, struct rxe_pd, ibpd);
+	u32 i;
+
+	INIT_WORK(&work->work, rxe_ib_prefetch_mr_work);
+	work->pf_flags = pf_flags;
+
+	for (i = 0; i < num_sge; ++i) {
+		struct rxe_mr *mr;
+
+		/* lookup_mr() reports failure with NULL, not an ERR_PTR. */
+		mr = lookup_mr(pd, IB_ACCESS_LOCAL_WRITE,
+			       sg_list[i].lkey, RXE_LOOKUP_LOCAL);
+		if (!mr)
+			goto err_put;
+
+		work->frags[i].io_virt = sg_list[i].addr;
+		work->frags[i].length = sg_list[i].length;
+		work->frags[i].mr = mr;
+	}
+	work->num_sge = num_sge;
+	return 0;
+
+err_put:
+	/* The work will never run, so undo the references taken above. */
+	while (i > 0) {
+		i--;
+		rxe_put(work->frags[i].mr);
+	}
+	work->num_sge = 0;
+	return -EINVAL;
+}
+
static int rxe_ib_prefetch_sg_list(struct ib_pd *ibpd,
enum ib_uverbs_advise_mr_advice advice,
u32 pf_flags, struct ib_sge *sg_list,
@@ -478,6 +545,8 @@ static int rxe_ib_advise_mr_prefetch(struct ib_pd *ibpd,
u32 flags, struct ib_sge *sg_list, u32 num_sge)
{
u32 pf_flags = RXE_PAGEFAULT_DEFAULT;
+ struct prefetch_mr_work *work;
+ int rc;
if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
pf_flags |= RXE_PAGEFAULT_RDONLY;
@@ -490,7 +559,17 @@ static int rxe_ib_advise_mr_prefetch(struct ib_pd *ibpd,
return rxe_ib_prefetch_sg_list(ibpd, advice, pf_flags, sg_list,
num_sge);
- /* Asynchronous call is "best-effort" */
+ /* Asynchronous call is "best-effort" and allowed to fail */
+ work = kvzalloc(struct_size(work, frags, num_sge), GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ rc = rxe_init_prefetch_work(ibpd, advice, pf_flags, work, sg_list, num_sge);
+ if (rc) {
+ kvfree(work);
+ return rc;
+ }
+ queue_work(system_unbound_wq, &work->work);
return 0;
}
--
2.43.0
next prev parent reply other threads:[~2025-05-03 13:43 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-03 13:42 [PATCH for-next v2 0/2] RDMA/rxe: Prefetching pages with explicit ODP Daisuke Matsuda
2025-05-03 13:42 ` [PATCH for-next v2 1/2] RDMA/rxe: Implement synchronous prefetch for ODP MRs Daisuke Matsuda
2025-05-05 7:57 ` Zhu Yanjun
2025-05-09 11:51 ` Daisuke Matsuda
2025-05-09 15:19 ` Zhu Yanjun
2025-05-10 2:46 ` Daisuke Matsuda
2025-05-10 4:43 ` Zhu Yanjun
2025-05-10 7:18 ` Daisuke Matsuda
2025-05-10 8:04 ` Greg Sword
2025-05-11 2:06 ` Daisuke Matsuda
2025-05-11 4:52 ` Zhu Yanjun
2025-05-13 5:23 ` Daisuke Matsuda
2025-05-03 13:42 ` Daisuke Matsuda [this message]
2025-05-05 15:25 ` [PATCH for-next v2 2/2] RDMA/rxe: Enable asynchronous " Zhu Yanjun
2025-05-09 12:19 ` Daisuke Matsuda
2025-05-09 12:52 ` Zhu Yanjun
2025-05-09 14:48 ` Zhu Yanjun
2025-05-03 17:08 ` [PATCH for-next v2 0/2] RDMA/rxe: Prefetching pages with explicit ODP Zhu Yanjun
2025-05-04 9:23 ` Daisuke Matsuda
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250503134224.4867-3-dskmtsd@gmail.com \
--to=dskmtsd@gmail.com \
--cc=jgg@ziepe.ca \
--cc=leon@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=zyjzyj2000@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.