All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zhu Yanjun <yanjun.zhu@linux.dev>
To: Daisuke Matsuda <dskmtsd@gmail.com>,
	linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org,
	leon@kernel.org, jgg@ziepe.ca, zyjzyj2000@gmail.com
Subject: Re: [PATCH for-next v2 2/2] RDMA/rxe: Enable asynchronous prefetch for ODP MRs
Date: Mon, 5 May 2025 17:25:26 +0200	[thread overview]
Message-ID: <dbc1bcdf-144d-44d2-8fc8-77bc2ad58b51@linux.dev> (raw)
In-Reply-To: <20250503134224.4867-3-dskmtsd@gmail.com>

On 03.05.25 15:42, Daisuke Matsuda wrote:
> Calling ibv_advise_mr(3) with flags other than IBV_ADVISE_MR_FLAG_FLUSH
> invokes asynchronous requests. It is best-effort, and thus can safely be
> deferred to the system-wide workqueue.
> 
> Signed-off-by: Daisuke Matsuda <dskmtsd@gmail.com>

I tested this patch series with rdma-core, and it appears to work well.
I have also read through this commit. Apart from the minor comments 
below, I am fine with it.

Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev>

> ---
>   drivers/infiniband/sw/rxe/rxe_odp.c | 81 ++++++++++++++++++++++++++++-
>   1 file changed, 80 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_odp.c b/drivers/infiniband/sw/rxe/rxe_odp.c
> index e5c60b061d7e..d98b385a18ce 100644
> --- a/drivers/infiniband/sw/rxe/rxe_odp.c
> +++ b/drivers/infiniband/sw/rxe/rxe_odp.c
> @@ -425,6 +425,73 @@ enum resp_states rxe_odp_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
>   	return RESPST_NONE;
>   }
>   
> +struct prefetch_mr_work {
> +	struct work_struct work;
> +	u32 pf_flags;
> +	u32 num_sge;
> +	struct {
> +		u64 io_virt;
> +		struct rxe_mr *mr;
> +		size_t length;
> +	} frags[];
> +};

Should struct prefetch_mr_work be moved into a header file? IMO, it 
would be better to move this struct to rxe_loc.h.

> +
> +static void rxe_ib_prefetch_mr_work(struct work_struct *w)
> +{
> +	struct prefetch_mr_work *work =
> +		container_of(w, struct prefetch_mr_work, work);
> +	int ret;
> +	u32 i;
> +
> +	/* We rely on IB/core that work is executed if we have num_sge != 0 only. */
> +	WARN_ON(!work->num_sge);
> +	for (i = 0; i < work->num_sge; ++i) {
> +		struct ib_umem_odp *umem_odp;
> +
> +		ret = rxe_odp_do_pagefault_and_lock(work->frags[i].mr, work->frags[i].io_virt,
> +						    work->frags[i].length, work->pf_flags);
> +		if (ret < 0) {
> +			rxe_dbg_mr(work->frags[i].mr, "failed to prefetch the mr\n");
> +			continue;
> +		}
> +
> +		umem_odp = to_ib_umem_odp(work->frags[i].mr->umem);
> +		mutex_unlock(&umem_odp->umem_mutex);

This function clearly depends on the mutex umem_mutex. Would it be 
better to add lockdep_assert_held(&umem_odp->umem_mutex) at the 
beginning of this function?

Zhu Yanjun

> +	}
> +
> +	kvfree(work);
> +}
> +
> +static int rxe_init_prefetch_work(struct ib_pd *ibpd,
> +				  enum ib_uverbs_advise_mr_advice advice,
> +				  u32 pf_flags, struct prefetch_mr_work *work,
> +				  struct ib_sge *sg_list, u32 num_sge)
> +{
> +	struct rxe_pd *pd = container_of(ibpd, struct rxe_pd, ibpd);
> +	u32 i;
> +
> +	INIT_WORK(&work->work, rxe_ib_prefetch_mr_work);
> +	work->pf_flags = pf_flags;
> +
> +	for (i = 0; i < num_sge; ++i) {
> +		struct rxe_mr *mr;
> +
> +		mr = lookup_mr(pd, IB_ACCESS_LOCAL_WRITE,
> +			       sg_list[i].lkey, RXE_LOOKUP_LOCAL);
> +		if (IS_ERR(mr)) {
> +			work->num_sge = i;
> +			return PTR_ERR(mr);
> +		}
> +		work->frags[i].io_virt = sg_list[i].addr;
> +		work->frags[i].length = sg_list[i].length;
> +		work->frags[i].mr = mr;
> +
> +		rxe_put(mr);
> +	}
> +	work->num_sge = num_sge;
> +	return 0;
> +}
> +
>   static int rxe_ib_prefetch_sg_list(struct ib_pd *ibpd,
>   				   enum ib_uverbs_advise_mr_advice advice,
>   				   u32 pf_flags, struct ib_sge *sg_list,
> @@ -478,6 +545,8 @@ static int rxe_ib_advise_mr_prefetch(struct ib_pd *ibpd,
>   				     u32 flags, struct ib_sge *sg_list, u32 num_sge)
>   {
>   	u32 pf_flags = RXE_PAGEFAULT_DEFAULT;
> +	struct prefetch_mr_work *work;
> +	int rc;
>   
>   	if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
>   		pf_flags |= RXE_PAGEFAULT_RDONLY;
> @@ -490,7 +559,17 @@ static int rxe_ib_advise_mr_prefetch(struct ib_pd *ibpd,
>   		return rxe_ib_prefetch_sg_list(ibpd, advice, pf_flags, sg_list,
>   					       num_sge);
>   
> -	/* Asynchronous call is "best-effort" */
> +	/* Asynchronous call is "best-effort" and allowed to fail */
> +	work = kvzalloc(struct_size(work, frags, num_sge), GFP_KERNEL);
> +	if (!work)
> +		return -ENOMEM;
> +
> +	rc = rxe_init_prefetch_work(ibpd, advice, pf_flags, work, sg_list, num_sge);
> +	if (rc) {
> +		kvfree(work);
> +		return rc;
> +	}
> +	queue_work(system_unbound_wq, &work->work);
>   
>   	return 0;
>   }


  reply	other threads:[~2025-05-05 15:25 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-03 13:42 [PATCH for-next v2 0/2] RDMA/rxe: Prefetching pages with explicit ODP Daisuke Matsuda
2025-05-03 13:42 ` [PATCH for-next v2 1/2] RDMA/rxe: Implement synchronous prefetch for ODP MRs Daisuke Matsuda
2025-05-05  7:57   ` Zhu Yanjun
2025-05-09 11:51     ` Daisuke Matsuda
2025-05-09 15:19   ` Zhu Yanjun
2025-05-10  2:46     ` Daisuke Matsuda
2025-05-10  4:43       ` Zhu Yanjun
2025-05-10  7:18         ` Daisuke Matsuda
2025-05-10  8:04           ` Greg Sword
2025-05-11  2:06             ` Daisuke Matsuda
2025-05-11  4:52               ` Zhu Yanjun
2025-05-13  5:23                 ` Daisuke Matsuda
2025-05-03 13:42 ` [PATCH for-next v2 2/2] RDMA/rxe: Enable asynchronous " Daisuke Matsuda
2025-05-05 15:25   ` Zhu Yanjun [this message]
2025-05-09 12:19     ` Daisuke Matsuda
2025-05-09 12:52       ` Zhu Yanjun
2025-05-09 14:48       ` Zhu Yanjun
2025-05-03 17:08 ` [PATCH for-next v2 0/2] RDMA/rxe: Prefetching pages with explicit ODP Zhu Yanjun
2025-05-04  9:23   ` Daisuke Matsuda

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=dbc1bcdf-144d-44d2-8fc8-77bc2ad58b51@linux.dev \
    --to=yanjun.zhu@linux.dev \
    --cc=dskmtsd@gmail.com \
    --cc=jgg@ziepe.ca \
    --cc=leon@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=zyjzyj2000@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.