All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg@nvidia.com>
To: Bob Pearson <rpearsonhpe@gmail.com>
Cc: zyjzyj2000@gmail.com, linux-rdma@vger.kernel.org
Subject: Re: [PATCH for-next v12 6/6] RDMA/rxe: Convert mca read locking to RCU
Date: Wed, 23 Feb 2022 15:52:22 -0400	[thread overview]
Message-ID: <20220223195222.GA420650@nvidia.com> (raw)
In-Reply-To: <20220218003543.205799-7-rpearsonhpe@gmail.com>

On Thu, Feb 17, 2022 at 06:35:44PM -0600, Bob Pearson wrote:
> Replace spinlock with rcu read locks for read side operations
> on mca in rxe_recv.c and rxe_mcast.c.
> 
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
>  drivers/infiniband/sw/rxe/rxe_mcast.c | 97 +++++++++++++++++----------
>  drivers/infiniband/sw/rxe/rxe_recv.c  |  6 +-
>  drivers/infiniband/sw/rxe/rxe_verbs.h |  2 +
>  3 files changed, 67 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
> index 349a6fac2fcc..2bfec3748e1e 100644
> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
> @@ -17,6 +17,12 @@
>   * mca is created. It holds a pointer to the qp and is added to a list
>   * of qp's that are attached to the mcg. The qp_list is used to replicate
>   * mcast packets in the rxe receive path.
> + *
> + * The highest performance operations are mca list traversal when
> + * processing incoming multicast packets which need to be fanned out
> + * to the attached qp's. This list is protected by RCU locking for read
> + * operations and a spinlock in the rxe_dev struct for write operations.
> + * The red-black tree is protected by the same spinlock.
>   */
>  
>  #include "rxe.h"
> @@ -288,7 +294,7 @@ static void rxe_destroy_mcg(struct rxe_mcg *mcg)
>  }
>  
>  /**
> - * __rxe_init_mca - initialize a new mca holding lock
> + * __rxe_init_mca_rcu - initialize a new mca holding lock
>   * @qp: qp object
>   * @mcg: mcg object
>   * @mca: empty space for new mca
> @@ -298,8 +304,8 @@ static void rxe_destroy_mcg(struct rxe_mcg *mcg)
>   *
>   * Returns: 0 on success else an error
>   */
> -static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg,
> -			  struct rxe_mca *mca)
> +static int __rxe_init_mca_rcu(struct rxe_qp *qp, struct rxe_mcg *mcg,
> +			      struct rxe_mca *mca)
>  {

This isn't really 'rcu', it still has to hold the write side lock

>  	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
>  	int n;
> @@ -322,7 +328,10 @@ static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg,
>  	rxe_add_ref(qp);
>  	mca->qp = qp;
>  
> -	list_add_tail(&mca->qp_list, &mcg->qp_list);
> +	kref_get(&mcg->ref_cnt);
> +	mca->mcg = mcg;
> +
> +	list_add_tail_rcu(&mca->qp_list, &mcg->qp_list);

list_add_tail gets to be called rcu because it is slower than the
non-rcu safe version.

> -static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg)
> +static void __rxe_destroy_mca(struct rcu_head *head)
>  {
> -	list_del(&mca->qp_list);
> +	struct rxe_mca *mca = container_of(head, typeof(*mca), rcu);
> +	struct rxe_mcg *mcg = mca->mcg;
>  
>  	atomic_dec(&mcg->qp_num);
>  	atomic_dec(&mcg->rxe->mcg_attach);
> @@ -394,6 +404,19 @@ static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg)
>  	kfree(mca);

> +/**
> + * __rxe_cleanup_mca_rcu - cleanup mca object holding lock
> + * @mca: mca object
> + * @mcg: mcg object
> + *
> + * Context: caller must hold a reference to mcg and rxe->mcg_lock
> + */
> +static void __rxe_cleanup_mca_rcu(struct rxe_mca *mca, struct rxe_mcg *mcg)
> +{
> +	list_del_rcu(&mca->qp_list);
> +	call_rcu(&mca->rcu, __rxe_destroy_mca);
> +}

I think this is OK now..

> +
>  /**
>   * rxe_detach_mcg - detach qp from mcg
>   * @mcg: mcg object
> @@ -404,31 +427,35 @@ static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg)
>  static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>  {
>  	struct rxe_dev *rxe = mcg->rxe;
> -	struct rxe_mca *mca, *tmp;
> -	unsigned long flags;
> +	struct rxe_mca *mca;
> +	int ret;
>  
> -	spin_lock_irqsave(&rxe->mcg_lock, flags);
> -	list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
> -		if (mca->qp == qp) {
> -			__rxe_cleanup_mca(mca, mcg);
> -
> -			/* if the number of qp's attached to the
> -			 * mcast group falls to zero go ahead and
> -			 * tear it down. This will not free the
> -			 * object since we are still holding a ref
> -			 * from the caller
> -			 */
> -			if (atomic_read(&mcg->qp_num) <= 0)
> -				__rxe_destroy_mcg(mcg);
> -
> -			spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> -			return 0;
> -		}
> +	spin_lock_bh(&rxe->mcg_lock);
> +	list_for_each_entry_rcu(mca, &mcg->qp_list, qp_list) {
> +		if (mca->qp == qp)
> +			goto found;
>  	}
>  
>  	/* we didn't find the qp on the list */
> -	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> -	return -EINVAL;
> +	ret = -EINVAL;
> +	goto done;
> +
> +found:
> +	__rxe_cleanup_mca_rcu(mca, mcg);
> +
> +	/* if the number of qp's attached to the
> +	 * mcast group falls to zero go ahead and
> +	 * tear it down. This will not free the
> +	 * object since we are still holding a ref
> +	 * from the caller
> +	 */

Fix the word wrap

Would prefer to avoid the found label in the middle of the code.

> +	rcu_read_lock();
> +	list_for_each_entry_rcu(mca, &mcg->qp_list, qp_list) {
>  		/* protect the qp pointers in the list */
>  		rxe_add_ref(mca->qp);

The other approach you could take is to als kref_free the qp and allow
its kref to become zero here. But this is fine, I think.

Jason

  reply	other threads:[~2022-02-23 19:52 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-18  0:35 [PATCH for-next v12 0/6] Move two object pools to rxe_mcast.c Bob Pearson
2022-02-18  0:35 ` [PATCH for-next v12 1/6] RDMA/rxe: Add code to cleanup mcast memory Bob Pearson
2022-02-23 19:33   ` Jason Gunthorpe
2022-02-18  0:35 ` [PATCH for-next v12 2/6] RDMA/rxe: Collect mca init code in a subroutine Bob Pearson
2022-02-18  0:35 ` [PATCH for-next v12 3/6] RDMA/rxe: Collect cleanup mca " Bob Pearson
2022-02-18  0:35 ` [PATCH for-next v12 4/6] RDMA/rxe: Cleanup rxe_mcast.c Bob Pearson
2022-02-18  0:35 ` [PATCH for-next v12 5/6] RDMA/rxe: For mcast copy qp list to temp array Bob Pearson
2022-02-18  0:35 ` [PATCH for-next v12 6/6] RDMA/rxe: Convert mca read locking to RCU Bob Pearson
2022-02-23 19:52   ` Jason Gunthorpe [this message]
2022-02-23 22:40     ` Bob Pearson
2022-02-24  0:04       ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220223195222.GA420650@nvidia.com \
    --to=jgg@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=rpearsonhpe@gmail.com \
    --cc=zyjzyj2000@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.