linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg@nvidia.com>
To: Bob Pearson <rpearsonhpe@gmail.com>
Cc: zyjzyj2000@gmail.com, linux-rdma@vger.kernel.org
Subject: Re: [RFC PATCH v9 18/26] RDMA/rxe: Convert mca read locking to RCU
Date: Fri, 28 Jan 2022 14:39:55 -0400	[thread overview]
Message-ID: <20220128183955.GG1786498@nvidia.com> (raw)
In-Reply-To: <20220127213755.31697-19-rpearsonhpe@gmail.com>

On Thu, Jan 27, 2022 at 03:37:47PM -0600, Bob Pearson wrote:
>  /**
> - * __rxe_init_mca - initialize a new mca holding lock
> + * __rxe_init_mca_rcu - initialize a new mca holding lock
>   * @qp: qp object
>   * @mcg: mcg object
>   * @mca: empty space for new mca
> @@ -280,7 +281,7 @@ void rxe_cleanup_mcg(struct kref *kref)
>   *
>   * Returns: 0 on success else an error
>   */
> -static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg,
> +static int __rxe_init_mca_rcu(struct rxe_qp *qp, struct rxe_mcg *mcg,
>  			  struct rxe_mca *mca)

There is nothing "rcu" about this function..

> @@ -324,14 +325,14 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>  	int err;
>  
>  	/* check to see if the qp is already a member of the group */
> -	spin_lock_bh(&rxe->mcg_lock);
> -	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(mca, &mcg->qp_list, qp_list) {
>  		if (mca->qp == qp) {
> -			spin_unlock_bh(&rxe->mcg_lock);
> +			rcu_read_unlock();
>  			return 0;
>  		}
>  	}
> -	spin_unlock_bh(&rxe->mcg_lock);
> +	rcu_read_unlock();

Ok..

> @@ -340,16 +341,19 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>  
>  	spin_lock_bh(&rxe->mcg_lock);
>  	/* re-check to see if someone else just attached qp */
> -	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
> +	rcu_read_lock();

Do not hold the RCU read lock if you are holding the write side spinlock. All
mutations of the list must hold mcg_lock.

> +	list_for_each_entry_rcu(mca, &mcg->qp_list, qp_list) {
>  		if (mca->qp == qp) {
> +			rcu_read_unlock();
>  			kfree(new_mca);
>  			err = 0;
>  			goto done;
>  		}
>  	}
> +	rcu_read_unlock();
>  
>  	mca = new_mca;
> -	err = __rxe_init_mca(qp, mcg, mca);
> +	err = __rxe_init_mca_rcu(qp, mcg, mca);
>  	if (err)
>  		kfree(mca);

Which looks fine since the list_add is still inside the spinlock

>  done:
> @@ -359,21 +363,23 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>  }
>  
>  /**
> - * __rxe_cleanup_mca - cleanup mca object holding lock
> + * __rxe_cleanup_mca_rcu - cleanup mca object holding lock
>   * @mca: mca object
>   * @mcg: mcg object
>   *
>   * Context: caller must hold a reference to mcg and rxe->mcg_lock
>   */
> -static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg)
> +static void __rxe_cleanup_mca_rcu(struct rxe_mca *mca, struct rxe_mcg *mcg)

Also not rcu, list_del must hold the write side spinlock.

>  {
> -	list_del(&mca->qp_list);
> +	list_del_rcu(&mca->qp_list);
>  
>  	atomic_dec(&mcg->qp_num);
>  	atomic_dec(&mcg->rxe->mcg_attach);
>  	atomic_dec(&mca->qp->mcg_num);
>  
>  	rxe_drop_ref(mca->qp);
> +
> +	kfree_rcu(mca, rcu);

OK

>  }
>  
>  /**
> @@ -386,22 +392,29 @@ static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg)
>  static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>  {
>  	struct rxe_dev *rxe = mcg->rxe;
> -	struct rxe_mca *mca, *tmp;
> +	struct rxe_mca *mca;
> +	int ret;
>  
>  	spin_lock_bh(&rxe->mcg_lock);
> -	list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(mca, &mcg->qp_list, qp_list) {

As before, don't hold the rcu when holding the write side lock

>  		if (mca->qp == qp) {
> -			__rxe_cleanup_mca(mca, mcg);
> -			if (atomic_read(&mcg->qp_num) <= 0)
> -				kref_put(&mcg->ref_cnt, __rxe_cleanup_mcg);
> -			spin_unlock_bh(&rxe->mcg_lock);
> -			kfree(mca);
> -			return 0;
> +			rcu_read_unlock();
> +			goto found;
>  		}
>  	}
> +	rcu_read_unlock();
> +	ret = -EINVAL;
> +	goto done;
> +found:
> +	__rxe_cleanup_mca_rcu(mca, mcg);
> +	if (atomic_read(&mcg->qp_num) <= 0)
> +		kref_put(&mcg->ref_cnt, __rxe_cleanup_mcg);

This is confusing, why both an atomic counter and a kref refcount? Isn't
qp_num == 0 the same as list_empty(qp_list)?

> diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
> index 357a6cea1484..7f2ea61a52c1 100644
> +++ b/drivers/infiniband/sw/rxe/rxe_recv.c
> @@ -267,13 +267,13 @@ static void rxe_rcv_mcast_pkt(struct sk_buff *skb)
>  	qp_array = kmalloc_array(nmax, sizeof(qp), GFP_KERNEL);
>  
>  	n = 0;
> -	spin_lock_bh(&rxe->mcg_lock);
> -	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(mca, &mcg->qp_list, qp_list) {
>  		qp_array[n++] = mca->qp;
>  		if (n == nmax)
>  			break;
>  	}
> -	spin_unlock_bh(&rxe->mcg_lock);
> +	rcu_read_unlock();
>  	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);

I have no idea how this works, what keeps 'qp' valid and prevents it
from being free'd once we leave the locking? Remember the mca can be
in concurrent progress to free so qp is just garbage under RCU at this
point.

Jason

  reply	other threads:[~2022-01-28 18:40 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-27 21:37 [RFC PATCH v9 00/26] Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 01/26] RDMA/rxe: Move rxe_mcast_add/delete to rxe_mcast.c Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 02/26] RDMA/rxe: Move rxe_mcast_attach/detach " Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 03/26] RDMA/rxe: Rename rxe_mc_grp and rxe_mc_elem Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 04/26] RDMA/rxe: Enforce IBA o10-2.2.3 Bob Pearson
2022-01-28 12:53   ` Jason Gunthorpe
2022-01-28 16:18     ` Bob Pearson
2022-01-28 16:42       ` Jason Gunthorpe
2022-01-27 21:37 ` [RFC PATCH v9 05/26] RDMA/rxe: Remove rxe_drop_all_macst_groups Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 06/26] RDMA/rxe: Remove qp->grp_lock and qp->grp_list Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 07/26] RDMA/rxe: Use kzmalloc/kfree for mca Bob Pearson
2022-01-28 18:00   ` Jason Gunthorpe
2022-01-27 21:37 ` [RFC PATCH v9 08/26] RDMA/rxe: Rename grp to mcg and mce to mca Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 09/26] RDMA/rxe: Introduce RXECB(skb) Bob Pearson
2022-01-28 18:29   ` Jason Gunthorpe
2022-01-30 17:47     ` Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 10/26] RDMA/rxe: Split rxe_rcv_mcast_pkt into two phases Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 11/26] RDMA/rxe: Replace locks by rxe->mcg_lock Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 12/26] RDMA/rxe: Replace pool key by rxe->mcg_tree Bob Pearson
2022-01-28 18:32   ` Jason Gunthorpe
2022-01-30 23:23     ` Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 13/26] RDMA/rxe: Remove key'ed object support Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 14/26] RDMA/rxe: Remove mcg from rxe pools Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 15/26] RDMA/rxe: Add code to cleanup mcast memory Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 16/26] RDMA/rxe: Add comments to rxe_mcast.c Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 17/26] RDMA/rxe: Separate code into subroutines Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 18/26] RDMA/rxe: Convert mca read locking to RCU Bob Pearson
2022-01-28 18:39   ` Jason Gunthorpe [this message]
2022-01-27 21:37 ` [RFC PATCH v9 19/26] RDMA/rxe: Reverse the sense of RXE_POOL_NO_ALLOC Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 20/26] RDMA/rxe: Delete _locked() APIs for pool objects Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 21/26] RDMA/rxe: Replace obj by elem in declaration Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 22/26] RDMA/rxe: Replace red-black trees by xarrays Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 23/26] RDMA/rxe: Change pool locking to RCU Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 24/26] RDMA/rxe: Add wait_for_completion to pool objects Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 25/26] RDMA/rxe: Fix ref error in rxe_av.c Bob Pearson
2022-01-27 21:37 ` [RFC PATCH v9 26/26] RDMA/rxe: Replace mr by rkey in responder resources Bob Pearson
2022-01-28 18:42 ` [RFC PATCH v9 00/26] Jason Gunthorpe
2022-02-07 19:20   ` Bob Pearson
2022-02-07 19:38     ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220128183955.GG1786498@nvidia.com \
    --to=jgg@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=rpearsonhpe@gmail.com \
    --cc=zyjzyj2000@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).