netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Zhu Yanjun <yanjun.zhu@linux.dev>
To: Bob Pearson <rpearsonhpe@gmail.com>,
	jgg@nvidia.com, linux-rdma@vger.kernel.org, dsahern@kernel.org,
	davem@davemloft.net, netdev@vger.kernel.org
Subject: Re: [PATCH for-next v5 4/7] RDMA/rxe: Let rxe_lookup_mcg use rcu_read_lock
Date: Tue, 5 Dec 2023 13:56:43 +0800	[thread overview]
Message-ID: <39b6a032-e530-4b33-99dc-3d3378504e15@linux.dev> (raw)
In-Reply-To: <20231205002613.10219-2-rpearsonhpe@gmail.com>

Add  David S. Miller and  David Ahern.

They are the maintainers in netdev and very familiar with mcast.

Zhu Yanjun

在 2023/12/5 8:26, Bob Pearson 写道:
> Change locking of read side operations of the mcast group
> red-black tree to use rcu read locking. This will allow changing
> the mcast lock in the next patch to be a mutex without
> breaking rxe_recv.c which runs in an atomic state. It is also a
> better implementation than the current use of a spin-lock per
> rdma device since receiving mcast packets will be much more
> common than registering/deregistering mcast groups.
>
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> ---
>   drivers/infiniband/sw/rxe/rxe_mcast.c | 59 +++++++++------------------
>   drivers/infiniband/sw/rxe/rxe_verbs.h |  1 +
>   2 files changed, 21 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
> index 54735d07cee5..44948f9cb02b 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
> @@ -151,13 +151,18 @@ static void __rxe_insert_mcg(struct rxe_mcg *mcg)
>   		tmp = rb_entry(node, struct rxe_mcg, node);
>   
>   		cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid));
> -		if (cmp > 0)
> +		if (cmp > 0) {
>   			link = &(*link)->rb_left;
> -		else
> +		} else if (cmp < 0) {
>   			link = &(*link)->rb_right;
> +		} else {
> +			/* we must delete the old mcg before adding one */
> +			WARN_ON_ONCE(1);
> +			return;
> +		}
>   	}
>   
> -	rb_link_node(&mcg->node, node, link);
> +	rb_link_node_rcu(&mcg->node, node, link);
>   	rb_insert_color(&mcg->node, tree);
>   }
>   
> @@ -172,15 +177,11 @@ static void __rxe_remove_mcg(struct rxe_mcg *mcg)
>   	rb_erase(&mcg->node, &mcg->rxe->mcg_tree);
>   }
>   
> -/**
> - * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock
> - * @rxe: rxe device object
> - * @mgid: multicast IP address
> - *
> - * Context: caller must hold rxe->mcg_lock
> - * Returns: mcg on success and takes a ref to mcg else NULL
> +/*
> + * Lookup mgid in the multicast group red-black tree and try to
> + * get a ref on it. Return mcg on success else NULL.
>    */
> -static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
> +struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe,
>   					union ib_gid *mgid)
>   {
>   	struct rb_root *tree = &rxe->mcg_tree;
> @@ -188,7 +189,8 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
>   	struct rb_node *node;
>   	int cmp;
>   
> -	node = tree->rb_node;
> +	rcu_read_lock();
> +	node = rcu_dereference_raw(tree->rb_node);
>   
>   	while (node) {
>   		mcg = rb_entry(node, struct rxe_mcg, node);
> @@ -196,35 +198,14 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
>   		cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid));
>   
>   		if (cmp > 0)
> -			node = node->rb_left;
> +			node = rcu_dereference_raw(node->rb_left);
>   		else if (cmp < 0)
> -			node = node->rb_right;
> +			node = rcu_dereference_raw(node->rb_right);
>   		else
>   			break;
>   	}
> -
> -	if (node) {
> -		kref_get(&mcg->ref_cnt);
> -		return mcg;
> -	}
> -
> -	return NULL;
> -}
> -
> -/**
> - * rxe_lookup_mcg - lookup up mcg in red-back tree
> - * @rxe: rxe device object
> - * @mgid: multicast IP address
> - *
> - * Returns: mcg if found else NULL
> - */
> -struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> -{
> -	struct rxe_mcg *mcg;
> -
> -	spin_lock_bh(&rxe->mcg_lock);
> -	mcg = __rxe_lookup_mcg(rxe, mgid);
> -	spin_unlock_bh(&rxe->mcg_lock);
> +	mcg = (node && kref_get_unless_zero(&mcg->ref_cnt)) ? mcg : NULL;
> +	rcu_read_unlock();
>   
>   	return mcg;
>   }
> @@ -292,7 +273,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>   
>   	spin_lock_bh(&rxe->mcg_lock);
>   	/* re-check to see if someone else just added it */
> -	tmp = __rxe_lookup_mcg(rxe, mgid);
> +	tmp = rxe_lookup_mcg(rxe, mgid);
>   	if (tmp) {
>   		spin_unlock_bh(&rxe->mcg_lock);
>   		atomic_dec(&rxe->mcg_num);
> @@ -322,7 +303,7 @@ void rxe_cleanup_mcg(struct kref *kref)
>   {
>   	struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt);
>   
> -	kfree(mcg);
> +	kfree_rcu(mcg, rcu);
>   }
>   
>   /**
> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
> index 7be9e6232dd9..8058e5039322 100644
> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> @@ -345,6 +345,7 @@ struct rxe_mw {
>   
>   struct rxe_mcg {
>   	struct rb_node		node;
> +	struct rcu_head		rcu;
>   	struct kref		ref_cnt;
>   	struct rxe_dev		*rxe;
>   	struct list_head	qp_list;

  parent reply	other threads:[~2023-12-05  5:56 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20231205002613.10219-1-rpearsonhpe@gmail.com>
2023-12-05  5:55 ` [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address Zhu Yanjun
2023-12-05 10:29   ` Zhu Yanjun
2023-12-07  1:47     ` Rain River
2023-12-07 19:07       ` Bob Pearson
2023-12-08  1:24         ` Greg Sword
     [not found] ` <20231205002613.10219-2-rpearsonhpe@gmail.com>
2023-12-05  5:56   ` Zhu Yanjun [this message]
     [not found] ` <20231205002613.10219-3-rpearsonhpe@gmail.com>
2023-12-05  5:57   ` [PATCH for-next v5 5/7] RDMA/rxe: Split multicast lock Zhu Yanjun
     [not found] ` <20231205002613.10219-4-rpearsonhpe@gmail.com>
2023-12-05  5:57   ` [PATCH for-next v5 6/7] RDMA/rxe: Cleanup mcg lifetime Zhu Yanjun
     [not found] ` <20231205002613.10219-5-rpearsonhpe@gmail.com>
2023-12-05  5:58   ` [PATCH for-next v5 7/7] RDMA/rxe: Add module parameters for mcast limits Zhu Yanjun

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=39b6a032-e530-4b33-99dc-3d3378504e15@linux.dev \
    --to=yanjun.zhu@linux.dev \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=jgg@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=rpearsonhpe@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).