All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zhu Yanjun <yanjun.zhu@linux.dev>
To: Bob Pearson <rpearsonhpe@gmail.com>,
	jgg@nvidia.com, linux-rdma@vger.kernel.org, dsahern@kernel.org,
	davem@davemloft.net, netdev@vger.kernel.org
Subject: Re: [PATCH for-next v5 4/7] RDMA/rxe: Let rxe_lookup_mcg use rcu_read_lock
Date: Tue, 5 Dec 2023 13:56:43 +0800	[thread overview]
Message-ID: <39b6a032-e530-4b33-99dc-3d3378504e15@linux.dev> (raw)
In-Reply-To: <20231205002613.10219-2-rpearsonhpe@gmail.com>

Add  David S. Miller and  David Ahern.

They are the maintainers in netdev and very familiar with mcast.

Zhu Yanjun

在 2023/12/5 8:26, Bob Pearson 写道:
> Change locking of read side operations of the mcast group
> red-black tree to use rcu read locking. This will allow changing
> the mcast lock in the next patch to be a mutex without
> breaking rxe_recv.c which runs in an atomic state. It is also a
> better implementation than the current use of a spin-lock per
> rdma device since receiving mcast packets will be much more
> common than registering/deregistering mcast groups.
>
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> ---
>   drivers/infiniband/sw/rxe/rxe_mcast.c | 59 +++++++++------------------
>   drivers/infiniband/sw/rxe/rxe_verbs.h |  1 +
>   2 files changed, 21 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
> index 54735d07cee5..44948f9cb02b 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
> @@ -151,13 +151,18 @@ static void __rxe_insert_mcg(struct rxe_mcg *mcg)
>   		tmp = rb_entry(node, struct rxe_mcg, node);
>   
>   		cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid));
> -		if (cmp > 0)
> +		if (cmp > 0) {
>   			link = &(*link)->rb_left;
> -		else
> +		} else if (cmp < 0) {
>   			link = &(*link)->rb_right;
> +		} else {
> +			/* we must delete the old mcg before adding one */
> +			WARN_ON_ONCE(1);
> +			return;
> +		}
>   	}
>   
> -	rb_link_node(&mcg->node, node, link);
> +	rb_link_node_rcu(&mcg->node, node, link);
>   	rb_insert_color(&mcg->node, tree);
>   }
>   
> @@ -172,15 +177,11 @@ static void __rxe_remove_mcg(struct rxe_mcg *mcg)
>   	rb_erase(&mcg->node, &mcg->rxe->mcg_tree);
>   }
>   
> -/**
> - * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock
> - * @rxe: rxe device object
> - * @mgid: multicast IP address
> - *
> - * Context: caller must hold rxe->mcg_lock
> - * Returns: mcg on success and takes a ref to mcg else NULL
> +/*
> + * Lookup mgid in the multicast group red-black tree and try to
> + * get a ref on it. Return mcg on success else NULL.
>    */
> -static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
> +struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe,
>   					union ib_gid *mgid)
>   {
>   	struct rb_root *tree = &rxe->mcg_tree;
> @@ -188,7 +189,8 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
>   	struct rb_node *node;
>   	int cmp;
>   
> -	node = tree->rb_node;
> +	rcu_read_lock();
> +	node = rcu_dereference_raw(tree->rb_node);
>   
>   	while (node) {
>   		mcg = rb_entry(node, struct rxe_mcg, node);
> @@ -196,35 +198,14 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
>   		cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid));
>   
>   		if (cmp > 0)
> -			node = node->rb_left;
> +			node = rcu_dereference_raw(node->rb_left);
>   		else if (cmp < 0)
> -			node = node->rb_right;
> +			node = rcu_dereference_raw(node->rb_right);
>   		else
>   			break;
>   	}
> -
> -	if (node) {
> -		kref_get(&mcg->ref_cnt);
> -		return mcg;
> -	}
> -
> -	return NULL;
> -}
> -
> -/**
> - * rxe_lookup_mcg - lookup up mcg in red-back tree
> - * @rxe: rxe device object
> - * @mgid: multicast IP address
> - *
> - * Returns: mcg if found else NULL
> - */
> -struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> -{
> -	struct rxe_mcg *mcg;
> -
> -	spin_lock_bh(&rxe->mcg_lock);
> -	mcg = __rxe_lookup_mcg(rxe, mgid);
> -	spin_unlock_bh(&rxe->mcg_lock);
> +	mcg = (node && kref_get_unless_zero(&mcg->ref_cnt)) ? mcg : NULL;
> +	rcu_read_unlock();
>   
>   	return mcg;
>   }
> @@ -292,7 +273,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>   
>   	spin_lock_bh(&rxe->mcg_lock);
>   	/* re-check to see if someone else just added it */
> -	tmp = __rxe_lookup_mcg(rxe, mgid);
> +	tmp = rxe_lookup_mcg(rxe, mgid);
>   	if (tmp) {
>   		spin_unlock_bh(&rxe->mcg_lock);
>   		atomic_dec(&rxe->mcg_num);
> @@ -322,7 +303,7 @@ void rxe_cleanup_mcg(struct kref *kref)
>   {
>   	struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt);
>   
> -	kfree(mcg);
> +	kfree_rcu(mcg, rcu);
>   }
>   
>   /**
> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
> index 7be9e6232dd9..8058e5039322 100644
> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> @@ -345,6 +345,7 @@ struct rxe_mw {
>   
>   struct rxe_mcg {
>   	struct rb_node		node;
> +	struct rcu_head		rcu;
>   	struct kref		ref_cnt;
>   	struct rxe_dev		*rxe;
>   	struct list_head	qp_list;

  reply	other threads:[~2023-12-05  5:56 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-05  0:26 [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address Bob Pearson
2023-12-05  0:26 ` [PATCH for-next v5 4/7] RDMA/rxe: Let rxe_lookup_mcg use rcu_read_lock Bob Pearson
2023-12-05  5:56   ` Zhu Yanjun [this message]
2023-12-05  0:26 ` [PATCH for-next v5 5/7] RDMA/rxe: Split multicast lock Bob Pearson
2023-12-05  5:57   ` Zhu Yanjun
2023-12-05  0:26 ` [PATCH for-next v5 6/7] RDMA/rxe: Cleanup mcg lifetime Bob Pearson
2023-12-05  5:57   ` Zhu Yanjun
2023-12-05  0:26 ` [PATCH for-next v5 7/7] RDMA/rxe: Add module parameters for mcast limits Bob Pearson
2023-12-05  5:58   ` Zhu Yanjun
2023-12-05  5:55 ` [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address Zhu Yanjun
2023-12-05 10:29   ` Zhu Yanjun
2023-12-07  1:47     ` Rain River
2023-12-07 19:07       ` Bob Pearson
2023-12-08  1:24         ` Greg Sword

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=39b6a032-e530-4b33-99dc-3d3378504e15@linux.dev \
    --to=yanjun.zhu@linux.dev \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=jgg@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=rpearsonhpe@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.