All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Julian Anastasov <ja@ssi.bg>
Cc: Simon Horman <horms@verge.net.au>,
	lvs-devel@vger.kernel.org, netfilter-devel@vger.kernel.org,
	Dust Li <dust.li@linux.alibaba.com>,
	Jiejian Wu <jiejian@linux.alibaba.com>,
	rcu@vger.kernel.org
Subject: Re: [PATCHv6 net-next 03/14] ipvs: some service readers can use RCU
Date: Mon, 24 Nov 2025 22:00:07 +0100	[thread overview]
Message-ID: <aSTHJAR5aXml2ms0@calendula> (raw)
In-Reply-To: <20251019155711.67609-4-ja@ssi.bg>

Hi Julian,

Comments below.

On Sun, Oct 19, 2025 at 06:57:00PM +0300, Julian Anastasov wrote:
> Some places walk the services under mutex but they can just use RCU:
> 
> * ip_vs_dst_event() uses ip_vs_forget_dev() which uses its own lock
>   to modify dest
> * ip_vs_genl_dump_services(): ip_vs_genl_fill_service() just fills skb
> * ip_vs_genl_parse_service(): move RCU lock to callers
>   ip_vs_genl_set_cmd(), ip_vs_genl_dump_dests() and ip_vs_genl_get_cmd()
> * ip_vs_genl_dump_dests(): just fill skb
> 
> Signed-off-by: Julian Anastasov <ja@ssi.bg>
> ---
>  net/netfilter/ipvs/ip_vs_ctl.c | 47 +++++++++++++++++-----------------
>  1 file changed, 23 insertions(+), 24 deletions(-)
> 
> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> index 2fb9034b4f53..b18d08d79bcb 100644
> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> @@ -1759,23 +1759,21 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
>  	if (event != NETDEV_DOWN || !ipvs)
>  		return NOTIFY_DONE;
>  	IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
> -	mutex_lock(&ipvs->service_mutex);
> +	rcu_read_lock();

Control plane can still add destinations to svc->destinations that can
be skipped by the rcu walk. I think it should be possible to trigger
leaks with a sufficiently large list, given concurrent updates can
happen. ip_vs_forget_dev() has its own lock, but this is a per-dest
lock which is taken during the list walk.

>  	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
> -		hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list) {
> -			list_for_each_entry(dest, &svc->destinations,
> -					    n_list) {
> +		hlist_for_each_entry_rcu(svc, &ipvs->svc_table[idx], s_list)
> +			list_for_each_entry_rcu(dest, &svc->destinations,
> +						n_list)
>  				ip_vs_forget_dev(dest, dev);
> -			}
> -		}
>  
> -		hlist_for_each_entry(svc, &ipvs->svc_fwm_table[idx], f_list) {
> -			list_for_each_entry(dest, &svc->destinations,
> -					    n_list) {
> +		hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[idx], f_list)
> +			list_for_each_entry_rcu(dest, &svc->destinations,
> +						n_list)
>  				ip_vs_forget_dev(dest, dev);
> -			}
> -		}
>  	}
> +	rcu_read_unlock();
>  
> +	mutex_lock(&ipvs->service_mutex);
>  	spin_lock_bh(&ipvs->dest_trash_lock);
>  	list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
>  		ip_vs_forget_dev(dest, dev);
> @@ -3318,9 +3316,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
>  			goto nla_put_failure;
>  	}
>  
> -	sched = rcu_dereference_protected(svc->scheduler, 1);
> +	sched = rcu_dereference(svc->scheduler);
>  	sched_name = sched ? sched->name : "none";
> -	pe = rcu_dereference_protected(svc->pe, 1);
> +	pe = rcu_dereference(svc->pe);
>  	if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
>  	    (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
>  	    nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
> @@ -3374,9 +3372,9 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
>  	struct net *net = sock_net(skb->sk);
>  	struct netns_ipvs *ipvs = net_ipvs(net);
>  
> -	mutex_lock(&ipvs->service_mutex);
> +	rcu_read_lock();

Here I can see this is safe, because this is netlink path.

>  	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
> -		hlist_for_each_entry(svc, &ipvs->svc_table[i], s_list) {
> +		hlist_for_each_entry_rcu(svc, &ipvs->svc_table[i], s_list) {
>  			if (++idx <= start)
>  				continue;
>  			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
> @@ -3387,7 +3385,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
>  	}
>  
>  	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
> -		hlist_for_each_entry(svc, &ipvs->svc_fwm_table[i], f_list) {
> +		hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[i], f_list) {
>  			if (++idx <= start)
>  				continue;
>  			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
> @@ -3398,7 +3396,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
>  	}
>  
>  nla_put_failure:
> -	mutex_unlock(&ipvs->service_mutex);
> +	rcu_read_unlock();
>  	cb->args[0] = idx;
>  
>  	return skb->len;
> @@ -3454,13 +3452,11 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
>  		usvc->fwmark = 0;
>  	}
>  
> -	rcu_read_lock();
>  	if (usvc->fwmark)
>  		svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
>  	else
>  		svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
>  					   &usvc->addr, usvc->port);
> -	rcu_read_unlock();
>  	*ret_svc = svc;
>  
>  	/* If a full entry was requested, check for the additional fields */
> @@ -3587,7 +3583,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
>  	struct net *net = sock_net(skb->sk);
>  	struct netns_ipvs *ipvs = net_ipvs(net);
>  
> -	mutex_lock(&ipvs->service_mutex);
> +	rcu_read_lock();
>  
>  	/* Try to find the service for which to dump destinations */
>  	if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack))
> @@ -3599,7 +3595,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
>  		goto out_err;
>  
>  	/* Dump the destinations */
> -	list_for_each_entry(dest, &svc->destinations, n_list) {
> +	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
>  		if (++idx <= start)
>  			continue;
>  		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
> @@ -3612,7 +3608,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
>  	cb->args[0] = idx;
>  
>  out_err:
> -	mutex_unlock(&ipvs->service_mutex);
> +	rcu_read_unlock();
>  
>  	return skb->len;
>  }
> @@ -3915,9 +3911,12 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
>  	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
>  		need_full_svc = true;
>  
> +	/* We use function that requires RCU lock */
> +	rcu_read_lock();

This is ip_vs_genl_set_cmd path and the new per-netns mutex is held.

I think __ip_vs_service_find() can now just access this mutex to check
if it is held, using fourth parameter:

        list_for_each_entry_rcu(..., lockdep_is_held(&ipvs->service_mutex))

Then this rcu_read_lock() after mutex_lock(&ipvs->service_mutex) can
be removed. I suspect you added it to quiet a rcu debugging splat.

>  	ret = ip_vs_genl_parse_service(ipvs, &usvc,
>  				       info->attrs[IPVS_CMD_ATTR_SERVICE],
>  				       need_full_svc, &svc);
> +	rcu_read_unlock();
>  	if (ret)
>  		goto out;
>  

  parent reply	other threads:[~2025-11-24 21:00 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-19 15:56 [PATCHv6 net-next 00/14] ipvs: per-net tables and optimizations Julian Anastasov
2025-10-19 15:56 ` [PATCHv6 net-next 01/14] rculist_bl: add hlist_bl_for_each_entry_continue_rcu Julian Anastasov
2025-10-23 11:44   ` Florian Westphal
2025-10-23 13:33     ` Julian Anastasov
2025-10-19 15:56 ` [PATCHv6 net-next 02/14] ipvs: make ip_vs_svc_table and ip_vs_svc_fwm_table per netns Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 03/14] ipvs: some service readers can use RCU Julian Anastasov
2025-10-24  2:21   ` Dust Li
2025-11-24 21:00   ` Pablo Neira Ayuso [this message]
2025-11-26 19:39     ` Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 04/14] ipvs: use single svc table Julian Anastasov
2025-11-24 21:07   ` Pablo Neira Ayuso
2025-10-19 15:57 ` [PATCHv6 net-next 05/14] ipvs: do not keep dest_dst after dest is removed Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 06/14] ipvs: use more counters to avoid service lookups Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 07/14] ipvs: add resizable hash tables Julian Anastasov
2025-11-24 21:16   ` Pablo Neira Ayuso
2025-11-26 20:02     ` Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 08/14] ipvs: use resizable hash table for services Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 09/14] ipvs: switch to per-net connection table Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 10/14] ipvs: show the current conn_tab size to users Julian Anastasov
2025-11-24 21:21   ` Pablo Neira Ayuso
2025-10-19 15:57 ` [PATCHv6 net-next 11/14] ipvs: no_cport and dropentry counters can be per-net Julian Anastasov
2025-11-24 21:29   ` Pablo Neira Ayuso
2025-11-26 20:08     ` Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 12/14] ipvs: use more keys for connection hashing Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 13/14] ipvs: add ip_vs_status info Julian Anastasov
2025-11-24 21:42   ` Pablo Neira Ayuso
2025-10-19 15:57 ` [PATCHv6 net-next 14/14] ipvs: add conn_lfactor and svc_lfactor sysctl vars Julian Anastasov
2025-11-24 21:46 ` [PATCHv6 net-next 00/14] ipvs: per-net tables and optimizations Pablo Neira Ayuso
2025-11-26 20:16   ` Julian Anastasov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aSTHJAR5aXml2ms0@calendula \
    --to=pablo@netfilter.org \
    --cc=dust.li@linux.alibaba.com \
    --cc=horms@verge.net.au \
    --cc=ja@ssi.bg \
    --cc=jiejian@linux.alibaba.com \
    --cc=lvs-devel@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=rcu@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.