From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Julian Anastasov <ja@ssi.bg>
Cc: Simon Horman <horms@verge.net.au>,
	lvs-devel@vger.kernel.org, netfilter-devel@vger.kernel.org,
	Dust Li <dust.li@linux.alibaba.com>,
	Jiejian Wu <jiejian@linux.alibaba.com>,
	rcu@vger.kernel.org
Subject: Re: [PATCHv6 net-next 04/14] ipvs: use single svc table
Date: Mon, 24 Nov 2025 22:07:30 +0100
Message-ID: <aSTJEuMYpRuktfBq@calendula>
In-Reply-To: <20251019155711.67609-5-ja@ssi.bg>

On Sun, Oct 19, 2025 at 06:57:01PM +0300, Julian Anastasov wrote:
> fwmark-based services and non-fwmark-based services can be hashed
> in the same service table. This reduces the burden of working with
> two tables.
> 
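If I read the idea correctly: a single hlist node plus the existing
fwmark field as discriminator is enough to keep both flavors in one
table. A simplified standalone sketch of the layout (hypothetical
names, plain pointers instead of hlist, not the actual kernel code):

#include <stdint.h>
#include <stddef.h>

#define TAB_SIZE 256
#define TAB_MASK (TAB_SIZE - 1)

struct svc {
	struct svc *next;	/* single chain link for both kinds */
	uint32_t fwmark;	/* 0 => keyed by <proto,addr,port> */
	uint16_t proto, port;
	uint32_t addr;
};

/* one shared bucket array instead of two */
static struct svc *tab[TAB_SIZE];

static unsigned int hash_addr(uint16_t proto, uint32_t addr,
			      uint16_t port)
{
	return (proto ^ addr ^ port) & TAB_MASK;
}

static unsigned int hash_fwm(uint32_t fwmark)
{
	return fwmark & TAB_MASK;
}
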
> Signed-off-by: Julian Anastasov <ja@ssi.bg>
> ---
>  include/net/ip_vs.h            |   8 +-
>  net/netfilter/ipvs/ip_vs_ctl.c | 146 +++++----------------------------
>  2 files changed, 22 insertions(+), 132 deletions(-)

This diffstat is nice. From the patch description it is not clear to
me whether this depends on the previous patches (1-3), or whether it
is a preparatory patch that can be applied right away?

Thanks.

> diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
> index 074a204ec6db..b5a5a5efe3cc 100644
> --- a/include/net/ip_vs.h
> +++ b/include/net/ip_vs.h
> @@ -679,8 +679,7 @@ struct ip_vs_dest_user_kern {
>   * forwarding entries.
>   */
>  struct ip_vs_service {
> -	struct hlist_node	s_list;   /* for normal service table */
> -	struct hlist_node	f_list;   /* for fwmark-based service table */
> +	struct hlist_node	s_list;   /* node in service table */
>  	atomic_t		refcnt;   /* reference counter */
>  
>  	u16			af;       /* address family */
> @@ -1050,10 +1049,7 @@ struct netns_ipvs {
>  
>  	/* the service mutex that protect svc_table and svc_fwm_table */
>  	struct mutex service_mutex;
> -	/* the service table hashed by <protocol, addr, port> */
> -	struct hlist_head svc_table[IP_VS_SVC_TAB_SIZE];
> -	/* the service table hashed by fwmark */
> -	struct hlist_head svc_fwm_table[IP_VS_SVC_TAB_SIZE];
> +	struct hlist_head svc_table[IP_VS_SVC_TAB_SIZE];	/* Services */
>  };
>  
>  #define DEFAULT_SYNC_THRESHOLD	3
> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> index b18d08d79bcb..6c04920f9c87 100644
> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> @@ -329,7 +329,7 @@ static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32
>  
>  /*
>   *	Hashes a service in the svc_table by <netns,proto,addr,port>
> - *	or in the svc_fwm_table by fwmark.
> + *	or by fwmark.
>   *	Should be called with locked tables.
>   */
>  static int ip_vs_svc_hash(struct ip_vs_service *svc)
> @@ -344,18 +344,17 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
>  
>  	if (svc->fwmark == 0) {
>  		/*
> -		 *  Hash it by <netns,protocol,addr,port> in svc_table
> +		 *  Hash it by <netns,protocol,addr,port>
>  		 */
>  		hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol,
>  					 &svc->addr, svc->port);
> -		hlist_add_head_rcu(&svc->s_list, &svc->ipvs->svc_table[hash]);
>  	} else {
>  		/*
> -		 *  Hash it by fwmark in svc_fwm_table
> +		 *  Hash it by fwmark
>  		 */
>  		hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark);
> -		hlist_add_head_rcu(&svc->f_list, &svc->ipvs->svc_fwm_table[hash]);
>  	}
> +	hlist_add_head_rcu(&svc->s_list, &svc->ipvs->svc_table[hash]);
>  
>  	svc->flags |= IP_VS_SVC_F_HASHED;
>  	/* increase its refcnt because it is referenced by the svc table */
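
So the hash key computation still depends on the service kind; only
the destination table is shared now. In terms of the (hypothetical)
sketch above:

static void svc_hash(struct svc *s)
{
	unsigned int h = s->fwmark ? hash_fwm(s->fwmark)
				   : hash_addr(s->proto, s->addr, s->port);

	/* both kinds land in the same bucket array */
	s->next = tab[h];
	tab[h] = s;
}
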
> @@ -365,7 +364,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
>  
>  
>  /*
> - *	Unhashes a service from svc_table / svc_fwm_table.
> + *	Unhashes a service from svc_table.
>   *	Should be called with locked tables.
>   */
>  static int ip_vs_svc_unhash(struct ip_vs_service *svc)
> @@ -376,13 +375,8 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
>  		return 0;
>  	}
>  
> -	if (svc->fwmark == 0) {
> -		/* Remove it from the svc_table table */
> -		hlist_del_rcu(&svc->s_list);
> -	} else {
> -		/* Remove it from the svc_fwm_table table */
> -		hlist_del_rcu(&svc->f_list);
> -	}
> +	/* Remove it from svc_table */
> +	hlist_del_rcu(&svc->s_list);
>  
>  	svc->flags &= ~IP_VS_SVC_F_HASHED;
>  	atomic_dec(&svc->refcnt);
> @@ -405,7 +399,8 @@ __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol,
>  
>  	hlist_for_each_entry_rcu(svc, &ipvs->svc_table[hash], s_list) {
>  		if (svc->af == af && ip_vs_addr_equal(af, &svc->addr, vaddr) &&
> -		    svc->port == vport && svc->protocol == protocol) {
> +		    svc->port == vport && svc->protocol == protocol &&
> +		    !svc->fwmark) {
>  			/* HIT */
>  			return svc;
>  		}
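
The new !svc->fwmark check looks necessary to me: with both kinds
sharing buckets, an address lookup could otherwise hit a fwmark entry
whose unused proto/addr/port fields happen to compare equal. In terms
of the sketch above:

static struct svc *find_addr(uint16_t proto, uint32_t addr,
			     uint16_t port)
{
	struct svc *s;

	for (s = tab[hash_addr(proto, addr, port)]; s; s = s->next) {
		/* skip fwmark entries sharing the bucket */
		if (!s->fwmark && s->proto == proto &&
		    s->addr == addr && s->port == port)
			return s;
	}
	return NULL;
}
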
> @@ -427,7 +422,7 @@ __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark)
>  	/* Check for fwmark addressed entries */
>  	hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark);
>  
> -	hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[hash], f_list) {
> +	hlist_for_each_entry_rcu(svc, &ipvs->svc_table[hash], s_list) {
>  		if (svc->fwmark == fwmark && svc->af == af) {
>  			/* HIT */
>  			return svc;
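
On this side no symmetric check seems needed: callers pass
fwmark != 0 and address-based entries always have fwmark == 0, so
they can never match. Again in terms of the sketch:

static struct svc *find_fwm(uint32_t fwmark)
{
	struct svc *s;

	for (s = tab[hash_fwm(fwmark)]; s; s = s->next) {
		if (s->fwmark == fwmark)	/* fwmark != 0 here */
			return s;
	}
	return NULL;
}
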
> @@ -1683,26 +1678,11 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
>  	struct ip_vs_service *svc;
>  	struct hlist_node *n;
>  
> -	/*
> -	 * Flush the service table hashed by <netns,protocol,addr,port>
> -	 */
>  	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
>  		hlist_for_each_entry_safe(svc, n, &ipvs->svc_table[idx],
> -					  s_list) {
> +					  s_list)
>  			ip_vs_unlink_service(svc, cleanup);
> -		}
>  	}
> -
> -	/*
> -	 * Flush the service table hashed by fwmark
> -	 */
> -	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
> -		hlist_for_each_entry_safe(svc, n, &ipvs->svc_fwm_table[idx],
> -					  f_list) {
> -			ip_vs_unlink_service(svc, cleanup);
> -		}
> -	}
> -
>  	return 0;
>  }
>  
> @@ -1765,11 +1745,6 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
>  			list_for_each_entry_rcu(dest, &svc->destinations,
>  						n_list)
>  				ip_vs_forget_dev(dest, dev);
> -
> -		hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[idx], f_list)
> -			list_for_each_entry_rcu(dest, &svc->destinations,
> -						n_list)
> -				ip_vs_forget_dev(dest, dev);
>  	}
>  	rcu_read_unlock();
>  
> @@ -1803,15 +1778,8 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
>  	struct ip_vs_service *svc;
>  
>  	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
> -		hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list) {
> +		hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list)
>  			ip_vs_zero_service(svc);
> -		}
> -	}
> -
> -	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
> -		hlist_for_each_entry(svc, &ipvs->svc_fwm_table[idx], f_list) {
> -			ip_vs_zero_service(svc);
> -		}
>  	}
>  
>  	ip_vs_zero_stats(&ipvs->tot_stats->s);
> @@ -2247,7 +2215,6 @@ static struct ctl_table vs_vars[] = {
>  
>  struct ip_vs_iter {
>  	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
> -	struct hlist_head *table;
>  	int bucket;
>  };
>  
> @@ -2270,7 +2237,6 @@ static inline const char *ip_vs_fwd_name(unsigned int flags)
>  }
>  
>  
> -/* Get the Nth entry in the two lists */
>  static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
>  {
>  	struct net *net = seq_file_net(seq);
> @@ -2279,29 +2245,14 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
>  	int idx;
>  	struct ip_vs_service *svc;
>  
> -	/* look in hash by protocol */
>  	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
>  		hlist_for_each_entry_rcu(svc, &ipvs->svc_table[idx], s_list) {
>  			if (pos-- == 0) {
> -				iter->table = ipvs->svc_table;
> -				iter->bucket = idx;
> -				return svc;
> -			}
> -		}
> -	}
> -
> -	/* keep looking in fwmark */
> -	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
> -		hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[idx],
> -					 f_list) {
> -			if (pos-- == 0) {
> -				iter->table = ipvs->svc_fwm_table;
>  				iter->bucket = idx;
>  				return svc;
>  			}
>  		}
>  	}
> -
>  	return NULL;
>  }
>  
> @@ -2328,38 +2279,17 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
>  	svc = v;
>  	iter = seq->private;
>  
> -	if (iter->table == ipvs->svc_table) {
> -		/* next service in table hashed by protocol */
> -		e = rcu_dereference(hlist_next_rcu(&svc->s_list));
> -		if (e)
> -			return hlist_entry(e, struct ip_vs_service, s_list);
> -
> -		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
> -			hlist_for_each_entry_rcu(svc,
> -						 &ipvs->svc_table[iter->bucket],
> -						 s_list) {
> -				return svc;
> -			}
> -		}
> -
> -		iter->table = ipvs->svc_fwm_table;
> -		iter->bucket = -1;
> -		goto scan_fwmark;
> -	}
> -
> -	/* next service in hashed by fwmark */
> -	e = rcu_dereference(hlist_next_rcu(&svc->f_list));
> +	e = rcu_dereference(hlist_next_rcu(&svc->s_list));
>  	if (e)
> -		return hlist_entry(e, struct ip_vs_service, f_list);
> +		return hlist_entry(e, struct ip_vs_service, s_list);
>  
> - scan_fwmark:
>  	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
>  		hlist_for_each_entry_rcu(svc,
> -					 &ipvs->svc_fwm_table[iter->bucket],
> -					 f_list)
> +					 &ipvs->svc_table[iter->bucket],
> +					 s_list) {
>  			return svc;
> +		}
>  	}
> -
>  	return NULL;
>  }
>  
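
Nice simplification. With a single table the seq_file iterator state
reduces to a bucket index: "next" is either the successor in the same
chain or the first entry of a later bucket. Roughly, in terms of the
sketch:

static struct svc *svc_next(struct svc *cur, int *bucket)
{
	if (cur->next)
		return cur->next;		/* next in same bucket */
	while (++*bucket < TAB_SIZE) {
		if (tab[*bucket])		/* first of a later bucket */
			return tab[*bucket];
	}
	return NULL;
}
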
> @@ -2381,17 +2311,12 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
>  		seq_puts(seq,
>  			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
>  	} else {
> -		struct net *net = seq_file_net(seq);
> -		struct netns_ipvs *ipvs = net_ipvs(net);
>  		const struct ip_vs_service *svc = v;
> -		const struct ip_vs_iter *iter = seq->private;
>  		const struct ip_vs_dest *dest;
>  		struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
>  		char *sched_name = sched ? sched->name : "none";
>  
> -		if (svc->ipvs != ipvs)
> -			return 0;
> -		if (iter->table == ipvs->svc_table) {
> +		if (!svc->fwmark) {
>  #ifdef CONFIG_IP_VS_IPV6
>  			if (svc->af == AF_INET6)
>  				seq_printf(seq, "%s  [%pI6]:%04X %s ",
> @@ -2866,24 +2791,6 @@ __ip_vs_get_service_entries(struct netns_ipvs *ipvs,
>  		}
>  	}
>  
> -	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
> -		hlist_for_each_entry(svc, &ipvs->svc_fwm_table[idx], f_list) {
> -			/* Only expose IPv4 entries to old interface */
> -			if (svc->af != AF_INET)
> -				continue;
> -
> -			if (count >= get->num_services)
> -				goto out;
> -			memset(&entry, 0, sizeof(entry));
> -			ip_vs_copy_service(&entry, svc);
> -			if (copy_to_user(&uptr->entrytable[count],
> -					 &entry, sizeof(entry))) {
> -				ret = -EFAULT;
> -				goto out;
> -			}
> -			count++;
> -		}
> -	}
>  out:
>  	return ret;
>  }
> @@ -3384,17 +3291,6 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
>  		}
>  	}
>  
> -	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
> -		hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[i], f_list) {
> -			if (++idx <= start)
> -				continue;
> -			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
> -				idx--;
> -				goto nla_put_failure;
> -			}
> -		}
> -	}
> -
>  nla_put_failure:
>  	rcu_read_unlock();
>  	cb->args[0] = idx;
> @@ -4404,12 +4300,10 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
>  	int ret = -ENOMEM;
>  	int idx;
>  
> -	/* Initialize service_mutex, svc_table, svc_fwm_table per netns */
> +	/* Initialize service_mutex, svc_table per netns */
>  	__mutex_init(&ipvs->service_mutex, "ipvs->service_mutex", &__ipvs_service_key);
> -	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
> +	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)
>  		INIT_HLIST_HEAD(&ipvs->svc_table[idx]);
> -		INIT_HLIST_HEAD(&ipvs->svc_fwm_table[idx]);
> -	}
>  
>  	/* Initialize rs_table */
>  	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
> -- 
> 2.51.0
> 
> 
> 
