From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Julian Anastasov <ja@ssi.bg>
Cc: Simon Horman <horms@verge.net.au>,
	lvs-devel@vger.kernel.org, netfilter-devel@vger.kernel.org,
	Dust Li <dust.li@linux.alibaba.com>,
	Jiejian Wu <jiejian@linux.alibaba.com>,
	rcu@vger.kernel.org
Subject: Re: [PATCHv6 net-next 04/14] ipvs: use single svc table
Date: Mon, 24 Nov 2025 22:07:30 +0100
Message-ID: <aSTJEuMYpRuktfBq@calendula>
In-Reply-To: <20251019155711.67609-5-ja@ssi.bg>

On Sun, Oct 19, 2025 at 06:57:01PM +0300, Julian Anastasov wrote:
> fwmark based services and non-fwmark based services can be hashed
> in same service table. This reduces the burden of working with two
> tables.
>
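
Just to confirm my understanding: a svc_table bucket can now hold both
fwmark and <proto,addr,port> entries, since ip_vs_svc_hashkey() and
ip_vs_svc_fwm_hashkey() may map unrelated keys to the same slot. So
every reader has to filter on svc->fwmark, along the lines of (just
restating the lookup from this patch, not suggesting a change):

	hlist_for_each_entry_rcu(svc, &ipvs->svc_table[hash], s_list) {
		if (!svc->fwmark &&	/* skip fwmark-based entries */
		    svc->af == af && svc->protocol == protocol &&
		    ip_vs_addr_equal(af, &svc->addr, vaddr) &&
		    svc->port == vport)
			return svc;	/* HIT */
	}
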
> Signed-off-by: Julian Anastasov <ja@ssi.bg>
> ---
> include/net/ip_vs.h | 8 +-
> net/netfilter/ipvs/ip_vs_ctl.c | 146 +++++----------------------------
> 2 files changed, 22 insertions(+), 132 deletions(-)
This diffstat is nice. What I am missing from the patch description is
whether this depends on the previous patches (1-3), or whether it is a
preparatory patch that can be applied right away?

Thanks.

> diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
> index 074a204ec6db..b5a5a5efe3cc 100644
> --- a/include/net/ip_vs.h
> +++ b/include/net/ip_vs.h
> @@ -679,8 +679,7 @@ struct ip_vs_dest_user_kern {
> * forwarding entries.
> */
> struct ip_vs_service {
> - struct hlist_node s_list; /* for normal service table */
> - struct hlist_node f_list; /* for fwmark-based service table */
> + struct hlist_node s_list; /* node in service table */
> atomic_t refcnt; /* reference counter */
>
> u16 af; /* address family */
> @@ -1050,10 +1049,7 @@ struct netns_ipvs {
>
> /* the service mutex that protect svc_table and svc_fwm_table */
> struct mutex service_mutex;
> - /* the service table hashed by <protocol, addr, port> */
> - struct hlist_head svc_table[IP_VS_SVC_TAB_SIZE];
> - /* the service table hashed by fwmark */
> - struct hlist_head svc_fwm_table[IP_VS_SVC_TAB_SIZE];
> + struct hlist_head svc_table[IP_VS_SVC_TAB_SIZE]; /* Services */
> };
>
> #define DEFAULT_SYNC_THRESHOLD 3
> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> index b18d08d79bcb..6c04920f9c87 100644
> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> @@ -329,7 +329,7 @@ static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32
>
> /*
> * Hashes a service in the svc_table by <netns,proto,addr,port>
> - * or in the svc_fwm_table by fwmark.
> + * or by fwmark.
> * Should be called with locked tables.
> */
> static int ip_vs_svc_hash(struct ip_vs_service *svc)
> @@ -344,18 +344,17 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
>
> if (svc->fwmark == 0) {
> /*
> - * Hash it by <netns,protocol,addr,port> in svc_table
> + * Hash it by <netns,protocol,addr,port>
> */
> hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol,
> &svc->addr, svc->port);
> - hlist_add_head_rcu(&svc->s_list, &svc->ipvs->svc_table[hash]);
> } else {
> /*
> - * Hash it by fwmark in svc_fwm_table
> + * Hash it by fwmark
> */
> hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark);
> - hlist_add_head_rcu(&svc->f_list, &svc->ipvs->svc_fwm_table[hash]);
> }
> + hlist_add_head_rcu(&svc->s_list, &svc->ipvs->svc_table[hash]);
>
> svc->flags |= IP_VS_SVC_F_HASHED;
> /* increase its refcnt because it is referenced by the svc table */
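
FWIW, with the insertion pulled out of the branches the function body
condenses to something like this (just restating the new flow as I
read it, no change needed):

	hash = svc->fwmark ?
	       ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark) :
	       ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol,
				 &svc->addr, svc->port);
	hlist_add_head_rcu(&svc->s_list, &svc->ipvs->svc_table[hash]);
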
> @@ -365,7 +364,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
>
>
> /*
> - * Unhashes a service from svc_table / svc_fwm_table.
> + * Unhashes a service from svc_table.
> * Should be called with locked tables.
> */
> static int ip_vs_svc_unhash(struct ip_vs_service *svc)
> @@ -376,13 +375,8 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
> return 0;
> }
>
> - if (svc->fwmark == 0) {
> - /* Remove it from the svc_table table */
> - hlist_del_rcu(&svc->s_list);
> - } else {
> - /* Remove it from the svc_fwm_table table */
> - hlist_del_rcu(&svc->f_list);
> - }
> + /* Remove it from svc_table */
> + hlist_del_rcu(&svc->s_list);
>
> svc->flags &= ~IP_VS_SVC_F_HASHED;
> atomic_dec(&svc->refcnt);
> @@ -405,7 +399,8 @@ __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol,
>
> hlist_for_each_entry_rcu(svc, &ipvs->svc_table[hash], s_list) {
> if (svc->af == af && ip_vs_addr_equal(af, &svc->addr, vaddr) &&
> - svc->port == vport && svc->protocol == protocol) {
> + svc->port == vport && svc->protocol == protocol &&
> + !svc->fwmark) {
> /* HIT */
> return svc;
> }
> @@ -427,7 +422,7 @@ __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark)
> /* Check for fwmark addressed entries */
> hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark);
>
> - hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[hash], f_list) {
> + hlist_for_each_entry_rcu(svc, &ipvs->svc_table[hash], s_list) {
> if (svc->fwmark == fwmark && svc->af == af) {
> /* HIT */
> return svc;
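
The asymmetry between the two lookups looks correct to me, if I read
the callers right: __ip_vs_service_find() needs the new !svc->fwmark
check because a fwmark service can now land in any bucket, while
__ip_vs_svc_fwm_find() needs no extra guard since it is only called
with fwmark != 0 and non-fwmark services always store fwmark == 0.
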
> @@ -1683,26 +1678,11 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
> struct ip_vs_service *svc;
> struct hlist_node *n;
>
> - /*
> - * Flush the service table hashed by <netns,protocol,addr,port>
> - */
> for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
> hlist_for_each_entry_safe(svc, n, &ipvs->svc_table[idx],
> - s_list) {
> + s_list)
> ip_vs_unlink_service(svc, cleanup);
> - }
> }
> -
> - /*
> - * Flush the service table hashed by fwmark
> - */
> - for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
> - hlist_for_each_entry_safe(svc, n, &ipvs->svc_fwm_table[idx],
> - f_list) {
> - ip_vs_unlink_service(svc, cleanup);
> - }
> - }
> -
> return 0;
> }
>
> @@ -1765,11 +1745,6 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
> list_for_each_entry_rcu(dest, &svc->destinations,
> n_list)
> ip_vs_forget_dev(dest, dev);
> -
> - hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[idx], f_list)
> - list_for_each_entry_rcu(dest, &svc->destinations,
> - n_list)
> - ip_vs_forget_dev(dest, dev);
> }
> rcu_read_unlock();
>
> @@ -1803,15 +1778,8 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
> struct ip_vs_service *svc;
>
> for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
> - hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list) {
> + hlist_for_each_entry(svc, &ipvs->svc_table[idx], s_list)
> ip_vs_zero_service(svc);
> - }
> - }
> -
> - for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
> - hlist_for_each_entry(svc, &ipvs->svc_fwm_table[idx], f_list) {
> - ip_vs_zero_service(svc);
> - }
> }
>
> ip_vs_zero_stats(&ipvs->tot_stats->s);
> @@ -2247,7 +2215,6 @@ static struct ctl_table vs_vars[] = {
>
> struct ip_vs_iter {
> struct seq_net_private p; /* Do not move this, netns depends upon it*/
> - struct hlist_head *table;
> int bucket;
> };
>
> @@ -2270,7 +2237,6 @@ static inline const char *ip_vs_fwd_name(unsigned int flags)
> }
>
>
> -/* Get the Nth entry in the two lists */
> static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
> {
> struct net *net = seq_file_net(seq);
> @@ -2279,29 +2245,14 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
> int idx;
> struct ip_vs_service *svc;
>
> - /* look in hash by protocol */
> for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
> hlist_for_each_entry_rcu(svc, &ipvs->svc_table[idx], s_list) {
> if (pos-- == 0) {
> - iter->table = ipvs->svc_table;
> - iter->bucket = idx;
> - return svc;
> - }
> - }
> - }
> -
> - /* keep looking in fwmark */
> - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
> - hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[idx],
> - f_list) {
> - if (pos-- == 0) {
> - iter->table = ipvs->svc_fwm_table;
> iter->bucket = idx;
> return svc;
> }
> }
> }
> -
> return NULL;
> }
>
> @@ -2328,38 +2279,17 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
> svc = v;
> iter = seq->private;
>
> - if (iter->table == ipvs->svc_table) {
> - /* next service in table hashed by protocol */
> - e = rcu_dereference(hlist_next_rcu(&svc->s_list));
> - if (e)
> - return hlist_entry(e, struct ip_vs_service, s_list);
> -
> - while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
> - hlist_for_each_entry_rcu(svc,
> - &ipvs->svc_table[iter->bucket],
> - s_list) {
> - return svc;
> - }
> - }
> -
> - iter->table = ipvs->svc_fwm_table;
> - iter->bucket = -1;
> - goto scan_fwmark;
> - }
> -
> - /* next service in hashed by fwmark */
> - e = rcu_dereference(hlist_next_rcu(&svc->f_list));
> + e = rcu_dereference(hlist_next_rcu(&svc->s_list));
> if (e)
> - return hlist_entry(e, struct ip_vs_service, f_list);
> + return hlist_entry(e, struct ip_vs_service, s_list);
>
> - scan_fwmark:
> while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
> hlist_for_each_entry_rcu(svc,
> - &ipvs->svc_fwm_table[iter->bucket],
> - f_list)
> + &ipvs->svc_table[iter->bucket],
> + s_list) {
> return svc;
> + }
> }
> -
> return NULL;
> }
>
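
Nice, the scan_fwmark two-pass dance in the /proc iterator goes away
and iter->bucket alone carries the position now that there is a single
table.
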
> @@ -2381,17 +2311,12 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
> seq_puts(seq,
> " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
> } else {
> - struct net *net = seq_file_net(seq);
> - struct netns_ipvs *ipvs = net_ipvs(net);
> const struct ip_vs_service *svc = v;
> - const struct ip_vs_iter *iter = seq->private;
> const struct ip_vs_dest *dest;
> struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
> char *sched_name = sched ? sched->name : "none";
>
> - if (svc->ipvs != ipvs)
> - return 0;
> - if (iter->table == ipvs->svc_table) {
> + if (!svc->fwmark) {
> #ifdef CONFIG_IP_VS_IPV6
> if (svc->af == AF_INET6)
> seq_printf(seq, "%s [%pI6]:%04X %s ",
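
I assume dropping the svc->ipvs != ipvs check here is safe because
svc_table is per netns (see the netns_ipvs hunk above), so the
iterator can only ever see services of its own netns?
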
> @@ -2866,24 +2791,6 @@ __ip_vs_get_service_entries(struct netns_ipvs *ipvs,
> }
> }
>
> - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
> - hlist_for_each_entry(svc, &ipvs->svc_fwm_table[idx], f_list) {
> - /* Only expose IPv4 entries to old interface */
> - if (svc->af != AF_INET)
> - continue;
> -
> - if (count >= get->num_services)
> - goto out;
> - memset(&entry, 0, sizeof(entry));
> - ip_vs_copy_service(&entry, svc);
> - if (copy_to_user(&uptr->entrytable[count],
> - &entry, sizeof(entry))) {
> - ret = -EFAULT;
> - goto out;
> - }
> - count++;
> - }
> - }
> out:
> return ret;
> }
> @@ -3384,17 +3291,6 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
> }
> }
>
> - for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
> - hlist_for_each_entry_rcu(svc, &ipvs->svc_fwm_table[i], f_list) {
> - if (++idx <= start)
> - continue;
> - if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
> - idx--;
> - goto nla_put_failure;
> - }
> - }
> - }
> -
> nla_put_failure:
> rcu_read_unlock();
> cb->args[0] = idx;
> @@ -4404,12 +4300,10 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
> int ret = -ENOMEM;
> int idx;
>
> - /* Initialize service_mutex, svc_table, svc_fwm_table per netns */
> + /* Initialize service_mutex, svc_table per netns */
> __mutex_init(&ipvs->service_mutex, "ipvs->service_mutex", &__ipvs_service_key);
> - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
> + for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)
> INIT_HLIST_HEAD(&ipvs->svc_table[idx]);
> - INIT_HLIST_HEAD(&ipvs->svc_fwm_table[idx]);
> - }
>
> /* Initialize rs_table */
> for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
> --
> 2.51.0