From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Julian Anastasov <ja@ssi.bg>
Cc: Simon Horman <horms@verge.net.au>,
lvs-devel@vger.kernel.org, netfilter-devel@vger.kernel.org,
Dust Li <dust.li@linux.alibaba.com>,
Jiejian Wu <jiejian@linux.alibaba.com>,
rcu@vger.kernel.org
Subject: Re: [PATCHv6 net-next 11/14] ipvs: no_cport and dropentry counters can be per-net
Date: Mon, 24 Nov 2025 22:29:29 +0100
Message-ID: <aSTOOez-GDzaG0LT@calendula>
In-Reply-To: <20251019155711.67609-12-ja@ssi.bg>

On Sun, Oct 19, 2025 at 06:57:08PM +0300, Julian Anastasov wrote:
> With the per-net conn_tab these counters no longer need to be
> global.
>
> Signed-off-by: Julian Anastasov <ja@ssi.bg>
> ---
> include/net/ip_vs.h | 2 ++
> net/netfilter/ipvs/ip_vs_conn.c | 62 ++++++++++++++++++++-------------
> 2 files changed, 39 insertions(+), 25 deletions(-)
>
> diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
> index ce77800853ab..1b64c5ee2ac2 100644
> --- a/include/net/ip_vs.h
> +++ b/include/net/ip_vs.h
> @@ -1158,6 +1158,7 @@ struct netns_ipvs {
> #endif
> /* ip_vs_conn */
> atomic_t conn_count; /* connection counter */
> + atomic_t no_cport_conns[IP_VS_AF_MAX];
> struct delayed_work conn_resize_work;/* resize conn_tab */
>
> /* ip_vs_ctl */
> @@ -1188,6 +1189,7 @@ struct netns_ipvs {
> int drop_counter;
> int old_secure_tcp;
> atomic_t dropentry;
> + s8 dropentry_counters[8];
> /* locks in ctl.c */
> spinlock_t dropentry_lock; /* drop entry handling */
> spinlock_t droppacket_lock; /* drop packet handling */
> diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
> index bbce5b45b622..55000252c72c 100644
> --- a/net/netfilter/ipvs/ip_vs_conn.c
> +++ b/net/netfilter/ipvs/ip_vs_conn.c
> @@ -54,9 +54,6 @@ int ip_vs_conn_tab_size __read_mostly;
> /* SLAB cache for IPVS connections */
> static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
>
> -/* counter for no client port connections */
> -static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
> -
> /* We need an addrstrlen that works with or without v6 */
> #ifdef CONFIG_IP_VS_IPV6
> #define IP_VS_ADDRSTRLEN INET6_ADDRSTRLEN
> @@ -319,10 +316,16 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
> struct ip_vs_conn *cp;
>
> cp = __ip_vs_conn_in_get(p);
> - if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) {
> - struct ip_vs_conn_param cport_zero_p = *p;
> - cport_zero_p.cport = 0;
> - cp = __ip_vs_conn_in_get(&cport_zero_p);
> + if (!cp) {
> + struct netns_ipvs *ipvs = p->ipvs;
> + int af_id = ip_vs_af_index(p->af);
> +
> + if (atomic_read(&ipvs->no_cport_conns[af_id])) {
> + struct ip_vs_conn_param cport_zero_p = *p;
> +
> + cport_zero_p.cport = 0;
> + cp = __ip_vs_conn_in_get(&cport_zero_p);
> + }
> }
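
Just to check my reading of the per-AF split: I assume ip_vs_af_index() is
nothing more than a family-to-slot mapping, roughly (my own sketch of what I
assume it does, not taken from this patch):

        /* assumption: slot 0 for IPv4, slot 1 for IPv6, IP_VS_AF_MAX == 2 */
        static inline int ip_vs_af_index(int af)
        {
                return af == AF_INET6 ? 1 : 0;
        }

so the retry with cport == 0 now only happens when this netns actually has
no-cport conns for the address family being looked up, instead of when any
netns has them.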
>
> IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
> @@ -535,6 +538,7 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
> {
> struct hlist_bl_head *head, *head2, *head_new;
> struct netns_ipvs *ipvs = cp->ipvs;
> + int af_id = ip_vs_af_index(cp->af);
> u32 hash_r = 0, hash_key_r = 0;
> struct ip_vs_rht *t, *tp, *t2;
> u32 hash_key, hash_key_new;
> @@ -613,7 +617,7 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
> hlist_bl_del_rcu(&cp->c_list);
> hlist_bl_add_head_rcu(&cp->c_list, head_new);
> }
> - atomic_dec(&ip_vs_conn_no_cport_cnt);
> + atomic_dec(&ipvs->no_cport_conns[af_id]);
> cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
> cp->cport = cport;
> }
> @@ -1169,8 +1173,11 @@ static void ip_vs_conn_expire(struct timer_list *t)
> if (unlikely(cp->app != NULL))
> ip_vs_unbind_app(cp);
> ip_vs_unbind_dest(cp);
> - if (cp->flags & IP_VS_CONN_F_NO_CPORT)
> - atomic_dec(&ip_vs_conn_no_cport_cnt);
> + if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
> + int af_id = ip_vs_af_index(cp->af);
> +
> + atomic_dec(&ipvs->no_cport_conns[af_id]);
> + }
> if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
> ip_vs_conn_rcu_free(&cp->rcu_head);
> else
> @@ -1277,8 +1284,11 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
> cp->out_seq.delta = 0;
>
> atomic_inc(&ipvs->conn_count);
> - if (flags & IP_VS_CONN_F_NO_CPORT)
> - atomic_inc(&ip_vs_conn_no_cport_cnt);
> + if (unlikely(flags & IP_VS_CONN_F_NO_CPORT)) {
> + int af_id = ip_vs_af_index(cp->af);
> +
> + atomic_inc(&ipvs->no_cport_conns[af_id]);
> + }
>
> /* Bind the connection with a destination server */
> cp->dest = NULL;
> @@ -1556,6 +1566,7 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
> };
> #endif
>
> +#ifdef CONFIG_SYSCTL
>
> /* Randomly drop connection entries before running out of memory
> * Can be used for DATA and CTL conns. For TPL conns there are exceptions:
> @@ -1565,12 +1576,7 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
> */
> static inline int todrop_entry(struct ip_vs_conn *cp)
> {
> - /*
> - * The drop rate array needs tuning for real environments.
> - * Called from timer bh only => no locking
> - */
> - static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
> - static signed char todrop_counter[9] = {0};
> + struct netns_ipvs *ipvs = cp->ipvs;
> int i;
>
> /* if the conn entry hasn't lasted for 60 seconds, don't drop it.
> @@ -1579,15 +1585,17 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
> if (time_before(cp->timeout + jiffies, cp->timer.expires + 60*HZ))
> return 0;
>
> - /* Don't drop the entry if its number of incoming packets is not
> - located in [0, 8] */
> + /* Drop only conns with number of incoming packets in [1..8] range */
> i = atomic_read(&cp->in_pkts);
> - if (i > 8 || i < 0) return 0;
> + if (i > 8 || i < 1)
Why did this change? How is this related to the per-netns update?
(See my reading of the old vs. new logic after the function below.)
> + return 0;
>
> - if (!todrop_rate[i]) return 0;
> - if (--todrop_counter[i] > 0) return 0;
> + i--;
> + if (--ipvs->dropentry_counters[i] > 0)
> + return 0;
>
> - todrop_counter[i] = todrop_rate[i];
> + /* Prefer to drop conns with less number of incoming packets */
> + ipvs->dropentry_counters[i] = i + 1;
> return 1;
> }
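
Regarding the [0, 8] -> [1, 8] change above: if I am reading both versions
right, the algorithm itself is unchanged. todrop_rate[0] was 0, so conns with
in_pkts == 0 were never dropped anyway, and slot i - 1 is still re-armed to i,
i.e. one drop per i candidates. Per-netns counters aside, here is a standalone
userspace comparison of the two variants (my own rewrite for illustration, not
kernel code):

#include <assert.h>
#include <stdio.h>

/* old global rate table/counters vs. new 8-slot counters */
static const signed char old_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
static signed char old_counter[9];
static signed char new_counter[8];      /* stands in for ipvs->dropentry_counters */

static int todrop_old(int pkts)
{
        if (pkts > 8 || pkts < 0)
                return 0;
        if (!old_rate[pkts])
                return 0;
        if (--old_counter[pkts] > 0)
                return 0;
        old_counter[pkts] = old_rate[pkts];
        return 1;
}

static int todrop_new(int pkts)
{
        if (pkts > 8 || pkts < 1)
                return 0;
        pkts--;
        if (--new_counter[pkts] > 0)
                return 0;
        new_counter[pkts] = pkts + 1;
        return 1;
}

int main(void)
{
        int round, pkts;

        /* same drop decision for any sequence of candidate conns */
        for (round = 0; round < 100; round++) {
                for (pkts = -1; pkts <= 9; pkts++)
                        assert(todrop_old(pkts) == todrop_new(pkts));
        }
        puts("old and new drop decisions agree");
        return 0;
}

So the question above is mostly about motivation: why fold this cleanup into
the per-netns conversion rather than keep it separate?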
>
> @@ -1681,7 +1689,7 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
> out:
> rcu_read_unlock();
> }
> -
> +#endif
>
> /* Flush all the connection entries in the conn_tab */
> static void ip_vs_conn_flush(struct netns_ipvs *ipvs)
> @@ -1806,7 +1814,11 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
> */
> int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs)
> {
> + int idx;
> +
> atomic_set(&ipvs->conn_count, 0);
> + for (idx = 0; idx < IP_VS_AF_MAX; idx++)
> + atomic_set(&ipvs->no_cport_conns[idx], 0);
> INIT_DELAYED_WORK(&ipvs->conn_resize_work, conn_resize_work_handler);
> RCU_INIT_POINTER(ipvs->conn_tab, NULL);
> atomic_set(&ipvs->conn_tab_changes, 0);
> --
> 2.51.0
>
>
>