From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Julian Anastasov <ja@ssi.bg>
Cc: Simon Horman <horms@verge.net.au>,
	lvs-devel@vger.kernel.org, netfilter-devel@vger.kernel.org,
	Dust Li <dust.li@linux.alibaba.com>,
	Jiejian Wu <jiejian@linux.alibaba.com>,
	rcu@vger.kernel.org
Subject: Re: [PATCHv6 net-next 11/14] ipvs: no_cport and dropentry counters can be per-net
Date: Mon, 24 Nov 2025 22:29:29 +0100
Message-ID: <aSTOOez-GDzaG0LT@calendula>
In-Reply-To: <20251019155711.67609-12-ja@ssi.bg>

On Sun, Oct 19, 2025 at 06:57:08PM +0300, Julian Anastasov wrote:
> Now that conn_tab is per-net, these counters no longer need to be
> global.
> 
> Signed-off-by: Julian Anastasov <ja@ssi.bg>
> ---
>  include/net/ip_vs.h             |  2 ++
>  net/netfilter/ipvs/ip_vs_conn.c | 62 ++++++++++++++++++++-------------
>  2 files changed, 39 insertions(+), 25 deletions(-)
> 
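[ Aside for archive readers: IP_VS_AF_MAX and ip_vs_af_index() used
  below come from an earlier patch in this series, not from this one.
  A hypothetical sketch of the shape they take, inferred only from how
  the hunks below use them -- not the series' literal code:

	/* One slot per supported address family, so per-net state
	 * such as no_cport_conns[] can be split per AF.
	 */
	enum {
		IP_VS_AF_INET,
		IP_VS_AF_INET6,
		IP_VS_AF_MAX
	};

	static inline int ip_vs_af_index(int af)
	{
		return af == AF_INET6 ? IP_VS_AF_INET6 : IP_VS_AF_INET;
	}

  With that, ipvs->no_cport_conns[ip_vs_af_index(af)] is one counter
  per address family per netns. ]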
> diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
> index ce77800853ab..1b64c5ee2ac2 100644
> --- a/include/net/ip_vs.h
> +++ b/include/net/ip_vs.h
> @@ -1158,6 +1158,7 @@ struct netns_ipvs {
>  #endif
>  	/* ip_vs_conn */
>  	atomic_t		conn_count;      /* connection counter */
> +	atomic_t		no_cport_conns[IP_VS_AF_MAX];
>  	struct delayed_work	conn_resize_work;/* resize conn_tab */
>  
>  	/* ip_vs_ctl */
> @@ -1188,6 +1189,7 @@ struct netns_ipvs {
>  	int			drop_counter;
>  	int			old_secure_tcp;
>  	atomic_t		dropentry;
> +	s8			dropentry_counters[8];
>  	/* locks in ctl.c */
>  	spinlock_t		dropentry_lock;  /* drop entry handling */
>  	spinlock_t		droppacket_lock; /* drop packet handling */
> diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
> index bbce5b45b622..55000252c72c 100644
> --- a/net/netfilter/ipvs/ip_vs_conn.c
> +++ b/net/netfilter/ipvs/ip_vs_conn.c
> @@ -54,9 +54,6 @@ int ip_vs_conn_tab_size __read_mostly;
>  /*  SLAB cache for IPVS connections */
>  static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
>  
> -/*  counter for no client port connections */
> -static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
> -
>  /* We need an addrstrlen that works with or without v6 */
>  #ifdef CONFIG_IP_VS_IPV6
>  #define IP_VS_ADDRSTRLEN INET6_ADDRSTRLEN
> @@ -319,10 +316,16 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
>  	struct ip_vs_conn *cp;
>  
>  	cp = __ip_vs_conn_in_get(p);
> -	if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) {
> -		struct ip_vs_conn_param cport_zero_p = *p;
> -		cport_zero_p.cport = 0;
> -		cp = __ip_vs_conn_in_get(&cport_zero_p);
> +	if (!cp) {
> +		struct netns_ipvs *ipvs = p->ipvs;
> +		int af_id = ip_vs_af_index(p->af);
> +
> +		if (atomic_read(&ipvs->no_cport_conns[af_id])) {
> +			struct ip_vs_conn_param cport_zero_p = *p;
> +
> +			cport_zero_p.cport = 0;
> +			cp = __ip_vs_conn_in_get(&cport_zero_p);
> +		}
>  	}
>  
>  	IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
> @@ -535,6 +538,7 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
>  {
>  	struct hlist_bl_head *head, *head2, *head_new;
>  	struct netns_ipvs *ipvs = cp->ipvs;
> +	int af_id = ip_vs_af_index(cp->af);
>  	u32 hash_r = 0, hash_key_r = 0;
>  	struct ip_vs_rht *t, *tp, *t2;
>  	u32 hash_key, hash_key_new;
> @@ -613,7 +617,7 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
>  			hlist_bl_del_rcu(&cp->c_list);
>  			hlist_bl_add_head_rcu(&cp->c_list, head_new);
>  		}
> -		atomic_dec(&ip_vs_conn_no_cport_cnt);
> +		atomic_dec(&ipvs->no_cport_conns[af_id]);
>  		cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
>  		cp->cport = cport;
>  	}
> @@ -1169,8 +1173,11 @@ static void ip_vs_conn_expire(struct timer_list *t)
>  		if (unlikely(cp->app != NULL))
>  			ip_vs_unbind_app(cp);
>  		ip_vs_unbind_dest(cp);
> -		if (cp->flags & IP_VS_CONN_F_NO_CPORT)
> -			atomic_dec(&ip_vs_conn_no_cport_cnt);
> +		if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
> +			int af_id = ip_vs_af_index(cp->af);
> +
> +			atomic_dec(&ipvs->no_cport_conns[af_id]);
> +		}
>  		if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
>  			ip_vs_conn_rcu_free(&cp->rcu_head);
>  		else
> @@ -1277,8 +1284,11 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
>  	cp->out_seq.delta = 0;
>  
>  	atomic_inc(&ipvs->conn_count);
> -	if (flags & IP_VS_CONN_F_NO_CPORT)
> -		atomic_inc(&ip_vs_conn_no_cport_cnt);
> +	if (unlikely(flags & IP_VS_CONN_F_NO_CPORT)) {
> +		int af_id = ip_vs_af_index(cp->af);
> +
> +		atomic_inc(&ipvs->no_cport_conns[af_id]);
> +	}
>  
>  	/* Bind the connection with a destination server */
>  	cp->dest = NULL;
> @@ -1556,6 +1566,7 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
>  };
>  #endif
>  
> +#ifdef CONFIG_SYSCTL
>  
>  /* Randomly drop connection entries before running out of memory
>   * Can be used for DATA and CTL conns. For TPL conns there are exceptions:
> @@ -1565,12 +1576,7 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
>   */
>  static inline int todrop_entry(struct ip_vs_conn *cp)
>  {
> -	/*
> -	 * The drop rate array needs tuning for real environments.
> -	 * Called from timer bh only => no locking
> -	 */
> -	static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
> -	static signed char todrop_counter[9] = {0};
> +	struct netns_ipvs *ipvs = cp->ipvs;
>  	int i;
>  
>  	/* if the conn entry hasn't lasted for 60 seconds, don't drop it.
> @@ -1579,15 +1585,17 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
>  	if (time_before(cp->timeout + jiffies, cp->timer.expires + 60*HZ))
>  		return 0;
>  
> -	/* Don't drop the entry if its number of incoming packets is not
> -	   located in [0, 8] */
> +	/* Drop only conns whose incoming packet count is in the [1..8] range */
>  	i = atomic_read(&cp->in_pkts);
> -	if (i > 8 || i < 0) return 0;
> +	if (i > 8 || i < 1)

Why did this change? How is this related to the per-netns update?

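FWIW, my reading (which could be wrong) is that the drop cadence is
unchanged and only the static tables become per-net state: the old
todrop_rate[i] was simply i, so i == 0 was never dropped and the
counter was reset to i; the new code folds that into the "i < 1"
check and the "i + 1" reset. A standalone userspace sketch, assuming
that reading, comparing both paths:

	/* Not kernel code: simulate old vs new todrop_entry() counter
	 * logic for a fixed in_pkts value and compare the drop cadence.
	 */
	#include <stdio.h>

	static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};

	static int old_todrop(signed char *counter, int in_pkts)
	{
		int i = in_pkts;

		if (i > 8 || i < 0)
			return 0;
		if (!todrop_rate[i])		/* i == 0: never drop */
			return 0;
		if (--counter[i] > 0)
			return 0;
		counter[i] = todrop_rate[i];	/* == i */
		return 1;
	}

	static int new_todrop(signed char *counters, int in_pkts)
	{
		int i = in_pkts;

		if (i > 8 || i < 1)	/* folds in the old rate[0] == 0 case */
			return 0;
		i--;
		if (--counters[i] > 0)
			return 0;
		counters[i] = i + 1;	/* == in_pkts, same reset as before */
		return 1;
	}

	int main(void)
	{
		signed char oldc[9] = { 0 }, newc[8] = { 0 };
		int k;

		for (k = 0; k < 9; k++)	/* in_pkts == 3 */
			printf("%d/%d ", old_todrop(oldc, 3),
			       new_todrop(newc, 3));
		printf("\n");	/* 1/1 0/0 0/0 1/1 0/0 0/0 1/1 0/0 0/0 */
		return 0;
	}

So unless I am misreading it, this is a pure conversion of the static
tables to per-net counters; a sentence in the changelog saying so
would answer the question.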
> +		return 0;
>  
> -	if (!todrop_rate[i]) return 0;
> -	if (--todrop_counter[i] > 0) return 0;
> +	i--;
> +	if (--ipvs->dropentry_counters[i] > 0)
> +		return 0;
>  
> -	todrop_counter[i] = todrop_rate[i];
> +	/* Prefer to drop conns with fewer incoming packets */
> +	ipvs->dropentry_counters[i] = i + 1;
>  	return 1;
>  }
>  
> @@ -1681,7 +1689,7 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
>  out:
>  	rcu_read_unlock();
>  }
> -
> +#endif
>  
>  /* Flush all the connection entries in the conn_tab */
>  static void ip_vs_conn_flush(struct netns_ipvs *ipvs)
> @@ -1806,7 +1814,11 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
>   */
>  int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs)
>  {
> +	int idx;
> +
>  	atomic_set(&ipvs->conn_count, 0);
> +	for (idx = 0; idx < IP_VS_AF_MAX; idx++)
> +		atomic_set(&ipvs->no_cport_conns[idx], 0);
>  	INIT_DELAYED_WORK(&ipvs->conn_resize_work, conn_resize_work_handler);
>  	RCU_INIT_POINTER(ipvs->conn_tab, NULL);
>  	atomic_set(&ipvs->conn_tab_changes, 0);
> -- 
> 2.51.0