All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Julian Anastasov <ja@ssi.bg>
Cc: Simon Horman <horms@verge.net.au>,
	lvs-devel@vger.kernel.org, netfilter-devel@vger.kernel.org,
	Dust Li <dust.li@linux.alibaba.com>,
	Jiejian Wu <jiejian@linux.alibaba.com>,
	rcu@vger.kernel.org
Subject: Re: [PATCHv6 net-next 11/14] ipvs: no_cport and dropentry counters can be per-net
Date: Mon, 24 Nov 2025 22:29:29 +0100	[thread overview]
Message-ID: <aSTOOez-GDzaG0LT@calendula> (raw)
In-Reply-To: <20251019155711.67609-12-ja@ssi.bg>

On Sun, Oct 19, 2025 at 06:57:08PM +0300, Julian Anastasov wrote:
> With using per-net conn_tab these counters do not need to be
> global anymore.
> 
> Signed-off-by: Julian Anastasov <ja@ssi.bg>
> ---
>  include/net/ip_vs.h             |  2 ++
>  net/netfilter/ipvs/ip_vs_conn.c | 62 ++++++++++++++++++++-------------
>  2 files changed, 39 insertions(+), 25 deletions(-)
> 
> diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
> index ce77800853ab..1b64c5ee2ac2 100644
> --- a/include/net/ip_vs.h
> +++ b/include/net/ip_vs.h
> @@ -1158,6 +1158,7 @@ struct netns_ipvs {
>  #endif
>  	/* ip_vs_conn */
>  	atomic_t		conn_count;      /* connection counter */
> +	atomic_t		no_cport_conns[IP_VS_AF_MAX];
>  	struct delayed_work	conn_resize_work;/* resize conn_tab */
>  
>  	/* ip_vs_ctl */
> @@ -1188,6 +1189,7 @@ struct netns_ipvs {
>  	int			drop_counter;
>  	int			old_secure_tcp;
>  	atomic_t		dropentry;
> +	s8			dropentry_counters[8];
>  	/* locks in ctl.c */
>  	spinlock_t		dropentry_lock;  /* drop entry handling */
>  	spinlock_t		droppacket_lock; /* drop packet handling */
> diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
> index bbce5b45b622..55000252c72c 100644
> --- a/net/netfilter/ipvs/ip_vs_conn.c
> +++ b/net/netfilter/ipvs/ip_vs_conn.c
> @@ -54,9 +54,6 @@ int ip_vs_conn_tab_size __read_mostly;
>  /*  SLAB cache for IPVS connections */
>  static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
>  
> -/*  counter for no client port connections */
> -static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
> -
>  /* We need an addrstrlen that works with or without v6 */
>  #ifdef CONFIG_IP_VS_IPV6
>  #define IP_VS_ADDRSTRLEN INET6_ADDRSTRLEN
> @@ -319,10 +316,16 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
>  	struct ip_vs_conn *cp;
>  
>  	cp = __ip_vs_conn_in_get(p);
> -	if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) {
> -		struct ip_vs_conn_param cport_zero_p = *p;
> -		cport_zero_p.cport = 0;
> -		cp = __ip_vs_conn_in_get(&cport_zero_p);
> +	if (!cp) {
> +		struct netns_ipvs *ipvs = p->ipvs;
> +		int af_id = ip_vs_af_index(p->af);
> +
> +		if (atomic_read(&ipvs->no_cport_conns[af_id])) {
> +			struct ip_vs_conn_param cport_zero_p = *p;
> +
> +			cport_zero_p.cport = 0;
> +			cp = __ip_vs_conn_in_get(&cport_zero_p);
> +		}
>  	}
>  
>  	IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
> @@ -535,6 +538,7 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
>  {
>  	struct hlist_bl_head *head, *head2, *head_new;
>  	struct netns_ipvs *ipvs = cp->ipvs;
> +	int af_id = ip_vs_af_index(cp->af);
>  	u32 hash_r = 0, hash_key_r = 0;
>  	struct ip_vs_rht *t, *tp, *t2;
>  	u32 hash_key, hash_key_new;
> @@ -613,7 +617,7 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
>  			hlist_bl_del_rcu(&cp->c_list);
>  			hlist_bl_add_head_rcu(&cp->c_list, head_new);
>  		}
> -		atomic_dec(&ip_vs_conn_no_cport_cnt);
> +		atomic_dec(&ipvs->no_cport_conns[af_id]);
>  		cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
>  		cp->cport = cport;
>  	}
> @@ -1169,8 +1173,11 @@ static void ip_vs_conn_expire(struct timer_list *t)
>  		if (unlikely(cp->app != NULL))
>  			ip_vs_unbind_app(cp);
>  		ip_vs_unbind_dest(cp);
> -		if (cp->flags & IP_VS_CONN_F_NO_CPORT)
> -			atomic_dec(&ip_vs_conn_no_cport_cnt);
> +		if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
> +			int af_id = ip_vs_af_index(cp->af);
> +
> +			atomic_dec(&ipvs->no_cport_conns[af_id]);
> +		}
>  		if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
>  			ip_vs_conn_rcu_free(&cp->rcu_head);
>  		else
> @@ -1277,8 +1284,11 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
>  	cp->out_seq.delta = 0;
>  
>  	atomic_inc(&ipvs->conn_count);
> -	if (flags & IP_VS_CONN_F_NO_CPORT)
> -		atomic_inc(&ip_vs_conn_no_cport_cnt);
> +	if (unlikely(flags & IP_VS_CONN_F_NO_CPORT)) {
> +		int af_id = ip_vs_af_index(cp->af);
> +
> +		atomic_inc(&ipvs->no_cport_conns[af_id]);
> +	}
>  
>  	/* Bind the connection with a destination server */
>  	cp->dest = NULL;
> @@ -1556,6 +1566,7 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
>  };
>  #endif
>  
> +#ifdef CONFIG_SYSCTL
>  
>  /* Randomly drop connection entries before running out of memory
>   * Can be used for DATA and CTL conns. For TPL conns there are exceptions:
> @@ -1565,12 +1576,7 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
>   */
>  static inline int todrop_entry(struct ip_vs_conn *cp)
>  {
> -	/*
> -	 * The drop rate array needs tuning for real environments.
> -	 * Called from timer bh only => no locking
> -	 */
> -	static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
> -	static signed char todrop_counter[9] = {0};
> +	struct netns_ipvs *ipvs = cp->ipvs;
>  	int i;
>  
>  	/* if the conn entry hasn't lasted for 60 seconds, don't drop it.
> @@ -1579,15 +1585,17 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
>  	if (time_before(cp->timeout + jiffies, cp->timer.expires + 60*HZ))
>  		return 0;
>  
> -	/* Don't drop the entry if its number of incoming packets is not
> -	   located in [0, 8] */
> +	/* Drop only conns with number of incoming packets in [1..8] range */
>  	i = atomic_read(&cp->in_pkts);
> -	if (i > 8 || i < 0) return 0;
> +	if (i > 8 || i < 1)

Why did this range check change (the lower bound went from i < 0 to i < 1)? How is this related to the per-netns update?

> +		return 0;
>  
> -	if (!todrop_rate[i]) return 0;
> -	if (--todrop_counter[i] > 0) return 0;
> +	i--;
> +	if (--ipvs->dropentry_counters[i] > 0)
> +		return 0;
>  
> -	todrop_counter[i] = todrop_rate[i];
> +	/* Prefer to drop conns with less number of incoming packets */
> +	ipvs->dropentry_counters[i] = i + 1;
>  	return 1;
>  }
>  
> @@ -1681,7 +1689,7 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
>  out:
>  	rcu_read_unlock();
>  }
> -
> +#endif
>  
>  /* Flush all the connection entries in the conn_tab */
>  static void ip_vs_conn_flush(struct netns_ipvs *ipvs)
> @@ -1806,7 +1814,11 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
>   */
>  int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs)
>  {
> +	int idx;
> +
>  	atomic_set(&ipvs->conn_count, 0);
> +	for (idx = 0; idx < IP_VS_AF_MAX; idx++)
> +		atomic_set(&ipvs->no_cport_conns[idx], 0);
>  	INIT_DELAYED_WORK(&ipvs->conn_resize_work, conn_resize_work_handler);
>  	RCU_INIT_POINTER(ipvs->conn_tab, NULL);
>  	atomic_set(&ipvs->conn_tab_changes, 0);
> -- 
> 2.51.0
> 
> 
> 

  reply	other threads:[~2025-11-24 21:29 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-19 15:56 [PATCHv6 net-next 00/14] ipvs: per-net tables and optimizations Julian Anastasov
2025-10-19 15:56 ` [PATCHv6 net-next 01/14] rculist_bl: add hlist_bl_for_each_entry_continue_rcu Julian Anastasov
2025-10-23 11:44   ` Florian Westphal
2025-10-23 13:33     ` Julian Anastasov
2025-10-19 15:56 ` [PATCHv6 net-next 02/14] ipvs: make ip_vs_svc_table and ip_vs_svc_fwm_table per netns Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 03/14] ipvs: some service readers can use RCU Julian Anastasov
2025-10-24  2:21   ` Dust Li
2025-11-24 21:00   ` Pablo Neira Ayuso
2025-11-26 19:39     ` Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 04/14] ipvs: use single svc table Julian Anastasov
2025-11-24 21:07   ` Pablo Neira Ayuso
2025-10-19 15:57 ` [PATCHv6 net-next 05/14] ipvs: do not keep dest_dst after dest is removed Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 06/14] ipvs: use more counters to avoid service lookups Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 07/14] ipvs: add resizable hash tables Julian Anastasov
2025-11-24 21:16   ` Pablo Neira Ayuso
2025-11-26 20:02     ` Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 08/14] ipvs: use resizable hash table for services Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 09/14] ipvs: switch to per-net connection table Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 10/14] ipvs: show the current conn_tab size to users Julian Anastasov
2025-11-24 21:21   ` Pablo Neira Ayuso
2025-10-19 15:57 ` [PATCHv6 net-next 11/14] ipvs: no_cport and dropentry counters can be per-net Julian Anastasov
2025-11-24 21:29   ` Pablo Neira Ayuso [this message]
2025-11-26 20:08     ` Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 12/14] ipvs: use more keys for connection hashing Julian Anastasov
2025-10-19 15:57 ` [PATCHv6 net-next 13/14] ipvs: add ip_vs_status info Julian Anastasov
2025-11-24 21:42   ` Pablo Neira Ayuso
2025-10-19 15:57 ` [PATCHv6 net-next 14/14] ipvs: add conn_lfactor and svc_lfactor sysctl vars Julian Anastasov
2025-11-24 21:46 ` [PATCHv6 net-next 00/14] ipvs: per-net tables and optimizations Pablo Neira Ayuso
2025-11-26 20:16   ` Julian Anastasov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aSTOOez-GDzaG0LT@calendula \
    --to=pablo@netfilter.org \
    --cc=dust.li@linux.alibaba.com \
    --cc=horms@verge.net.au \
    --cc=ja@ssi.bg \
    --cc=jiejian@linux.alibaba.com \
    --cc=lvs-devel@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=rcu@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.