From: Julian Anastasov <ja@ssi.bg>
To: Simon Horman <horms@verge.net.au>
Cc: lvs-devel@vger.kernel.org, netfilter-devel@vger.kernel.org,
Dust Li <dust.li@linux.alibaba.com>,
Jiejian Wu <jiejian@linux.alibaba.com>,
rcu@vger.kernel.org
Subject: [PATCHv4 net-next 11/14] ipvs: no_cport and dropentry counters can be per-net
Date: Tue, 28 May 2024 11:02:31 +0300
Message-ID: <20240528080234.10148-12-ja@ssi.bg>
In-Reply-To: <20240528080234.10148-1-ja@ssi.bg>
Now that conn_tab is per-net, the no_cport and dropentry counters do
not need to be global anymore; move them into struct netns_ipvs.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
---
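Note for reviewers: the hunks below use the ip_vs_af_index() helper and
IP_VS_AF_MAX from an earlier patch in this series. A minimal sketch of
the assumed mapping, so the hunks can be read in isolation (the
authoritative definition is the one already in the series):

	/* Assumed mapping: dense per-family index for small per-net arrays */
	enum {
		IP_VS_AF_INET,		/* AF_INET  -> 0 */
		IP_VS_AF_INET6,		/* AF_INET6 -> 1 */
		IP_VS_AF_MAX
	};

	static inline int ip_vs_af_index(int af)
	{
		return af == AF_INET6 ? IP_VS_AF_INET6 : IP_VS_AF_INET;
	}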
include/net/ip_vs.h | 2 ++
net/netfilter/ipvs/ip_vs_conn.c | 62 ++++++++++++++++++++-------------
2 files changed, 39 insertions(+), 25 deletions(-)
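The todrop_entry() rework below keeps the old drop rates: a conn with N
incoming packets (N in 1..8) uses slot N - 1 and resets its counter to
N, so roughly one in N scanned conns of that bucket is dropped, and
conns with fewer packets are dropped more aggressively. A standalone
userspace model of that counter scheme (a sketch for illustration only,
not kernel code):

	#include <stdio.h>

	/* per-net in the kernel; one bucket per in_pkts value 1..8 */
	static signed char dropentry_counters[8];

	static int todrop_entry_model(int in_pkts)
	{
		int i;

		if (in_pkts > 8 || in_pkts < 1)
			return 0;
		i = in_pkts - 1;
		if (--dropentry_counters[i] > 0)
			return 0;
		dropentry_counters[i] = i + 1;	/* reset to in_pkts */
		return 1;
	}

	int main(void)
	{
		int n, drops = 0;

		/* 12 candidate conns, each with 3 incoming packets:
		 * every 3rd one is dropped -> prints "dropped 4 of 12"
		 */
		for (n = 0; n < 12; n++)
			drops += todrop_entry_model(3);
		printf("dropped %d of %d\n", drops, 12);
		return 0;
	}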
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 074f45f89c80..e091c84c8a11 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1158,6 +1158,7 @@ struct netns_ipvs {
#endif
/* ip_vs_conn */
atomic_t conn_count; /* connection counter */
+ atomic_t no_cport_conns[IP_VS_AF_MAX];
struct delayed_work conn_resize_work;/* resize conn_tab */
/* ip_vs_ctl */
@@ -1188,6 +1189,7 @@ struct netns_ipvs {
int drop_counter;
int old_secure_tcp;
atomic_t dropentry;
+ s8 dropentry_counters[8];
/* locks in ctl.c */
spinlock_t dropentry_lock; /* drop entry handling */
spinlock_t droppacket_lock; /* drop packet handling */
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 68d25dbb38a5..ad45fb4e1cc2 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -54,9 +54,6 @@ int ip_vs_conn_tab_size __read_mostly;
/* SLAB cache for IPVS connections */
static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
-/* counter for no client port connections */
-static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
-
/* We need an addrstrlen that works with or without v6 */
#ifdef CONFIG_IP_VS_IPV6
#define IP_VS_ADDRSTRLEN INET6_ADDRSTRLEN
@@ -319,10 +316,16 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
struct ip_vs_conn *cp;
cp = __ip_vs_conn_in_get(p);
- if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) {
- struct ip_vs_conn_param cport_zero_p = *p;
- cport_zero_p.cport = 0;
- cp = __ip_vs_conn_in_get(&cport_zero_p);
+ if (!cp) {
+ struct netns_ipvs *ipvs = p->ipvs;
+ int af_id = ip_vs_af_index(p->af);
+
+ if (atomic_read(&ipvs->no_cport_conns[af_id])) {
+ struct ip_vs_conn_param cport_zero_p = *p;
+
+ cport_zero_p.cport = 0;
+ cp = __ip_vs_conn_in_get(&cport_zero_p);
+ }
}
IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
@@ -535,6 +538,7 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
{
struct hlist_bl_head *head, *head2, *head_new;
struct netns_ipvs *ipvs = cp->ipvs;
+ int af_id = ip_vs_af_index(cp->af);
u32 hash_r = 0, hash_key_r = 0;
struct ip_vs_rht *t, *tp, *t2;
u32 hash_key, hash_key_new;
@@ -613,7 +617,7 @@ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
hlist_bl_del_rcu(&cp->c_list);
hlist_bl_add_head_rcu(&cp->c_list, head_new);
}
- atomic_dec(&ip_vs_conn_no_cport_cnt);
+ atomic_dec(&ipvs->no_cport_conns[af_id]);
cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
cp->cport = cport;
}
@@ -1169,8 +1173,11 @@ static void ip_vs_conn_expire(struct timer_list *t)
if (unlikely(cp->app != NULL))
ip_vs_unbind_app(cp);
ip_vs_unbind_dest(cp);
- if (cp->flags & IP_VS_CONN_F_NO_CPORT)
- atomic_dec(&ip_vs_conn_no_cport_cnt);
+ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+ int af_id = ip_vs_af_index(cp->af);
+
+ atomic_dec(&ipvs->no_cport_conns[af_id]);
+ }
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
ip_vs_conn_rcu_free(&cp->rcu_head);
else
@@ -1277,8 +1284,11 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
cp->out_seq.delta = 0;
atomic_inc(&ipvs->conn_count);
- if (flags & IP_VS_CONN_F_NO_CPORT)
- atomic_inc(&ip_vs_conn_no_cport_cnt);
+ if (unlikely(flags & IP_VS_CONN_F_NO_CPORT)) {
+ int af_id = ip_vs_af_index(cp->af);
+
+ atomic_inc(&ipvs->no_cport_conns[af_id]);
+ }
/* Bind the connection with a destination server */
cp->dest = NULL;
@@ -1554,6 +1564,7 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
};
#endif
+#ifdef CONFIG_SYSCTL
/* Randomly drop connection entries before running out of memory
* Can be used for DATA and CTL conns. For TPL conns there are exceptions:
@@ -1563,12 +1574,7 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
*/
static inline int todrop_entry(struct ip_vs_conn *cp)
{
- /*
- * The drop rate array needs tuning for real environments.
- * Called from timer bh only => no locking
- */
- static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
- static signed char todrop_counter[9] = {0};
+ struct netns_ipvs *ipvs = cp->ipvs;
int i;
/* if the conn entry hasn't lasted for 60 seconds, don't drop it.
@@ -1577,15 +1583,17 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
if (time_before(cp->timeout + jiffies, cp->timer.expires + 60*HZ))
return 0;
- /* Don't drop the entry if its number of incoming packets is not
- located in [0, 8] */
+ /* Drop only conns with number of incoming packets in [1..8] range */
i = atomic_read(&cp->in_pkts);
- if (i > 8 || i < 0) return 0;
+ if (i > 8 || i < 1)
+ return 0;
- if (!todrop_rate[i]) return 0;
- if (--todrop_counter[i] > 0) return 0;
+ i--;
+ if (--ipvs->dropentry_counters[i] > 0)
+ return 0;
- todrop_counter[i] = todrop_rate[i];
+ /* Prefer to drop conns with less number of incoming packets */
+ ipvs->dropentry_counters[i] = i + 1;
return 1;
}
@@ -1679,7 +1687,7 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
out:
rcu_read_unlock();
}
-
+#endif
/* Flush all the connection entries in the conn_tab */
static void ip_vs_conn_flush(struct netns_ipvs *ipvs)
@@ -1804,7 +1812,11 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
*/
int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs)
{
+ int idx;
+
atomic_set(&ipvs->conn_count, 0);
+ for (idx = 0; idx < IP_VS_AF_MAX; idx++)
+ atomic_set(&ipvs->no_cport_conns[idx], 0);
INIT_DELAYED_WORK(&ipvs->conn_resize_work, conn_resize_work_handler);
RCU_INIT_POINTER(ipvs->conn_tab, NULL);
atomic_set(&ipvs->conn_tab_changes, 0);
--
2.44.0