From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org,
pabeni@redhat.com, edumazet@google.com, fw@strlen.de,
horms@kernel.org
Subject: [PATCH net-next 01/15] ipvs: add conn_max sysctl to limit connections
Date: Sun, 7 Jun 2026 11:49:40 +0200 [thread overview]
Message-ID: <20260607094954.48892-2-pablo@netfilter.org> (raw)
In-Reply-To: <20260607094954.48892-1-pablo@netfilter.org>
From: Julian Anastasov <ja@ssi.bg>
Currently, we are using atomic_t to track the number of
connections. On 64-bit setups with large memory there is
a risk that this counter overflows. Also, setups with many
containers may need to tune the limit for connections.
Add a sysctl control to limit the number of connections. The
platform maximum is 1,073,741,824 (64-bit) or 16,777,216 (32-bit).
Depending on the admin's privilege, the written value changes
either the soft limit only or both the soft and hard limits.
This allows unprivileged admins to change the soft limit within
the range determined by privileged admins.
Link: https://sashiko.dev/#/patchset/20260523172715.94795-1-ja%40ssi.bg
Link: https://sashiko.dev/#/patchset/20260430074420.26697-7-ja%40ssi.bg
Link: https://sashiko.dev/#/patchset/20260522105546.13732-1-ja%40ssi.bg
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
Documentation/networking/ipvs-sysctl.rst | 35 ++++++++++++++++
include/net/ip_vs.h | 22 ++++++++++
net/netfilter/ipvs/ip_vs_conn.c | 10 ++++-
net/netfilter/ipvs/ip_vs_ctl.c | 53 ++++++++++++++++++++++++
4 files changed, 119 insertions(+), 1 deletion(-)
diff --git a/Documentation/networking/ipvs-sysctl.rst b/Documentation/networking/ipvs-sysctl.rst
index a556439f8be7..b6bac2612420 100644
--- a/Documentation/networking/ipvs-sysctl.rst
+++ b/Documentation/networking/ipvs-sysctl.rst
@@ -56,6 +56,41 @@ conn_lfactor - INTEGER
-4: grow if load goes above 6% (buckets = nodes * 16)
2: grow if load goes above 400% (buckets = nodes / 4)
+conn_max - INTEGER
+ Limit for number of connections, per netns.
+
+ Controls the soft and hard limit for number of connections.
+ Initially, the platform specific limit is assigned for init_net.
+ The value can be changed and later the soft limit propagated
+ to other networking namespaces.
+
+ Privileged admin can change both limits up to the value of the
+ platform limit while the unprivileged admin can change only the
+ soft limit up to the value of the hard limit.
+
+ For setups using conntrack=1 (CONFIG_IP_VS_NFCT for
+ Netfilter connection tracking) the connections can be
+ limited also by nf_conntrack_max.
+
+ soft limit hard limit
+ =====================================================
+ init_net:
+ create netns platform platform
+ priv admin 0 .. platform 0 .. platform
+ =====================================================
+ new netns:
+ create netns init_net:soft init_net:soft
+ priv admin 0 .. platform 0 .. platform
+ unpriv admin 0 .. hard N/A
+
+ Limits per platform:
+ 1,073,741,824 (2^30 for 64-bit)
+ 16,777,216 (2^24 for 32-bit)
+
+ Possible values: 0 .. platform limit
+
+ Default: platform limit
+
conn_reuse_mode - INTEGER
1 - default
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index e517eaaa177b..49297fec448a 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -44,6 +44,14 @@
#define IP_VS_CONN_TAB_MAX_BITS 20
#endif
+/* conn_max limits */
+#if BITS_PER_LONG > 32
+/* Limit of atomic_t but restricted by roundup_pow_of_two() in ip_vs_core.c */
+#define IP_VS_CONN_MAX (1 << 30)
+#else
+#define IP_VS_CONN_MAX (1 << 24)
+#endif
+
/* svc_table limits */
#define IP_VS_SVC_TAB_MIN_BITS 4
#define IP_VS_SVC_TAB_MAX_BITS 20
@@ -1220,6 +1228,10 @@ struct netns_ipvs {
/* sysctl variables */
int sysctl_amemthresh;
int sysctl_am_droprate;
+#ifdef CONFIG_SYSCTL
+ int sysctl_conn_max;/* soft limit for conns */
+ int conn_max_limit; /* hard limit for conn_max */
+#endif
int sysctl_drop_entry;
int sysctl_drop_packet;
int sysctl_secure_tcp;
@@ -1317,6 +1329,11 @@ struct netns_ipvs {
#ifdef CONFIG_SYSCTL
+static inline int sysctl_conn_max(struct netns_ipvs *ipvs)
+{
+ return READ_ONCE(ipvs->sysctl_conn_max);
+}
+
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_sync_threshold[0];
@@ -1436,6 +1453,11 @@ static inline int sysctl_est_nice(struct netns_ipvs *ipvs)
#else
+static inline int sysctl_conn_max(struct netns_ipvs *ipvs)
+{
+ return IP_VS_CONN_MAX;
+}
+
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
{
return DEFAULT_SYNC_THRESHOLD;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 9ea6b4fa78bf..e76a73d183d5 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1358,9 +1358,18 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
struct netns_ipvs *ipvs = p->ipvs;
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->ipvs,
p->protocol);
+ /* Increment conn_count up to conn_max */
+ int count = atomic_read(&ipvs->conn_count);
+ int max = sysctl_conn_max(ipvs);
+
+ do {
+ if (count >= max)
+ return NULL;
+ } while (!atomic_try_cmpxchg(&ipvs->conn_count, &count, count + 1));
cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
if (cp == NULL) {
+ atomic_dec(&ipvs->conn_count);
IP_VS_ERR_RL("%s(): no memory\n", __func__);
return NULL;
}
@@ -1414,7 +1423,6 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
cp->in_seq.delta = 0;
cp->out_seq.delta = 0;
- atomic_inc(&ipvs->conn_count);
if (unlikely(flags & IP_VS_CONN_F_NO_CPORT)) {
int af_id = ip_vs_af_index(cp->af);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 16daba8cac83..f765d1506839 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2322,6 +2322,45 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
#ifdef CONFIG_SYSCTL
+static int
+proc_do_conn_max(const struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int *valp = table->data;
+ /* We can not use *valp to check if new value is provided, use INT_MIN
+ * for this because different admins change different limits.
+ */
+ int unset = INT_MIN;
+ int val = write ? unset : READ_ONCE(*valp);
+ int rc;
+
+ const struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = sizeof(int),
+ };
+
+ rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+ if (write && !rc && val != unset) {
+ struct netns_ipvs *ipvs = table->extra2;
+ bool priv = capable(CAP_NET_ADMIN);
+ int max;
+
+ mutex_lock(&ipvs->service_mutex);
+ /* Unprivileged admins can not go above the hard limit */
+ max = priv ? IP_VS_CONN_MAX : ipvs->conn_max_limit;
+ if (val < 0 || val > max) {
+ rc = -EINVAL;
+ } else {
+ /* Privileged admin changes both limits */
+ if (priv)
+ ipvs->conn_max_limit = val;
+ WRITE_ONCE(*valp, val);
+ }
+ mutex_unlock(&ipvs->service_mutex);
+ }
+ return rc;
+}
+
static int
proc_do_defense_mode(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -2626,6 +2665,12 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "conn_max",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_do_conn_max,
+ },
{
.procname = "drop_entry",
.maxlen = sizeof(int),
@@ -4980,6 +5025,14 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl[idx++].data = &ipvs->sysctl_amemthresh;
ipvs->sysctl_am_droprate = 10;
tbl[idx++].data = &ipvs->sysctl_am_droprate;
+
+ /* Inherit both limits from init_net:conn_max */
+ ipvs->conn_max_limit = net_eq(net, &init_net) ? IP_VS_CONN_MAX :
+ READ_ONCE(*(int *)vs_vars[idx].data);
+ ipvs->sysctl_conn_max = ipvs->conn_max_limit;
+ tbl[idx].extra2 = ipvs;
+ tbl[idx++].data = &ipvs->sysctl_conn_max;
+
tbl[idx++].data = &ipvs->sysctl_drop_entry;
tbl[idx++].data = &ipvs->sysctl_drop_packet;
#ifdef CONFIG_IP_VS_NFCT
--
2.47.3
next prev parent reply other threads:[~2026-06-07 9:50 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-07 9:49 [PATCH net-next 00/15] Netfilter/IPVS updates for net-next Pablo Neira Ayuso
2026-06-07 9:49 ` Pablo Neira Ayuso [this message]
2026-06-08 22:40 ` [PATCH net-next 01/15] ipvs: add conn_max sysctl to limit connections patchwork-bot+netdevbpf
2026-06-07 9:49 ` [PATCH net-next 02/15] netfilter: nfnetlink_osf: fix mss parsing on big-endian architectures Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 03/15] netfilter: nfnetlink_cthelper: use {READ,WRITE}_ONCE for accessing helper flags Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 04/15] netfilter: synproxy: drop packets if timestamp adjustment fails Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 05/15] netfilter: synproxy: adjust duplicate timestamp options Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 06/15] netfilter: synproxy: fix unaligned memory access in timestamp adjustment Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 07/15] netfilter: synproxy: protect nf_ct_seqadj_init() with conntrack lock Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 08/15] netfilter: cttimeout: detach dataplane timeout policy and repurpose refcount Pablo Neira Ayuso
2026-06-08 22:30 ` Pablo Neira Ayuso
2026-06-08 22:57 ` Jakub Kicinski
2026-06-08 23:13 ` Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 09/15] netfilter: nf_conntrack_helper: dynamically allocate struct nf_conntrack_helper Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 10/15] netfilter: nf_conntrack_pptp: move GRE specific cleanup to GRE tracker Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 11/15] netfilter: nf_conntrack_helper: add refcounting from datapath Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 12/15] netfilter: conntrack: revert ct extension genid infrastructure Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 13/15] netfilter: conntrack: call nf_ct_gre_keymap_destroy() if master helper is pptp Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 14/15] netfilter: flowtable: avoid num_encaps underflow on bridge VLAN untag Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 15/15] netfilter: nf_conntrack: use get_unaligned_be32() in tcp_sack() Pablo Neira Ayuso
2026-06-08 11:58 ` [PATCH net-next 00/15] Netfilter/IPVS updates for net-next Pablo Neira Ayuso
2026-06-08 22:39 ` Pablo Neira Ayuso
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260607094954.48892-2-pablo@netfilter.org \
--to=pablo@netfilter.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=fw@strlen.de \
--cc=horms@kernel.org \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.