From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org,
pabeni@redhat.com, edumazet@google.com, fw@strlen.de,
horms@kernel.org
Subject: [PATCH net-next 01/15] ipvs: add conn_max sysctl to limit connections
Date: Sun, 7 Jun 2026 11:49:40 +0200 [thread overview]
Message-ID: <20260607094954.48892-2-pablo@netfilter.org> (raw)
In-Reply-To: <20260607094954.48892-1-pablo@netfilter.org>
From: Julian Anastasov <ja@ssi.bg>
Currently, we are using atomic_t to track the number of
connections. On 64-bit setups with large memory there is
a risk of this counter overflowing. Also, setups with many
containers may need to tune the limit for connections.
Add a sysctl control to limit the number of connections, up to
1,073,741,824 (64-bit) or 16,777,216 (32-bit).
Depending on the admin's privilege, a written value changes
either only the soft limit or both the soft and hard limits.
This allows unprivileged admins to change the soft limit within
the range determined by privileged admins.
Link: https://sashiko.dev/#/patchset/20260523172715.94795-1-ja%40ssi.bg
Link: https://sashiko.dev/#/patchset/20260430074420.26697-7-ja%40ssi.bg
Link: https://sashiko.dev/#/patchset/20260522105546.13732-1-ja%40ssi.bg
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
Documentation/networking/ipvs-sysctl.rst | 35 ++++++++++++++++
include/net/ip_vs.h | 22 ++++++++++
net/netfilter/ipvs/ip_vs_conn.c | 10 ++++-
net/netfilter/ipvs/ip_vs_ctl.c | 53 ++++++++++++++++++++++++
4 files changed, 119 insertions(+), 1 deletion(-)
diff --git a/Documentation/networking/ipvs-sysctl.rst b/Documentation/networking/ipvs-sysctl.rst
index a556439f8be7..b6bac2612420 100644
--- a/Documentation/networking/ipvs-sysctl.rst
+++ b/Documentation/networking/ipvs-sysctl.rst
@@ -56,6 +56,41 @@ conn_lfactor - INTEGER
-4: grow if load goes above 6% (buckets = nodes * 16)
2: grow if load goes above 400% (buckets = nodes / 4)
+conn_max - INTEGER
+ Limit for the number of connections, per netns.
+
+ Controls the soft and hard limits for the number of connections.
+ Initially, the platform-specific limit is assigned to init_net.
+ The value can be changed later; newly created network namespaces
+ inherit both of their limits from the soft limit of init_net.
+
+ A privileged admin can change both limits, up to the platform
+ limit, while an unprivileged admin can change only the soft
+ limit, up to the value of the hard limit.
+
+ For setups using conntrack=1 (CONFIG_IP_VS_NFCT for
+ Netfilter connection tracking), the connections can also
+ be limited by nf_conntrack_max.
+
+                 soft limit        hard limit
+ =====================================================
+ init_net:
+ create netns    platform          platform
+ priv admin      0 .. platform     0 .. platform
+ =====================================================
+ new netns:
+ create netns    init_net:soft     init_net:soft
+ priv admin      0 .. platform     0 .. platform
+ unpriv admin    0 .. hard         N/A
+
+ Limits per platform:
+ 1,073,741,824 (2^30 for 64-bit)
+ 16,777,216 (2^24 for 32-bit)
+
+ Possible values: 0 .. platform limit
+
+ Default: platform limit
+
conn_reuse_mode - INTEGER
1 - default
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index e517eaaa177b..49297fec448a 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -44,6 +44,14 @@
#define IP_VS_CONN_TAB_MAX_BITS 20
#endif
+/* conn_max limits */
+#if BITS_PER_LONG > 32
+/* Limit of atomic_t but restricted by roundup_pow_of_two() in ip_vs_core.c */
+#define IP_VS_CONN_MAX (1 << 30)
+#else
+#define IP_VS_CONN_MAX (1 << 24)
+#endif
+
/* svc_table limits */
#define IP_VS_SVC_TAB_MIN_BITS 4
#define IP_VS_SVC_TAB_MAX_BITS 20
@@ -1220,6 +1228,10 @@ struct netns_ipvs {
/* sysctl variables */
int sysctl_amemthresh;
int sysctl_am_droprate;
+#ifdef CONFIG_SYSCTL
+ int sysctl_conn_max;/* soft limit for conns */
+ int conn_max_limit; /* hard limit for conn_max */
+#endif
int sysctl_drop_entry;
int sysctl_drop_packet;
int sysctl_secure_tcp;
@@ -1317,6 +1329,11 @@ struct netns_ipvs {
#ifdef CONFIG_SYSCTL
+static inline int sysctl_conn_max(struct netns_ipvs *ipvs)
+{
+ return READ_ONCE(ipvs->sysctl_conn_max);
+}
+
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_sync_threshold[0];
@@ -1436,6 +1453,11 @@ static inline int sysctl_est_nice(struct netns_ipvs *ipvs)
#else
+static inline int sysctl_conn_max(struct netns_ipvs *ipvs)
+{
+ return IP_VS_CONN_MAX;
+}
+
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
{
return DEFAULT_SYNC_THRESHOLD;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 9ea6b4fa78bf..e76a73d183d5 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1358,9 +1358,18 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
struct netns_ipvs *ipvs = p->ipvs;
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->ipvs,
p->protocol);
+ /* Increment conn_count up to conn_max */
+ int count = atomic_read(&ipvs->conn_count);
+ int max = sysctl_conn_max(ipvs);
+
+ do {
+ if (count >= max)
+ return NULL;
+ } while (!atomic_try_cmpxchg(&ipvs->conn_count, &count, count + 1));
cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
if (cp == NULL) {
+ atomic_dec(&ipvs->conn_count);
IP_VS_ERR_RL("%s(): no memory\n", __func__);
return NULL;
}
@@ -1414,7 +1423,6 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
cp->in_seq.delta = 0;
cp->out_seq.delta = 0;
- atomic_inc(&ipvs->conn_count);
if (unlikely(flags & IP_VS_CONN_F_NO_CPORT)) {
int af_id = ip_vs_af_index(cp->af);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 16daba8cac83..f765d1506839 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2322,6 +2322,45 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
#ifdef CONFIG_SYSCTL
+static int
+proc_do_conn_max(const struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int *valp = table->data;
+ /* We cannot compare against *valp to detect whether a new value was
+ * provided, because different admins change different limits. Preset
+ * val to INT_MIN instead and treat it as "no value written".
+ */
+ int unset = INT_MIN;
+ int val = write ? unset : READ_ONCE(*valp);
+ int rc;
+
+ const struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = sizeof(int),
+ };
+
+ rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+ if (write && !rc && val != unset) {
+ struct netns_ipvs *ipvs = table->extra2;
+ bool priv = capable(CAP_NET_ADMIN);
+ int max;
+
+ mutex_lock(&ipvs->service_mutex);
+ /* Unprivileged admins can not go above the hard limit */
+ max = priv ? IP_VS_CONN_MAX : ipvs->conn_max_limit;
+ if (val < 0 || val > max) {
+ rc = -EINVAL;
+ } else {
+ /* Privileged admin changes both limits */
+ if (priv)
+ ipvs->conn_max_limit = val;
+ WRITE_ONCE(*valp, val);
+ }
+ mutex_unlock(&ipvs->service_mutex);
+ }
+ return rc;
+}
+
static int
proc_do_defense_mode(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -2626,6 +2665,12 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "conn_max",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_do_conn_max,
+ },
{
.procname = "drop_entry",
.maxlen = sizeof(int),
@@ -4980,6 +5025,14 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl[idx++].data = &ipvs->sysctl_amemthresh;
ipvs->sysctl_am_droprate = 10;
tbl[idx++].data = &ipvs->sysctl_am_droprate;
+
+ /* Inherit both limits from init_net:conn_max */
+ ipvs->conn_max_limit = net_eq(net, &init_net) ? IP_VS_CONN_MAX :
+ READ_ONCE(*(int *)vs_vars[idx].data);
+ ipvs->sysctl_conn_max = ipvs->conn_max_limit;
+ tbl[idx].extra2 = ipvs;
+ tbl[idx++].data = &ipvs->sysctl_conn_max;
+
tbl[idx++].data = &ipvs->sysctl_drop_entry;
tbl[idx++].data = &ipvs->sysctl_drop_packet;
#ifdef CONFIG_IP_VS_NFCT
--
2.47.3
next prev parent reply other threads:[~2026-06-07 9:50 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-07 9:49 [PATCH net-next 00/15] Netfilter/IPVS updates for net-next Pablo Neira Ayuso
2026-06-07 9:49 ` Pablo Neira Ayuso [this message]
2026-06-07 9:49 ` [PATCH net-next 02/15] netfilter: nfnetlink_osf: fix mss parsing on big-endian architectures Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 03/15] netfilter: nfnetlink_cthelper: use {READ,WRITE}_ONCE for accessing helper flags Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 04/15] netfilter: synproxy: drop packets if timestamp adjustment fails Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 05/15] netfilter: synproxy: adjust duplicate timestamp options Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 06/15] netfilter: synproxy: fix unaligned memory access in timestamp adjustment Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 07/15] netfilter: synproxy: protect nf_ct_seqadj_init() with conntrack lock Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 08/15] netfilter: cttimeout: detach dataplane timeout policy and repurpose refcount Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 09/15] netfilter: nf_conntrack_helper: dynamically allocate struct nf_conntrack_helper Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 10/15] netfilter: nf_conntrack_pptp: move GRE specific cleanup to GRE tracker Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 11/15] netfilter: nf_conntrack_helper: add refcounting from datapath Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 12/15] netfilter: conntrack: revert ct extension genid infrastructure Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 13/15] netfilter: conntrack: call nf_ct_gre_keymap_destroy() if master helper is pptp Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 14/15] netfilter: flowtable: avoid num_encaps underflow on bridge VLAN untag Pablo Neira Ayuso
2026-06-07 9:49 ` [PATCH net-next 15/15] netfilter: nf_conntrack: use get_unaligned_be32() in tcp_sack() Pablo Neira Ayuso
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260607094954.48892-2-pablo@netfilter.org \
--to=pablo@netfilter.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=fw@strlen.de \
--cc=horms@kernel.org \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox