* [PATCH v1] nf_conntrack: sysctl: expose gc worker scan interval via sysctl
@ 2025-04-29 13:09 avimalin
2025-04-29 13:29 ` Florian Westphal
0 siblings, 1 reply; 9+ messages in thread
From: avimalin @ 2025-04-29 13:09 UTC (permalink / raw)
To: vimal.agrawal, linux-kernel, pablo, netfilter-devel, fw,
anirudh.gupta
From: Vimal Agrawal <vimal.agrawal@sophos.com>
The default initial gc scan interval of 60 secs is too long for systems
with a low number of conntracks, causing delays in conntrack deletion.
This affects userspace programs that rely on the timely arrival of
conntrack destroy events. So it is better that this is controlled
through sysctl.
Fixes: 2aa192757005 ("netfilter: conntrack: revisit the gc initial rescheduling bias")
Signed-off-by: Vimal Agrawal <vimal.agrawal@sophos.com>
Reviewed-by: Anirudh Gupta <anirudh.gupta@sophos.com>
---
include/net/netfilter/nf_conntrack.h | 1 +
net/netfilter/nf_conntrack_core.c | 4 +++-
net/netfilter/nf_conntrack_standalone.c | 8 ++++++++
3 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 3f02a45773e8..eaf1933687b2 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -321,6 +321,7 @@ extern struct hlist_nulls_head *nf_conntrack_hash;
extern unsigned int nf_conntrack_htable_size;
extern seqcount_spinlock_t nf_conntrack_generation;
extern unsigned int nf_conntrack_max;
+extern unsigned int nf_conntrack_gc_scan_interval_init;
/* must be called with rcu read lock held */
static inline void
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 7f8b245e287a..d7e03c29765a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -204,6 +204,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max);
+__read_mostly unsigned int nf_conntrack_gc_scan_interval_init = GC_SCAN_INTERVAL_INIT;
+EXPORT_SYMBOL_GPL(nf_conntrack_gc_scan_interval_init);
seqcount_spinlock_t nf_conntrack_generation __read_mostly;
static siphash_aligned_key_t nf_conntrack_hash_rnd;
@@ -1513,7 +1515,7 @@ static void gc_worker(struct work_struct *work)
nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
if (i == 0) {
- gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
+ gc_work->avg_timeout = nf_conntrack_gc_scan_interval_init;
gc_work->count = GC_SCAN_INITIAL_COUNT;
gc_work->start_time = start_time;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 2f666751c7e7..480ff9a6f185 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -559,6 +559,7 @@ enum nf_ct_sysctl_index {
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
NF_SYSCTL_CT_TIMESTAMP,
#endif
+ NF_SYSCTL_CT_GC_SCAN_INTERVAL_INIT,
NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC,
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT,
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV,
@@ -691,6 +692,13 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.extra2 = SYSCTL_ONE,
},
#endif
+ [NF_SYSCTL_CT_GC_SCAN_INTERVAL_INIT] = {
+ .procname = "nf_conntrack_gc_scan_interval_init",
+ .data = &nf_conntrack_gc_scan_interval_init,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
[NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = {
.procname = "nf_conntrack_generic_timeout",
.maxlen = sizeof(unsigned int),
--
2.17.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v1] nf_conntrack: sysctl: expose gc worker scan interval via sysctl
2025-04-29 13:09 [PATCH v1] nf_conntrack: sysctl: expose gc worker scan interval via sysctl avimalin
@ 2025-04-29 13:29 ` Florian Westphal
2025-04-30 6:43 ` [PATCH v2] " avimalin
0 siblings, 1 reply; 9+ messages in thread
From: Florian Westphal @ 2025-04-29 13:29 UTC (permalink / raw)
To: avimalin
Cc: vimal.agrawal, linux-kernel, pablo, netfilter-devel, fw,
anirudh.gupta
avimalin@gmail.com <avimalin@gmail.com> wrote:
> diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
> index 2f666751c7e7..480ff9a6f185 100644
> --- a/net/netfilter/nf_conntrack_standalone.c
> +++ b/net/netfilter/nf_conntrack_standalone.c
> @@ -559,6 +559,7 @@ enum nf_ct_sysctl_index {
> #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
> NF_SYSCTL_CT_TIMESTAMP,
> #endif
> + NF_SYSCTL_CT_GC_SCAN_INTERVAL_INIT,
> NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC,
> NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT,
> NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV,
> @@ -691,6 +692,13 @@ static struct ctl_table nf_ct_sysctl_table[] = {
> .extra2 = SYSCTL_ONE,
> },
> #endif
> + [NF_SYSCTL_CT_GC_SCAN_INTERVAL_INIT] = {
> + .procname = "nf_conntrack_gc_scan_interval_init",
> + .data = &nf_conntrack_gc_scan_interval_init,
> + .maxlen = sizeof(unsigned int),
> + .mode = 0644,
> + .proc_handler = proc_dointvec_jiffies,
> + },
> [NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = {
> .procname = "nf_conntrack_generic_timeout",
> .maxlen = sizeof(unsigned int),
I think you'll need to add NF_SYSCTL_CT_GC_SCAN_INTERVAL_INIT to
the
/* Don't allow non-init_net ns to alter global sysctls */
if (!net_eq(&init_net, net)) {
branch in nf_conntrack_standalone_init_sysctl().
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2] nf_conntrack: sysctl: expose gc worker scan interval via sysctl
2025-04-29 13:29 ` Florian Westphal
@ 2025-04-30 6:43 ` avimalin
2025-04-30 7:11 ` Florian Westphal
0 siblings, 1 reply; 9+ messages in thread
From: avimalin @ 2025-04-30 6:43 UTC (permalink / raw)
To: vimal.agrawal, linux-kernel, pablo, netfilter-devel, fw,
anirudh.gupta
From: Vimal Agrawal <vimal.agrawal@sophos.com>
The default initial gc scan interval of 60 secs is too long for systems
with a low number of conntracks, causing delays in conntrack deletion.
This affects userspace programs that rely on the timely arrival of
conntrack destroy events. So it is better that this is controlled
through sysctl.
Fixes: 2aa192757005 ("netfilter: conntrack: revisit the gc initial rescheduling bias")
Signed-off-by: Vimal Agrawal <vimal.agrawal@sophos.com>
Reviewed-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Anirudh Gupta <anirudh.gupta@sophos.com>
---
v2: Don't allow non-init_net ns to alter this global sysctl
include/net/netfilter/nf_conntrack.h | 1 +
net/netfilter/nf_conntrack_core.c | 4 +++-
net/netfilter/nf_conntrack_standalone.c | 9 +++++++++
3 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 3f02a45773e8..eaf1933687b2 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -321,6 +321,7 @@ extern struct hlist_nulls_head *nf_conntrack_hash;
extern unsigned int nf_conntrack_htable_size;
extern seqcount_spinlock_t nf_conntrack_generation;
extern unsigned int nf_conntrack_max;
+extern unsigned int nf_conntrack_gc_scan_interval_init;
/* must be called with rcu read lock held */
static inline void
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 7f8b245e287a..d7e03c29765a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -204,6 +204,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max);
+__read_mostly unsigned int nf_conntrack_gc_scan_interval_init = GC_SCAN_INTERVAL_INIT;
+EXPORT_SYMBOL_GPL(nf_conntrack_gc_scan_interval_init);
seqcount_spinlock_t nf_conntrack_generation __read_mostly;
static siphash_aligned_key_t nf_conntrack_hash_rnd;
@@ -1513,7 +1515,7 @@ static void gc_worker(struct work_struct *work)
nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
if (i == 0) {
- gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
+ gc_work->avg_timeout = nf_conntrack_gc_scan_interval_init;
gc_work->count = GC_SCAN_INITIAL_COUNT;
gc_work->start_time = start_time;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 2f666751c7e7..bdbf37a938bb 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -559,6 +559,7 @@ enum nf_ct_sysctl_index {
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
NF_SYSCTL_CT_TIMESTAMP,
#endif
+ NF_SYSCTL_CT_GC_SCAN_INTERVAL_INIT,
NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC,
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT,
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV,
@@ -691,6 +692,13 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.extra2 = SYSCTL_ONE,
},
#endif
+ [NF_SYSCTL_CT_GC_SCAN_INTERVAL_INIT] = {
+ .procname = "nf_conntrack_gc_scan_interval_init",
+ .data = &nf_conntrack_gc_scan_interval_init,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
[NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = {
.procname = "nf_conntrack_generic_timeout",
.maxlen = sizeof(unsigned int),
@@ -1090,6 +1098,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
table[NF_SYSCTL_CT_MAX].mode = 0444;
table[NF_SYSCTL_CT_EXPECT_MAX].mode = 0444;
table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
+ table[NF_SYSCTL_CT_GC_SCAN_INTERVAL_INIT].mode = 0444;
}
cnet->sysctl_header = register_net_sysctl_sz(net, "net/netfilter",
--
2.17.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v2] nf_conntrack: sysctl: expose gc worker scan interval via sysctl
2025-04-30 6:43 ` [PATCH v2] " avimalin
@ 2025-04-30 7:11 ` Florian Westphal
2025-04-30 7:28 ` [PATCH v3] " avimalin
0 siblings, 1 reply; 9+ messages in thread
From: Florian Westphal @ 2025-04-30 7:11 UTC (permalink / raw)
To: avimalin
Cc: vimal.agrawal, linux-kernel, pablo, netfilter-devel, fw,
anirudh.gupta
> v2: Don't allow non-init_net ns to alter this global sysctl
Looks good.
> include/net/netfilter/nf_conntrack.h | 1 +
> net/netfilter/nf_conntrack_core.c | 4 +++-
> net/netfilter/nf_conntrack_standalone.c | 9 +++++++++
> 3 files changed, 13 insertions(+), 1 deletion(-)
Sorry, I forgot about
Documentation/networking/nf_conntrack-sysctl.rst
Can you add a short description to that file?
I don't think anything else is missing after this.
Thanks.
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v3] nf_conntrack: sysctl: expose gc worker scan interval via sysctl
2025-04-30 7:11 ` Florian Westphal
@ 2025-04-30 7:28 ` avimalin
2025-04-30 7:57 ` Florian Westphal
2025-10-15 13:32 ` Florian Westphal
0 siblings, 2 replies; 9+ messages in thread
From: avimalin @ 2025-04-30 7:28 UTC (permalink / raw)
To: vimal.agrawal, linux-kernel, pablo, netfilter-devel, fw,
anirudh.gupta
From: Vimal Agrawal <vimal.agrawal@sophos.com>
The default initial gc scan interval of 60 secs is too long for systems
with a low number of conntracks, causing delays in conntrack deletion.
This affects userspace programs that rely on the timely arrival of
conntrack destroy events. So it is better that this is controlled
through sysctl.
Fixes: 2aa192757005 ("netfilter: conntrack: revisit the gc initial rescheduling bias")
Signed-off-by: Vimal Agrawal <vimal.agrawal@sophos.com>
Reviewed-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Anirudh Gupta <anirudh.gupta@sophos.com>
---
v2: Don't allow non-init_net ns to alter this global sysctl
v3: Add documentation in nf_conntrack-sysctl.rst
Documentation/networking/nf_conntrack-sysctl.rst | 5 +++++
include/net/netfilter/nf_conntrack.h | 1 +
net/netfilter/nf_conntrack_core.c | 4 +++-
net/netfilter/nf_conntrack_standalone.c | 9 +++++++++
4 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst
index 238b66d0e059..207b62047639 100644
--- a/Documentation/networking/nf_conntrack-sysctl.rst
+++ b/Documentation/networking/nf_conntrack-sysctl.rst
@@ -64,6 +64,11 @@ nf_conntrack_frag6_timeout - INTEGER (seconds)
Time to keep an IPv6 fragment in memory.
+nf_conntrack_gc_scan_interval_init - INTEGER (seconds)
+ default 60
+
+ Default for garbage collector's initial scan interval.
+
nf_conntrack_generic_timeout - INTEGER (seconds)
default 600
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 3f02a45773e8..eaf1933687b2 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -321,6 +321,7 @@ extern struct hlist_nulls_head *nf_conntrack_hash;
extern unsigned int nf_conntrack_htable_size;
extern seqcount_spinlock_t nf_conntrack_generation;
extern unsigned int nf_conntrack_max;
+extern unsigned int nf_conntrack_gc_scan_interval_init;
/* must be called with rcu read lock held */
static inline void
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 7f8b245e287a..d7e03c29765a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -204,6 +204,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max);
+__read_mostly unsigned int nf_conntrack_gc_scan_interval_init = GC_SCAN_INTERVAL_INIT;
+EXPORT_SYMBOL_GPL(nf_conntrack_gc_scan_interval_init);
seqcount_spinlock_t nf_conntrack_generation __read_mostly;
static siphash_aligned_key_t nf_conntrack_hash_rnd;
@@ -1513,7 +1515,7 @@ static void gc_worker(struct work_struct *work)
nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
if (i == 0) {
- gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
+ gc_work->avg_timeout = nf_conntrack_gc_scan_interval_init;
gc_work->count = GC_SCAN_INITIAL_COUNT;
gc_work->start_time = start_time;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 2f666751c7e7..bdbf37a938bb 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -559,6 +559,7 @@ enum nf_ct_sysctl_index {
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
NF_SYSCTL_CT_TIMESTAMP,
#endif
+ NF_SYSCTL_CT_GC_SCAN_INTERVAL_INIT,
NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC,
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT,
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV,
@@ -691,6 +692,13 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.extra2 = SYSCTL_ONE,
},
#endif
+ [NF_SYSCTL_CT_GC_SCAN_INTERVAL_INIT] = {
+ .procname = "nf_conntrack_gc_scan_interval_init",
+ .data = &nf_conntrack_gc_scan_interval_init,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
[NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = {
.procname = "nf_conntrack_generic_timeout",
.maxlen = sizeof(unsigned int),
@@ -1090,6 +1098,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
table[NF_SYSCTL_CT_MAX].mode = 0444;
table[NF_SYSCTL_CT_EXPECT_MAX].mode = 0444;
table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
+ table[NF_SYSCTL_CT_GC_SCAN_INTERVAL_INIT].mode = 0444;
}
cnet->sysctl_header = register_net_sysctl_sz(net, "net/netfilter",
--
2.17.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v3] nf_conntrack: sysctl: expose gc worker scan interval via sysctl
2025-04-30 7:28 ` [PATCH v3] " avimalin
@ 2025-04-30 7:57 ` Florian Westphal
2025-05-03 2:27 ` Vimal Agrawal
2025-10-15 13:32 ` Florian Westphal
1 sibling, 1 reply; 9+ messages in thread
From: Florian Westphal @ 2025-04-30 7:57 UTC (permalink / raw)
To: avimalin
Cc: vimal.agrawal, linux-kernel, pablo, netfilter-devel, fw,
anirudh.gupta
avimalin@gmail.com <avimalin@gmail.com> wrote:
> From: Vimal Agrawal <vimal.agrawal@sophos.com>
>
> Default initial gc scan interval of 60 secs is too long for system
> with low number of conntracks causing delay in conntrack deletion.
> It is affecting userspace which are replying on timely arrival of
> conntrack destroy event. So it is better that this is controlled
> through sysctl
Acked-by: Florian Westphal <fw@strlen.de>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v3] nf_conntrack: sysctl: expose gc worker scan interval via sysctl
2025-04-30 7:57 ` Florian Westphal
@ 2025-05-03 2:27 ` Vimal Agrawal
2025-05-08 5:54 ` Vimal Agrawal
0 siblings, 1 reply; 9+ messages in thread
From: Vimal Agrawal @ 2025-05-03 2:27 UTC (permalink / raw)
To: Florian Westphal
Cc: vimal.agrawal, linux-kernel, pablo, netfilter-devel,
anirudh.gupta
Thanks Florian for the suggestions and comments.
Hi Pablo, netfilter-devel,
Could you also please review the patch and let me know if you have any comment/s
Thanks,
Vimal
On Wed, Apr 30, 2025 at 1:27 PM Florian Westphal <fw@strlen.de> wrote:
>
> avimalin@gmail.com <avimalin@gmail.com> wrote:
> > From: Vimal Agrawal <vimal.agrawal@sophos.com>
> >
> > Default initial gc scan interval of 60 secs is too long for system
> > with low number of conntracks causing delay in conntrack deletion.
> > It is affecting userspace which are replying on timely arrival of
> > conntrack destroy event. So it is better that this is controlled
> > through sysctl
>
> Acked-by: Florian Westphal <fw@strlen.de>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v3] nf_conntrack: sysctl: expose gc worker scan interval via sysctl
2025-05-03 2:27 ` Vimal Agrawal
@ 2025-05-08 5:54 ` Vimal Agrawal
0 siblings, 0 replies; 9+ messages in thread
From: Vimal Agrawal @ 2025-05-08 5:54 UTC (permalink / raw)
To: Florian Westphal
Cc: vimal.agrawal, linux-kernel, pablo, netfilter-devel,
anirudh.gupta, Greg Kroah-Hartman
Hi all,
Let me know if you have any comment/s on the patch.
Thanks,
Vimal
On Sat, May 3, 2025 at 7:57 AM Vimal Agrawal <avimalin@gmail.com> wrote:
>
> Thanks Florian for the suggestions and comments.
>
> Hi Pablo, netfilter-devel,
> Could you also please review the patch and let me know if you have any comment/s
>
> Thanks,
> Vimal
>
> On Wed, Apr 30, 2025 at 1:27 PM Florian Westphal <fw@strlen.de> wrote:
> >
> > avimalin@gmail.com <avimalin@gmail.com> wrote:
> > > From: Vimal Agrawal <vimal.agrawal@sophos.com>
> > >
> > > Default initial gc scan interval of 60 secs is too long for system
> > > with low number of conntracks causing delay in conntrack deletion.
> > > It is affecting userspace which are replying on timely arrival of
> > > conntrack destroy event. So it is better that this is controlled
> > > through sysctl
> >
> > Acked-by: Florian Westphal <fw@strlen.de>
> >
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v3] nf_conntrack: sysctl: expose gc worker scan interval via sysctl
2025-04-30 7:28 ` [PATCH v3] " avimalin
2025-04-30 7:57 ` Florian Westphal
@ 2025-10-15 13:32 ` Florian Westphal
1 sibling, 0 replies; 9+ messages in thread
From: Florian Westphal @ 2025-10-15 13:32 UTC (permalink / raw)
To: avimalin; +Cc: vimal.agrawal, linux-kernel, pablo, netfilter-devel,
anirudh.gupta
avimalin@gmail.com <avimalin@gmail.com> wrote:
> Default initial gc scan interval of 60 secs is too long for system
> with low number of conntracks causing delay in conntrack deletion.
> It is affecting userspace which are replying on timely arrival of
> conntrack destroy event. So it is better that this is controlled
> through sysctl
Patch is fine. I do wonder however if there are alternatives.
Rather than expose the gc interval (gc worker is internal implementation
detail, e.g. we could move back to per-ct timers theoretically).
What about something like this (untested):
[RFC] netfilter: conntrack: expedite evictions when userspace is subscribed to destroy events
Track number of soon-to-expire conntracks.
If enough entries are likely to expire within 1/2/4/8/16/32 second buckets,
then reschedule earlier than what the normal next value would be.
Do this only when userspace is listening to destroy event notifications
via ctnetlink; otherwise it's not relevant when a conntrack entry is
released.
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 210792a2275d..22274193b093 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -52,6 +52,8 @@
#include <net/netns/hash.h>
#include <net/ip.h>
+#include <uapi/linux/netfilter/nfnetlink.h>
+
#include "nf_internals.h"
__cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
@@ -63,12 +65,15 @@ EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_hash);
+#define GC_HORIZON_BUCKETS 6
+
struct conntrack_gc_work {
struct delayed_work dwork;
u32 next_bucket;
u32 avg_timeout;
u32 count;
u32 start_time;
+ u8 horizon_count[GC_HORIZON_BUCKETS];
bool exiting;
bool early_drop;
};
@@ -96,6 +101,10 @@ static DEFINE_MUTEX(nf_conntrack_mutex);
#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
#define GC_SCAN_EXPIRED_MAX (64000u / HZ)
+/* schedule worker earlier if this many entries are about to expire
+ * in the near future */
+#define GC_SCAN_EXPEDITED min(255, (GC_HORIZON_BUCKETS * GC_SCAN_EXPIRED_MAX))
+
#define MIN_CHAINLEN 50u
#define MAX_CHAINLEN (80u - MIN_CHAINLEN)
@@ -1508,6 +1517,71 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
return false;
}
+static unsigned int gc_horizon_max(unsigned int i)
+{
+ return (1 << i) * HZ;
+}
+
+static void gc_horizon_account(struct conntrack_gc_work *gc, unsigned long expires)
+{
+ int i = ARRAY_SIZE(gc->horizon_count);
+
+ BUILD_BUG_ON(GC_SCAN_EXPEDITED > 255);
+
+ for (i = 0; i < ARRAY_SIZE(gc->horizon_count); i++) {
+ unsigned int max = gc_horizon_max(i);
+
+ if (gc->horizon_count[i] >= GC_SCAN_EXPEDITED)
+ return;
+
+ if (expires <= max) {
+ gc->horizon_count[i]++;
+ return;
+ }
+ }
+}
+
+static bool nf_ctnetlink_has_listeners(void)
+{
+ u8 v = READ_ONCE(nf_ctnetlink_has_listener);
+
+ return v & (1 << NFNLGRP_CONNTRACK_DESTROY);
+}
+
+/* schedule worker early if we have ctnetlink listeners that subscribed
+ * to CONNTRACK_DESTROY events so they receive more timely notifications.
+ *
+ * ->horizon_count[] contains the number of conntrack entries that are
+ * about to expire in 1, 2, 4, 8, 16 and 32 seconds.
+ */
+static noinline unsigned long
+gc_horizon_next_run(const struct conntrack_gc_work *gc_work,
+ unsigned long next_run, unsigned long delta_time)
+{
+ unsigned int count = 0;
+ unsigned int i;
+
+ if (next_run <= (unsigned long)delta_time)
+ return 1;
+
+ next_run -= delta_time;
+
+ if (!nf_ctnetlink_has_listeners())
+ return next_run;
+
+ for (i = 0; i < ARRAY_SIZE(gc_work->horizon_count); i++) {
+ count += gc_work->horizon_count[i];
+
+ if (count >= GC_SCAN_EXPEDITED) {
+ unsigned long new_next_run = gc_horizon_max(i);
+
+ return min(new_next_run, next_run);
+ }
+ }
+
+ return next_run;
+}
+
static void gc_worker(struct work_struct *work)
{
unsigned int i, hashsz;
@@ -1526,6 +1600,7 @@ static void gc_worker(struct work_struct *work)
gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
gc_work->count = GC_SCAN_INITIAL_COUNT;
gc_work->start_time = start_time;
+ memset(gc_work->horizon_count, 0, sizeof(gc_work->horizon_count));
}
next_run = gc_work->avg_timeout;
@@ -1575,7 +1650,11 @@ static void gc_worker(struct work_struct *work)
continue;
}
- expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
+ expires = nf_ct_expires(tmp);
+
+ gc_horizon_account(gc_work, expires);
+
+ expires = clamp(expires, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
expires = (expires - (long)next_run) / ++count;
next_run += expires;
net = nf_ct_net(tmp);
@@ -1633,10 +1712,7 @@ static void gc_worker(struct work_struct *work)
next_run = clamp(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX);
delta_time = max_t(s32, nfct_time_stamp - gc_work->start_time, 1);
- if (next_run > (unsigned long)delta_time)
- next_run -= delta_time;
- else
- next_run = 1;
+ next_run = gc_horizon_next_run(gc_work, next_run, delta_time);
early_exit:
if (gc_work->exiting)
^ permalink raw reply related [flat|nested] 9+ messages in thread
end of thread, other threads:[~2025-10-15 13:32 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-04-29 13:09 [PATCH v1] nf_conntrack: sysctl: expose gc worker scan interval via sysctl avimalin
2025-04-29 13:29 ` Florian Westphal
2025-04-30 6:43 ` [PATCH v2] " avimalin
2025-04-30 7:11 ` Florian Westphal
2025-04-30 7:28 ` [PATCH v3] " avimalin
2025-04-30 7:57 ` Florian Westphal
2025-05-03 2:27 ` Vimal Agrawal
2025-05-08 5:54 ` Vimal Agrawal
2025-10-15 13:32 ` Florian Westphal
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).