netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Florian Westphal <fw@strlen.de>
To: avimalin@gmail.com
Cc: vimal.agrawal@sophos.com, linux-kernel@vger.kernel.org,
	pablo@netfilter.org, netfilter-devel@vger.kernel.org,
	anirudh.gupta@sophos.com
Subject: Re: [PATCH v3] nf_conntrack: sysctl: expose gc worker scan interval via sysctl
Date: Wed, 15 Oct 2025 15:32:39 +0200	[thread overview]
Message-ID: <aO-id5W6Tr7frdHN@strlen.de> (raw)
In-Reply-To: <20250430072810.63169-1-vimal.agrawal@sophos.com>

avimalin@gmail.com <avimalin@gmail.com> wrote:
> Default initial gc scan interval of 60 secs is too long for system
> with low number of conntracks causing delay in conntrack deletion.
> It is affecting userspace which are replying on timely arrival of
> conntrack destroy event. So it is better that this is controlled
> through sysctl

Patch is fine.  I do wonder however if there are alternatives.
Rather than expose the gc interval (gc worker is internal implementation
detail, e.g. we could move back to per-ct timers theoretically).

What about something like this (untested):

[RFC] netfilter: conntrack: expedite evictions when userspace is subscribed to destroy events

Track number of soon-to-expire conntracks.
If enough entries are likely to expire within 1/2/4/8/16/32 second buckets,
then reschedule earlier than what the normal next value would be.

Do this only when userspace is listening to destroy event notifcations
via ctnetlink, otherwise its not relevant when a conntrack entry is
released.

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 210792a2275d..22274193b093 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -52,6 +52,8 @@
 #include <net/netns/hash.h>
 #include <net/ip.h>
 
+#include <uapi/linux/netfilter/nfnetlink.h>
+
 #include "nf_internals.h"
 
 __cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
@@ -63,12 +65,15 @@ EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
 struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_hash);
 
+#define GC_HORIZON_BUCKETS	6
+
 struct conntrack_gc_work {
 	struct delayed_work	dwork;
 	u32			next_bucket;
 	u32			avg_timeout;
 	u32			count;
 	u32			start_time;
+	u8			horizon_count[GC_HORIZON_BUCKETS];
 	bool			exiting;
 	bool			early_drop;
 };
@@ -96,6 +101,10 @@ static DEFINE_MUTEX(nf_conntrack_mutex);
 #define GC_SCAN_MAX_DURATION	msecs_to_jiffies(10)
 #define GC_SCAN_EXPIRED_MAX	(64000u / HZ)
 
+/* schedule worker earlier if this many entries are about to expire
+ * in the near future */
+#define GC_SCAN_EXPEDITED	min(255, (GC_HORIZON_BUCKETS * GC_SCAN_EXPIRED_MAX))
+
 #define MIN_CHAINLEN	50u
 #define MAX_CHAINLEN	(80u - MIN_CHAINLEN)
 
@@ -1508,6 +1517,71 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
 	return false;
 }
 
+static unsigned int gc_horizon_max(unsigned int i)
+{
+	return (1 << i) * HZ;
+}
+
+static void gc_horizon_account(struct conntrack_gc_work *gc, unsigned long expires)
+{
+	int i = ARRAY_SIZE(gc->horizon_count);
+
+	BUILD_BUG_ON(GC_SCAN_EXPEDITED > 255);
+
+	for (i = 0; i < ARRAY_SIZE(gc->horizon_count); i++) {
+		unsigned int max = gc_horizon_max(i);
+
+		if (gc->horizon_count[i] >= GC_SCAN_EXPEDITED)
+			return;
+
+		if (expires <= max) {
+			gc->horizon_count[i]++;
+			return;
+		}
+	}
+}
+
+static bool nf_ctnetlink_has_listeners(void)
+{
+	u8 v = READ_ONCE(nf_ctnetlink_has_listener);
+
+	return v & (1 << NFNLGRP_CONNTRACK_DESTROY);
+}
+
+/* schedule worker early if we have ctnetlink listeners that subscribed
+ * to CONNTRACK_DESTROY events so they receive more timely notifications.
+ *
+ * ->horizon_count[] contains the number of conntrack entries that are
+ *  about the expire in 1, 2, 4, 8, 16 and 32 seconds.
+ */
+static noinline unsigned long
+gc_horizon_next_run(const struct conntrack_gc_work *gc_work,
+		    unsigned long next_run, unsigned long delta_time)
+{
+	unsigned int count = 0;
+	unsigned int i;
+
+	if (next_run <= (unsigned long)delta_time)
+		return 1;
+
+	next_run -= delta_time;
+
+	if (!nf_ctnetlink_has_listeners())
+		return next_run;
+
+	for (i = 0; i < ARRAY_SIZE(gc_work->horizon_count); i++) {
+		count += gc_work->horizon_count[i];
+
+		if (count >= GC_SCAN_EXPEDITED) {
+			unsigned long new_next_run = gc_horizon_max(i);
+
+			return min(new_next_run, next_run);
+		}
+	}
+
+	return next_run;
+}
+
 static void gc_worker(struct work_struct *work)
 {
 	unsigned int i, hashsz;
@@ -1526,6 +1600,7 @@ static void gc_worker(struct work_struct *work)
 		gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
 		gc_work->count = GC_SCAN_INITIAL_COUNT;
 		gc_work->start_time = start_time;
+		memset(gc_work->horizon_count, 0, sizeof(gc_work->horizon_count));
 	}
 
 	next_run = gc_work->avg_timeout;
@@ -1575,7 +1650,11 @@ static void gc_worker(struct work_struct *work)
 				continue;
 			}
 
-			expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
+			expires = nf_ct_expires(tmp);
+
+			gc_horizon_account(gc_work, expires);
+
+			expires = clamp(expires, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
 			expires = (expires - (long)next_run) / ++count;
 			next_run += expires;
 			net = nf_ct_net(tmp);
@@ -1633,10 +1712,7 @@ static void gc_worker(struct work_struct *work)
 	next_run = clamp(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX);
 
 	delta_time = max_t(s32, nfct_time_stamp - gc_work->start_time, 1);
-	if (next_run > (unsigned long)delta_time)
-		next_run -= delta_time;
-	else
-		next_run = 1;
+	next_run = gc_horizon_next_run(gc_work, next_run, delta_time);
 
 early_exit:
 	if (gc_work->exiting)

      parent reply	other threads:[~2025-10-15 13:32 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-29 13:09 [PATCH v1] nf_conntrack: sysctl: expose gc worker scan interval via sysctl avimalin
2025-04-29 13:29 ` Florian Westphal
2025-04-30  6:43   ` [PATCH v2] " avimalin
2025-04-30  7:11     ` Florian Westphal
2025-04-30  7:28       ` [PATCH v3] " avimalin
2025-04-30  7:57         ` Florian Westphal
2025-05-03  2:27           ` Vimal Agrawal
2025-05-08  5:54             ` Vimal Agrawal
2025-10-15 13:32         ` Florian Westphal [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aO-id5W6Tr7frdHN@strlen.de \
    --to=fw@strlen.de \
    --cc=anirudh.gupta@sophos.com \
    --cc=avimalin@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=pablo@netfilter.org \
    --cc=vimal.agrawal@sophos.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).