From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org
Subject: [PATCH net-next 01/19] netfilter: conntrack: revisit gc autotuning
Date: Mon, 21 Mar 2022 13:30:34 +0100 [thread overview]
Message-ID: <20220321123052.70553-2-pablo@netfilter.org> (raw)
In-Reply-To: <20220321123052.70553-1-pablo@netfilter.org>
From: Florian Westphal <fw@strlen.de>
as of commit 4608fdfc07e1
("netfilter: conntrack: collect all entries in one cycle")
conntrack gc was changed to run every 2 minutes.
On systems where conntrack hash table is set to large value, most evictions
happen from gc worker rather than the packet path due to hash table
distribution.
This causes netlink event overflows when events are collected.
This change collects average expiry of scanned entries and
reschedules to the average remaining value, within 1 to 60 second interval.
To avoid event overflows, reschedule after each bucket and add a
limit for both run time and number of evictions per run.
If more entries have to be evicted, reschedule and restart 1 jiffy
into the future.
Reported-by: Karel Rericha <karel@maxtel.cz>
Cc: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Cc: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_conntrack_core.c | 85 ++++++++++++++++++++++++-------
1 file changed, 68 insertions(+), 17 deletions(-)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d1a58ed357a4..0164e5f522e8 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,6 +66,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash);
struct conntrack_gc_work {
struct delayed_work dwork;
u32 next_bucket;
+ u32 avg_timeout;
+ u32 start_time;
bool exiting;
bool early_drop;
};
@@ -77,8 +79,19 @@ static __read_mostly bool nf_conntrack_locks_all;
/* serialize hash resizes and nf_ct_iterate_cleanup */
static DEFINE_MUTEX(nf_conntrack_mutex);
-#define GC_SCAN_INTERVAL (120u * HZ)
+#define GC_SCAN_INTERVAL_MAX (60ul * HZ)
+#define GC_SCAN_INTERVAL_MIN (1ul * HZ)
+
+/* clamp timeouts to this value (TCP unacked) */
+#define GC_SCAN_INTERVAL_CLAMP (300ul * HZ)
+
+/* large initial bias so that we don't scan often just because we have
+ * three entries with a 1s timeout.
+ */
+#define GC_SCAN_INTERVAL_INIT INT_MAX
+
#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
+#define GC_SCAN_EXPIRED_MAX (64000u / HZ)
#define MIN_CHAINLEN 8u
#define MAX_CHAINLEN (32u - MIN_CHAINLEN)
@@ -1420,16 +1433,28 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
static void gc_worker(struct work_struct *work)
{
- unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
unsigned int i, hashsz, nf_conntrack_max95 = 0;
- unsigned long next_run = GC_SCAN_INTERVAL;
+ u32 end_time, start_time = nfct_time_stamp;
struct conntrack_gc_work *gc_work;
+ unsigned int expired_count = 0;
+ unsigned long next_run;
+ s32 delta_time;
+
gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
i = gc_work->next_bucket;
if (gc_work->early_drop)
nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
+ if (i == 0) {
+ gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
+ gc_work->start_time = start_time;
+ }
+
+ next_run = gc_work->avg_timeout;
+
+ end_time = start_time + GC_SCAN_MAX_DURATION;
+
do {
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
@@ -1446,6 +1471,7 @@ static void gc_worker(struct work_struct *work)
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
struct nf_conntrack_net *cnet;
+ unsigned long expires;
struct net *net;
tmp = nf_ct_tuplehash_to_ctrack(h);
@@ -1455,11 +1481,29 @@ static void gc_worker(struct work_struct *work)
continue;
}
+ if (expired_count > GC_SCAN_EXPIRED_MAX) {
+ rcu_read_unlock();
+
+ gc_work->next_bucket = i;
+ gc_work->avg_timeout = next_run;
+
+ delta_time = nfct_time_stamp - gc_work->start_time;
+
+ /* re-sched immediately if total cycle time is exceeded */
+ next_run = delta_time < (s32)GC_SCAN_INTERVAL_MAX;
+ goto early_exit;
+ }
+
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
+ expired_count++;
continue;
}
+ expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
+ next_run += expires;
+ next_run /= 2u;
+
if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
continue;
@@ -1477,8 +1521,10 @@ static void gc_worker(struct work_struct *work)
continue;
}
- if (gc_worker_can_early_drop(tmp))
+ if (gc_worker_can_early_drop(tmp)) {
nf_ct_kill(tmp);
+ expired_count++;
+ }
nf_ct_put(tmp);
}
@@ -1491,33 +1537,38 @@ static void gc_worker(struct work_struct *work)
cond_resched();
i++;
- if (time_after(jiffies, end_time) && i < hashsz) {
+ delta_time = nfct_time_stamp - end_time;
+ if (delta_time > 0 && i < hashsz) {
+ gc_work->avg_timeout = next_run;
gc_work->next_bucket = i;
next_run = 0;
- break;
+ goto early_exit;
}
} while (i < hashsz);
+ gc_work->next_bucket = 0;
+
+ next_run = clamp(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX);
+
+ delta_time = max_t(s32, nfct_time_stamp - gc_work->start_time, 1);
+ if (next_run > (unsigned long)delta_time)
+ next_run -= delta_time;
+ else
+ next_run = 1;
+
+early_exit:
if (gc_work->exiting)
return;
- /*
- * Eviction will normally happen from the packet path, and not
- * from this gc worker.
- *
- * This worker is only here to reap expired entries when system went
- * idle after a busy period.
- */
- if (next_run) {
+ if (next_run)
gc_work->early_drop = false;
- gc_work->next_bucket = 0;
- }
+
queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
- INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
+ INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
gc_work->exiting = false;
}
--
2.30.2
next prev parent reply other threads:[~2022-03-21 12:31 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-03-21 12:30 [PATCH net-next 00/19] Netfilter updates for net-next Pablo Neira Ayuso
2022-03-21 12:30 ` Pablo Neira Ayuso [this message]
2022-03-21 13:20 ` [PATCH net-next 01/19] netfilter: conntrack: revisit gc autotuning patchwork-bot+netdevbpf
2022-03-21 12:30 ` [PATCH net-next 02/19] netfilter: conntrack: Add and use nf_ct_set_auto_assign_helper_warned() Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 03/19] netfilter: nf_tables: do not reduce read-only expressions Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 04/19] netfilter: nf_tables: cancel tracking for clobbered destination registers Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 05/19] netfilter: nft_ct: track register operations Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 06/19] netfilter: nft_lookup: only cancel tracking for clobbered dregs Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 07/19] netfilter: nft_meta: extend reduce support to bridge family Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 08/19] netfilter: nft_numgen: cancel register tracking Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 09/19] netfilter: nft_osf: track register operations Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 10/19] netfilter: nft_hash: " Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 11/19] netfilter: nft_immediate: cancel register tracking for data destination register Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 12/19] netfilter: nft_socket: track register operations Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 13/19] netfilter: nft_xfrm: " Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 14/19] netfilter: nft_tunnel: " Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 15/19] netfilter: nft_fib: add reduce support Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 16/19] netfilter: nft_exthdr: " Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 17/19] netfilter: nf_nat_h323: eliminate anonymous module_init & module_exit Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 18/19] netfilter: flowtable: remove redundant field in flow_offload_work struct Pablo Neira Ayuso
2022-03-21 12:30 ` [PATCH net-next 19/19] netfilter: flowtable: pass flowtable to nf_flow_table_iterate() Pablo Neira Ayuso
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220321123052.70553-2-pablo@netfilter.org \
--to=pablo@netfilter.org \
--cc=davem@davemloft.net \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).