From: Florian Westphal <fw@strlen.de>
To: <netdev@vger.kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>,
<netfilter-devel@vger.kernel.org>,
pablo@netfilter.org
Subject: [PATCH net 6/7] netfilter: nfnetlink_queue: make hash table per queue
Date: Wed, 8 Apr 2026 18:35:11 +0200 [thread overview]
Message-ID: <20260408163512.30537-7-fw@strlen.de> (raw)
In-Reply-To: <20260408163512.30537-1-fw@strlen.de>
Sharing a global hash table among all queues is tempting, but
it can cause a crash:
BUG: KASAN: slab-use-after-free in nfqnl_recv_verdict+0x11ac/0x15e0 [nfnetlink_queue]
[..]
nfqnl_recv_verdict+0x11ac/0x15e0 [nfnetlink_queue]
nfnetlink_rcv_msg+0x46a/0x930
kmem_cache_alloc_node_noprof+0x11e/0x450
struct nf_queue_entry is freed via kfree, but a parallel CPU can still
encounter such an nf_queue_entry when walking the list.
An alternative fix would be to free the nf_queue_entry via kfree_rcu()
instead, but as we have to alloc/free an entry for each skb, this would
cause more memory pressure.
Cc: Scott Mitchell <scott.k.mitch1@gmail.com>
Fixes: e19079adcd26 ("netfilter: nfnetlink_queue: optimize verdict lookup with hash table")
Signed-off-by: Florian Westphal <fw@strlen.de>
---
include/net/netfilter/nf_queue.h | 1 -
net/netfilter/nfnetlink_queue.c | 139 +++++++++++--------------------
2 files changed, 49 insertions(+), 91 deletions(-)
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 45eb26b2e95b..d17035d14d96 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -23,7 +23,6 @@ struct nf_queue_entry {
struct nf_hook_state state;
bool nf_ct_is_unconfirmed;
u16 size; /* sizeof(entry) + saved route keys */
- u16 queue_num;
/* extra space to store route keys */
};
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 47f7f62906e2..8e02f84784da 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -49,8 +49,8 @@
#endif
#define NFQNL_QMAX_DEFAULT 1024
-#define NFQNL_HASH_MIN 1024
-#define NFQNL_HASH_MAX 1048576
+#define NFQNL_HASH_MIN 8
+#define NFQNL_HASH_MAX 32768
/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
* includes the header length. Thus, the maximum packet length that we
@@ -60,29 +60,10 @@
*/
#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)
-/* Composite key for packet lookup: (net, queue_num, packet_id) */
-struct nfqnl_packet_key {
- possible_net_t net;
- u32 packet_id;
- u16 queue_num;
-} __aligned(sizeof(u32)); /* jhash2 requires 32-bit alignment */
-
-/* Global rhashtable - one for entire system, all netns */
-static struct rhashtable nfqnl_packet_map __read_mostly;
-
-/* Helper to initialize composite key */
-static inline void nfqnl_init_key(struct nfqnl_packet_key *key,
- struct net *net, u32 packet_id, u16 queue_num)
-{
- memset(key, 0, sizeof(*key));
- write_pnet(&key->net, net);
- key->packet_id = packet_id;
- key->queue_num = queue_num;
-}
-
struct nfqnl_instance {
struct hlist_node hlist; /* global list of queues */
- struct rcu_head rcu;
+ struct rhashtable nfqnl_packet_map;
+ struct rcu_work rwork;
u32 peer_portid;
unsigned int queue_maxlen;
@@ -106,6 +87,7 @@ struct nfqnl_instance {
typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
+static struct workqueue_struct *nfq_cleanup_wq __read_mostly;
static unsigned int nfnl_queue_net_id __read_mostly;
#define INSTANCE_BUCKETS 16
@@ -124,34 +106,10 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
}
-/* Extract composite key from nf_queue_entry for hashing */
-static u32 nfqnl_packet_obj_hashfn(const void *data, u32 len, u32 seed)
-{
- const struct nf_queue_entry *entry = data;
- struct nfqnl_packet_key key;
-
- nfqnl_init_key(&key, entry->state.net, entry->id, entry->queue_num);
-
- return jhash2((u32 *)&key, sizeof(key) / sizeof(u32), seed);
-}
-
-/* Compare stack-allocated key against entry */
-static int nfqnl_packet_obj_cmpfn(struct rhashtable_compare_arg *arg,
- const void *obj)
-{
- const struct nfqnl_packet_key *key = arg->key;
- const struct nf_queue_entry *entry = obj;
-
- return !net_eq(entry->state.net, read_pnet(&key->net)) ||
- entry->queue_num != key->queue_num ||
- entry->id != key->packet_id;
-}
-
static const struct rhashtable_params nfqnl_rhashtable_params = {
.head_offset = offsetof(struct nf_queue_entry, hash_node),
- .key_len = sizeof(struct nfqnl_packet_key),
- .obj_hashfn = nfqnl_packet_obj_hashfn,
- .obj_cmpfn = nfqnl_packet_obj_cmpfn,
+ .key_offset = offsetof(struct nf_queue_entry, id),
+ .key_len = sizeof(u32),
.automatic_shrinking = true,
.min_size = NFQNL_HASH_MIN,
.max_size = NFQNL_HASH_MAX,
@@ -190,6 +148,10 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
spin_lock_init(&inst->lock);
INIT_LIST_HEAD(&inst->queue_list);
+ err = rhashtable_init(&inst->nfqnl_packet_map, &nfqnl_rhashtable_params);
+ if (err < 0)
+ goto out_free;
+
spin_lock(&q->instances_lock);
if (instance_lookup(q, queue_num)) {
err = -EEXIST;
@@ -210,6 +172,8 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
out_unlock:
spin_unlock(&q->instances_lock);
+ rhashtable_destroy(&inst->nfqnl_packet_map);
+out_free:
kfree(inst);
return ERR_PTR(err);
}
@@ -217,15 +181,18 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
unsigned long data);
-static void
-instance_destroy_rcu(struct rcu_head *head)
+static void instance_destroy_work(struct work_struct *work)
{
- struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
- rcu);
+ struct nfqnl_instance *inst;
+ inst = container_of(to_rcu_work(work), struct nfqnl_instance,
+ rwork);
rcu_read_lock();
nfqnl_flush(inst, NULL, 0);
rcu_read_unlock();
+
+ rhashtable_destroy(&inst->nfqnl_packet_map);
+
kfree(inst);
module_put(THIS_MODULE);
}
@@ -234,7 +201,9 @@ static void
__instance_destroy(struct nfqnl_instance *inst)
{
hlist_del_rcu(&inst->hlist);
- call_rcu(&inst->rcu, instance_destroy_rcu);
+
+ INIT_RCU_WORK(&inst->rwork, instance_destroy_work);
+ queue_rcu_work(nfq_cleanup_wq, &inst->rwork);
}
static void
@@ -250,9 +219,7 @@ __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{
int err;
- entry->queue_num = queue->queue_num;
-
- err = rhashtable_insert_fast(&nfqnl_packet_map, &entry->hash_node,
+ err = rhashtable_insert_fast(&queue->nfqnl_packet_map, &entry->hash_node,
nfqnl_rhashtable_params);
if (unlikely(err))
return err;
@@ -266,23 +233,19 @@ __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
static void
__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{
- rhashtable_remove_fast(&nfqnl_packet_map, &entry->hash_node,
+ rhashtable_remove_fast(&queue->nfqnl_packet_map, &entry->hash_node,
nfqnl_rhashtable_params);
list_del(&entry->list);
queue->queue_total--;
}
static struct nf_queue_entry *
-find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id,
- struct net *net)
+find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
{
- struct nfqnl_packet_key key;
struct nf_queue_entry *entry;
- nfqnl_init_key(&key, net, id, queue->queue_num);
-
spin_lock_bh(&queue->lock);
- entry = rhashtable_lookup_fast(&nfqnl_packet_map, &key,
+ entry = rhashtable_lookup_fast(&queue->nfqnl_packet_map, &id,
nfqnl_rhashtable_params);
if (entry)
@@ -1531,7 +1494,7 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
verdict = ntohl(vhdr->verdict);
- entry = find_dequeue_entry(queue, ntohl(vhdr->id), info->net);
+ entry = find_dequeue_entry(queue, ntohl(vhdr->id));
if (entry == NULL)
return -ENOENT;
@@ -1880,40 +1843,38 @@ static int __init nfnetlink_queue_init(void)
{
int status;
- status = rhashtable_init(&nfqnl_packet_map, &nfqnl_rhashtable_params);
- if (status < 0)
- return status;
+ nfq_cleanup_wq = alloc_ordered_workqueue("nfq_workqueue", 0);
+ if (!nfq_cleanup_wq)
+ return -ENOMEM;
status = register_pernet_subsys(&nfnl_queue_net_ops);
- if (status < 0) {
- pr_err("failed to register pernet ops\n");
- goto cleanup_rhashtable;
- }
+ if (status < 0)
+ goto cleanup_pernet_subsys;
- netlink_register_notifier(&nfqnl_rtnl_notifier);
- status = nfnetlink_subsys_register(&nfqnl_subsys);
- if (status < 0) {
- pr_err("failed to create netlink socket\n");
- goto cleanup_netlink_notifier;
- }
+ status = netlink_register_notifier(&nfqnl_rtnl_notifier);
+ if (status < 0)
+ goto cleanup_rtnl_notifier;
status = register_netdevice_notifier(&nfqnl_dev_notifier);
- if (status < 0) {
- pr_err("failed to register netdevice notifier\n");
- goto cleanup_netlink_subsys;
- }
+ if (status < 0)
+ goto cleanup_dev_notifier;
+
+ status = nfnetlink_subsys_register(&nfqnl_subsys);
+ if (status < 0)
+ goto cleanup_nfqnl_subsys;
nf_register_queue_handler(&nfqh);
return status;
-cleanup_netlink_subsys:
- nfnetlink_subsys_unregister(&nfqnl_subsys);
-cleanup_netlink_notifier:
+cleanup_nfqnl_subsys:
+ unregister_netdevice_notifier(&nfqnl_dev_notifier);
+cleanup_dev_notifier:
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+cleanup_rtnl_notifier:
unregister_pernet_subsys(&nfnl_queue_net_ops);
-cleanup_rhashtable:
- rhashtable_destroy(&nfqnl_packet_map);
+cleanup_pernet_subsys:
+ destroy_workqueue(nfq_cleanup_wq);
return status;
}
@@ -1924,9 +1885,7 @@ static void __exit nfnetlink_queue_fini(void)
nfnetlink_subsys_unregister(&nfqnl_subsys);
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
unregister_pernet_subsys(&nfnl_queue_net_ops);
-
- rhashtable_destroy(&nfqnl_packet_map);
-
+ destroy_workqueue(nfq_cleanup_wq);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
}
--
2.52.0
next prev parent reply other threads:[~2026-04-08 16:35 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-08 16:35 [PATCH net 0/7] netfilter updates for net Florian Westphal
2026-04-08 16:35 ` [PATCH net 1/7] ipvs: fix NULL deref in ip_vs_add_service error path Florian Westphal
2026-04-08 16:35 ` [PATCH net 2/7] netfilter: nfnetlink_log: initialize nfgenmsg in NLMSG_DONE terminator Florian Westphal
2026-04-08 16:35 ` [PATCH net 3/7] netfilter: xt_multiport: validate range encoding in checkentry Florian Westphal
2026-04-08 16:35 ` [PATCH net 4/7] netfilter: ip6t_eui64: reject invalid MAC header for all packets Florian Westphal
2026-04-08 16:35 ` [PATCH net 5/7] netfilter: nft_ct: fix use-after-free in timeout object destroy Florian Westphal
2026-04-08 16:35 ` Florian Westphal [this message]
2026-04-08 16:35 ` [PATCH net 7/7] selftests: nft_queue.sh: add a parallel stress test Florian Westphal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260408163512.30537-7-fw@strlen.de \
--to=fw@strlen.de \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=pablo@netfilter.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox