From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Scott Mitchell <scott.k.mitch1@gmail.com>
Cc: kadlec@netfilter.org, fw@strlen.de, phil@nwl.cc,
davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, horms@kernel.org,
netfilter-devel@vger.kernel.org, coreteam@netfilter.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
syzbot@syzkaller.appspotmail.com
Subject: Re: [PATCH v5] netfilter: nfnetlink_queue: optimize verdict lookup with hash table
Date: Tue, 13 Jan 2026 01:25:30 +0100 [thread overview]
Message-ID: <aWWQ-ooAmTIEhdHO@chamomile> (raw)
In-Reply-To: <20251122003720.16724-1-scott_mitchell@apple.com>
Hi Scott,
> diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
> index 4aeffddb7586..3d0def310523 100644
> --- a/include/net/netfilter/nf_queue.h
> +++ b/include/net/netfilter/nf_queue.h
> @@ -11,6 +11,7 @@
> /* Each queued (to userspace) skbuff has one of these. */
> struct nf_queue_entry {
> struct list_head list;
> + struct hlist_node hash_node;
> struct sk_buff *skb;
> unsigned int id;
> unsigned int hook_index; /* index in hook_entries->hook[] */
> diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h
> index efcb7c044a74..bc296a17e5aa 100644
> --- a/include/uapi/linux/netfilter/nfnetlink_queue.h
> +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h
> @@ -107,6 +107,7 @@ enum nfqnl_attr_config {
> NFQA_CFG_QUEUE_MAXLEN, /* __u32 */
> NFQA_CFG_MASK, /* identify which flags to change */
> NFQA_CFG_FLAGS, /* value of these flags (__u32) */
> + NFQA_CFG_HASH_SIZE, /* __u32 hash table size (rounded to power of 2) */
This should use the rhashtable implementation; I don't see a good
reason why it was not used in the first place for this enhancement.
> __NFQA_CFG_MAX
> };
> #define NFQA_CFG_MAX (__NFQA_CFG_MAX-1)
> diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
> index 8b7b39d8a109..b142fac70ed9 100644
> --- a/net/netfilter/nfnetlink_queue.c
> +++ b/net/netfilter/nfnetlink_queue.c
> @@ -46,7 +46,10 @@
> #include <net/netfilter/nf_conntrack.h>
> #endif
>
> -#define NFQNL_QMAX_DEFAULT 1024
> +#define NFQNL_QMAX_DEFAULT 1024
> +#define NFQNL_MIN_HASH_SIZE 16
> +#define NFQNL_DEFAULT_HASH_SIZE 1024
> +#define NFQNL_MAX_HASH_SIZE 131072
>
> /* We're using struct nlattr which has 16bit nla_len. Note that nla_len
> * includes the header length. Thus, the maximum packet length that we
> @@ -65,6 +68,7 @@ struct nfqnl_instance {
> unsigned int copy_range;
> unsigned int queue_dropped;
> unsigned int queue_user_dropped;
> + unsigned int queue_hash_size;
>
>
> u_int16_t queue_num; /* number of this queue */
> @@ -77,6 +81,8 @@ struct nfqnl_instance {
> spinlock_t lock ____cacheline_aligned_in_smp;
> unsigned int queue_total;
> unsigned int id_sequence; /* 'sequence' of pkt ids */
> + unsigned int queue_hash_mask;
> + struct hlist_head *queue_hash;
> struct list_head queue_list; /* packets in queue */
> };
>
> @@ -95,6 +101,39 @@ static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
> return net_generic(net, nfnl_queue_net_id);
> }
>
> +static inline unsigned int
> +nfqnl_packet_hash(u32 id, unsigned int mask)
> +{
> + return id & mask;
> +}
> +
> +static inline u32
> +nfqnl_normalize_hash_size(u32 hash_size)
> +{
> + /* Must be power of two for queue_hash_mask to work correctly.
> + * Avoid overflow of is_power_of_2 by bounding NFQNL_MAX_HASH_SIZE.
> + */
> + BUILD_BUG_ON(!is_power_of_2(NFQNL_MIN_HASH_SIZE) ||
> + !is_power_of_2(NFQNL_DEFAULT_HASH_SIZE) ||
> + !is_power_of_2(NFQNL_MAX_HASH_SIZE) ||
> + NFQNL_MAX_HASH_SIZE > 1U << 31);
> +
> + if (!hash_size)
> + return NFQNL_DEFAULT_HASH_SIZE;
> +
> + /* Clamp to valid range before power of two to avoid overflow */
> + if (hash_size <= NFQNL_MIN_HASH_SIZE)
> + return NFQNL_MIN_HASH_SIZE;
> +
> + if (hash_size >= NFQNL_MAX_HASH_SIZE)
> + return NFQNL_MAX_HASH_SIZE;
> +
> + if (!is_power_of_2(hash_size))
> + hash_size = roundup_pow_of_two(hash_size);
> +
> + return hash_size;
> +}
> +
> static inline u_int8_t instance_hashfn(u_int16_t queue_num)
> {
> return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
> @@ -114,13 +153,70 @@ instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
> return NULL;
> }
>
> +static int
> +nfqnl_hash_resize(struct nfqnl_instance *inst, u32 hash_size)
rhashtable can handle resizing for you, so users do not need
to tune this hash_size parameter.
> +{
> + struct hlist_head *new_hash, *old_hash;
> + struct nf_queue_entry *entry;
> + unsigned int h, hash_mask;
> +
> + hash_size = nfqnl_normalize_hash_size(hash_size);
> + if (hash_size == inst->queue_hash_size)
> + return 0;
> +
> + /* GFP_ATOMIC required: called under rcu_read_lock in nfqnl_recv_config.
> + * Using GFP_KERNEL_ACCOUNT would require refactoring lock placement.
> + */
> + new_hash = kvmalloc_array(hash_size, sizeof(*new_hash), GFP_ATOMIC);
> + if (!new_hash)
> + return -ENOMEM;
> +
> + hash_mask = hash_size - 1;
> +
> + for (h = 0; h < hash_size; h++)
> + INIT_HLIST_HEAD(&new_hash[h]);
> +
> + spin_lock_bh(&inst->lock);
> +
> + list_for_each_entry(entry, &inst->queue_list, list) {
> + /* No hlist_del() since old_hash will be freed and we hold lock */
> + h = nfqnl_packet_hash(entry->id, hash_mask);
> + hlist_add_head(&entry->hash_node, &new_hash[h]);
> + }
> +
> + old_hash = inst->queue_hash;
> + inst->queue_hash_size = hash_size;
> + inst->queue_hash_mask = hash_mask;
> + inst->queue_hash = new_hash;
> +
> + spin_unlock_bh(&inst->lock);
> +
> + kvfree(old_hash);
> +
> + return 0;
> +}
> +
> static struct nfqnl_instance *
> -instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
> +instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid,
> + u32 hash_size)
> {
> struct nfqnl_instance *inst;
> + struct hlist_head *queue_hash;
> unsigned int h;
> int err;
>
> + hash_size = nfqnl_normalize_hash_size(hash_size);
> +
> + /* GFP_ATOMIC required: called under rcu_read_lock in nfqnl_recv_config.
> + * Using GFP_KERNEL_ACCOUNT would require refactoring lock placement.
> + */
> + queue_hash = kvmalloc_array(hash_size, sizeof(*queue_hash), GFP_ATOMIC);
If rhashtable is used, this can be allocated per netns, and then you
avoid this GFP_ATOMIC allocation for each instance.
> + if (!queue_hash)
> + return ERR_PTR(-ENOMEM);
> +
> + for (h = 0; h < hash_size; h++)
> + INIT_HLIST_HEAD(&queue_hash[h]);
> +
> spin_lock(&q->instances_lock);
> if (instance_lookup(q, queue_num)) {
> err = -EEXIST;
next prev parent reply other threads:[~2026-01-13 0:25 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-22 0:37 [PATCH v5] netfilter: nfnetlink_queue: optimize verdict lookup with hash table Scott Mitchell
2025-12-03 18:33 ` Scott Mitchell
2025-12-03 18:40 ` Florian Westphal
2025-12-03 21:07 ` Scott Mitchell
2026-01-13 0:25 ` Pablo Neira Ayuso [this message]
2026-01-14 1:32 ` Scott Mitchell
2026-01-15 0:50 ` Pablo Neira Ayuso
2026-01-15 17:07 ` Florian Westphal
2026-01-17 17:33 ` Scott Mitchell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aWWQ-ooAmTIEhdHO@chamomile \
--to=pablo@netfilter.org \
--cc=coreteam@netfilter.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=fw@strlen.de \
--cc=horms@kernel.org \
--cc=kadlec@netfilter.org \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=phil@nwl.cc \
--cc=scott.k.mitch1@gmail.com \
--cc=syzbot@syzkaller.appspotmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.