From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Scott Mitchell <scott.k.mitch1@gmail.com>
Cc: kadlec@netfilter.org, fw@strlen.de, phil@nwl.cc,
davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, horms@kernel.org,
netfilter-devel@vger.kernel.org, coreteam@netfilter.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
syzbot@syzkaller.appspotmail.com
Subject: Re: [PATCH v5] netfilter: nfnetlink_queue: optimize verdict lookup with hash table
Date: Tue, 13 Jan 2026 01:25:30 +0100 [thread overview]
Message-ID: <aWWQ-ooAmTIEhdHO@chamomile> (raw)
In-Reply-To: <20251122003720.16724-1-scott_mitchell@apple.com>
Hi Scott,
> diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
> index 4aeffddb7586..3d0def310523 100644
> --- a/include/net/netfilter/nf_queue.h
> +++ b/include/net/netfilter/nf_queue.h
> @@ -11,6 +11,7 @@
> /* Each queued (to userspace) skbuff has one of these. */
> struct nf_queue_entry {
> struct list_head list;
> + struct hlist_node hash_node;
> struct sk_buff *skb;
> unsigned int id;
> unsigned int hook_index; /* index in hook_entries->hook[] */
> diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h
> index efcb7c044a74..bc296a17e5aa 100644
> --- a/include/uapi/linux/netfilter/nfnetlink_queue.h
> +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h
> @@ -107,6 +107,7 @@ enum nfqnl_attr_config {
> NFQA_CFG_QUEUE_MAXLEN, /* __u32 */
> NFQA_CFG_MASK, /* identify which flags to change */
> NFQA_CFG_FLAGS, /* value of these flags (__u32) */
> + NFQA_CFG_HASH_SIZE, /* __u32 hash table size (rounded to power of 2) */
This should use the rhashtable implementation, I don't find a good
reason why this is not used in first place for this enhancement.
> __NFQA_CFG_MAX
> };
> #define NFQA_CFG_MAX (__NFQA_CFG_MAX-1)
> diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
> index 8b7b39d8a109..b142fac70ed9 100644
> --- a/net/netfilter/nfnetlink_queue.c
> +++ b/net/netfilter/nfnetlink_queue.c
> @@ -46,7 +46,10 @@
> #include <net/netfilter/nf_conntrack.h>
> #endif
>
> -#define NFQNL_QMAX_DEFAULT 1024
> +#define NFQNL_QMAX_DEFAULT 1024
> +#define NFQNL_MIN_HASH_SIZE 16
> +#define NFQNL_DEFAULT_HASH_SIZE 1024
> +#define NFQNL_MAX_HASH_SIZE 131072
>
> /* We're using struct nlattr which has 16bit nla_len. Note that nla_len
> * includes the header length. Thus, the maximum packet length that we
> @@ -65,6 +68,7 @@ struct nfqnl_instance {
> unsigned int copy_range;
> unsigned int queue_dropped;
> unsigned int queue_user_dropped;
> + unsigned int queue_hash_size;
>
>
> u_int16_t queue_num; /* number of this queue */
> @@ -77,6 +81,8 @@ struct nfqnl_instance {
> spinlock_t lock ____cacheline_aligned_in_smp;
> unsigned int queue_total;
> unsigned int id_sequence; /* 'sequence' of pkt ids */
> + unsigned int queue_hash_mask;
> + struct hlist_head *queue_hash;
> struct list_head queue_list; /* packets in queue */
> };
>
> @@ -95,6 +101,39 @@ static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
> return net_generic(net, nfnl_queue_net_id);
> }
>
> +static inline unsigned int
> +nfqnl_packet_hash(u32 id, unsigned int mask)
> +{
> + return id & mask;
> +}
> +
> +static inline u32
> +nfqnl_normalize_hash_size(u32 hash_size)
> +{
> + /* Must be power of two for queue_hash_mask to work correctly.
> + * Avoid overflow of is_power_of_2 by bounding NFQNL_MAX_HASH_SIZE.
> + */
> + BUILD_BUG_ON(!is_power_of_2(NFQNL_MIN_HASH_SIZE) ||
> + !is_power_of_2(NFQNL_DEFAULT_HASH_SIZE) ||
> + !is_power_of_2(NFQNL_MAX_HASH_SIZE) ||
> + NFQNL_MAX_HASH_SIZE > 1U << 31);
> +
> + if (!hash_size)
> + return NFQNL_DEFAULT_HASH_SIZE;
> +
> + /* Clamp to valid range before power of two to avoid overflow */
> + if (hash_size <= NFQNL_MIN_HASH_SIZE)
> + return NFQNL_MIN_HASH_SIZE;
> +
> + if (hash_size >= NFQNL_MAX_HASH_SIZE)
> + return NFQNL_MAX_HASH_SIZE;
> +
> + if (!is_power_of_2(hash_size))
> + hash_size = roundup_pow_of_two(hash_size);
> +
> + return hash_size;
> +}
> +
> static inline u_int8_t instance_hashfn(u_int16_t queue_num)
> {
> return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
> @@ -114,13 +153,70 @@ instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
> return NULL;
> }
>
> +static int
> +nfqnl_hash_resize(struct nfqnl_instance *inst, u32 hash_size)
rhashtable can just handle this for you, then users do not need
to tune this hash_size parameter.
> +{
> + struct hlist_head *new_hash, *old_hash;
> + struct nf_queue_entry *entry;
> + unsigned int h, hash_mask;
> +
> + hash_size = nfqnl_normalize_hash_size(hash_size);
> + if (hash_size == inst->queue_hash_size)
> + return 0;
> +
> + /* GFP_ATOMIC required: called under rcu_read_lock in nfqnl_recv_config.
> + * Using GFP_KERNEL_ACCOUNT would require refactoring lock placement.
> + */
> + new_hash = kvmalloc_array(hash_size, sizeof(*new_hash), GFP_ATOMIC);
> + if (!new_hash)
> + return -ENOMEM;
> +
> + hash_mask = hash_size - 1;
> +
> + for (h = 0; h < hash_size; h++)
> + INIT_HLIST_HEAD(&new_hash[h]);
> +
> + spin_lock_bh(&inst->lock);
> +
> + list_for_each_entry(entry, &inst->queue_list, list) {
> + /* No hlist_del() since old_hash will be freed and we hold lock */
> + h = nfqnl_packet_hash(entry->id, hash_mask);
> + hlist_add_head(&entry->hash_node, &new_hash[h]);
> + }
> +
> + old_hash = inst->queue_hash;
> + inst->queue_hash_size = hash_size;
> + inst->queue_hash_mask = hash_mask;
> + inst->queue_hash = new_hash;
> +
> + spin_unlock_bh(&inst->lock);
> +
> + kvfree(old_hash);
> +
> + return 0;
> +}
> +
> static struct nfqnl_instance *
> -instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
> +instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid,
> + u32 hash_size)
> {
> struct nfqnl_instance *inst;
> + struct hlist_head *queue_hash;
> unsigned int h;
> int err;
>
> + hash_size = nfqnl_normalize_hash_size(hash_size);
> +
> + /* GFP_ATOMIC required: called under rcu_read_lock in nfqnl_recv_config.
> + * Using GFP_KERNEL_ACCOUNT would require refactoring lock placement.
> + */
> + queue_hash = kvmalloc_array(hash_size, sizeof(*queue_hash), GFP_ATOMIC);
If rhashtable is used, this can be allocate perns and then you avoid
this GFP_ATOMIC for each instance.
> + if (!queue_hash)
> + return ERR_PTR(-ENOMEM);
> +
> + for (h = 0; h < hash_size; h++)
> + INIT_HLIST_HEAD(&queue_hash[h]);
> +
> spin_lock(&q->instances_lock);
> if (instance_lookup(q, queue_num)) {
> err = -EEXIST;
next prev parent reply other threads:[~2026-01-13 0:25 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-22 0:37 [PATCH v5] netfilter: nfnetlink_queue: optimize verdict lookup with hash table Scott Mitchell
2025-12-03 18:33 ` Scott Mitchell
2025-12-03 18:40 ` Florian Westphal
2025-12-03 21:07 ` Scott Mitchell
2026-01-13 0:25 ` Pablo Neira Ayuso [this message]
2026-01-14 1:32 ` Scott Mitchell
2026-01-15 0:50 ` Pablo Neira Ayuso
2026-01-15 17:07 ` Florian Westphal
2026-01-17 17:33 ` Scott Mitchell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aWWQ-ooAmTIEhdHO@chamomile \
--to=pablo@netfilter.org \
--cc=coreteam@netfilter.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=fw@strlen.de \
--cc=horms@kernel.org \
--cc=kadlec@netfilter.org \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=phil@nwl.cc \
--cc=scott.k.mitch1@gmail.com \
--cc=syzbot@syzkaller.appspotmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.