Re: [PATCH v5] netfilter: nfnetlink_queue: optimize verdict lookup with hash table

public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed

From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Scott Mitchell <scott.k.mitch1@gmail.com>
Cc: kadlec@netfilter.org, fw@strlen.de, phil@nwl.cc,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, horms@kernel.org,
	netfilter-devel@vger.kernel.org, coreteam@netfilter.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	syzbot@syzkaller.appspotmail.com
Subject: Re: [PATCH v5] netfilter: nfnetlink_queue: optimize verdict lookup with hash table
Date: Tue, 13 Jan 2026 01:25:30 +0100	[thread overview]
Message-ID: <aWWQ-ooAmTIEhdHO@chamomile> (raw)
In-Reply-To: <20251122003720.16724-1-scott_mitchell@apple.com>

Hi Scott,

> diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
> index 4aeffddb7586..3d0def310523 100644
> --- a/include/net/netfilter/nf_queue.h
> +++ b/include/net/netfilter/nf_queue.h
> @@ -11,6 +11,7 @@
>  /* Each queued (to userspace) skbuff has one of these. */
>  struct nf_queue_entry {
>  	struct list_head	list;
> +	struct hlist_node	hash_node;
>  	struct sk_buff		*skb;
>  	unsigned int		id;
>  	unsigned int		hook_index;	/* index in hook_entries->hook[] */
> diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h
> index efcb7c044a74..bc296a17e5aa 100644
> --- a/include/uapi/linux/netfilter/nfnetlink_queue.h
> +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h
> @@ -107,6 +107,7 @@ enum nfqnl_attr_config {
>  	NFQA_CFG_QUEUE_MAXLEN,		/* __u32 */
>  	NFQA_CFG_MASK,			/* identify which flags to change */
>  	NFQA_CFG_FLAGS,			/* value of these flags (__u32) */
> +	NFQA_CFG_HASH_SIZE,		/* __u32 hash table size (rounded to power of 2) */

This should use the rhashtable implementation, I don't find a good
reason why this is not used in first place for this enhancement.

>  	__NFQA_CFG_MAX
>  };
>  #define NFQA_CFG_MAX (__NFQA_CFG_MAX-1)
> diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
> index 8b7b39d8a109..b142fac70ed9 100644
> --- a/net/netfilter/nfnetlink_queue.c
> +++ b/net/netfilter/nfnetlink_queue.c
> @@ -46,7 +46,10 @@
>  #include <net/netfilter/nf_conntrack.h>
>  #endif
>  
> -#define NFQNL_QMAX_DEFAULT 1024
> +#define NFQNL_QMAX_DEFAULT      1024
> +#define NFQNL_MIN_HASH_SIZE     16
> +#define NFQNL_DEFAULT_HASH_SIZE 1024
> +#define NFQNL_MAX_HASH_SIZE     131072
>  
>  /* We're using struct nlattr which has 16bit nla_len. Note that nla_len
>   * includes the header length. Thus, the maximum packet length that we
> @@ -65,6 +68,7 @@ struct nfqnl_instance {
>  	unsigned int copy_range;
>  	unsigned int queue_dropped;
>  	unsigned int queue_user_dropped;
> +	unsigned int queue_hash_size;
>  
>  
>  	u_int16_t queue_num;			/* number of this queue */
> @@ -77,6 +81,8 @@ struct nfqnl_instance {
>  	spinlock_t	lock	____cacheline_aligned_in_smp;
>  	unsigned int	queue_total;
>  	unsigned int	id_sequence;		/* 'sequence' of pkt ids */
> +	unsigned int	queue_hash_mask;
> +	struct hlist_head *queue_hash;
>  	struct list_head queue_list;		/* packets in queue */
>  };
>  
> @@ -95,6 +101,39 @@ static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
>  	return net_generic(net, nfnl_queue_net_id);
>  }
>  
> +static inline unsigned int
> +nfqnl_packet_hash(u32 id, unsigned int mask)
> +{
> +	return id & mask;
> +}
> +
> +static inline u32
> +nfqnl_normalize_hash_size(u32 hash_size)
> +{
> +	/* Must be power of two for queue_hash_mask to work correctly.
> +	 * Avoid overflow of is_power_of_2 by bounding NFQNL_MAX_HASH_SIZE.
> +	 */
> +	BUILD_BUG_ON(!is_power_of_2(NFQNL_MIN_HASH_SIZE) ||
> +		     !is_power_of_2(NFQNL_DEFAULT_HASH_SIZE) ||
> +		     !is_power_of_2(NFQNL_MAX_HASH_SIZE) ||
> +		     NFQNL_MAX_HASH_SIZE > 1U << 31);
> +
> +	if (!hash_size)
> +		return NFQNL_DEFAULT_HASH_SIZE;
> +
> +	/* Clamp to valid range before power of two to avoid overflow */
> +	if (hash_size <= NFQNL_MIN_HASH_SIZE)
> +		return NFQNL_MIN_HASH_SIZE;
> +
> +	if (hash_size >= NFQNL_MAX_HASH_SIZE)
> +		return NFQNL_MAX_HASH_SIZE;
> +
> +	if (!is_power_of_2(hash_size))
> +		hash_size = roundup_pow_of_two(hash_size);
> +
> +	return hash_size;
> +}
> +
>  static inline u_int8_t instance_hashfn(u_int16_t queue_num)
>  {
>  	return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
> @@ -114,13 +153,70 @@ instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
>  	return NULL;
>  }
>  
> +static int
> +nfqnl_hash_resize(struct nfqnl_instance *inst, u32 hash_size)

rhashtable can just handle this for you, then users do not need
to tune this hash_size parameter.

> +{
> +	struct hlist_head *new_hash, *old_hash;
> +	struct nf_queue_entry *entry;
> +	unsigned int h, hash_mask;
> +
> +	hash_size = nfqnl_normalize_hash_size(hash_size);
> +	if (hash_size == inst->queue_hash_size)
> +		return 0;
> +
> +	/* GFP_ATOMIC required: called under rcu_read_lock in nfqnl_recv_config.
> +	 * Using GFP_KERNEL_ACCOUNT would require refactoring lock placement.
> +	 */
> +	new_hash = kvmalloc_array(hash_size, sizeof(*new_hash), GFP_ATOMIC);
> +	if (!new_hash)
> +		return -ENOMEM;
> +
> +	hash_mask = hash_size - 1;
> +
> +	for (h = 0; h < hash_size; h++)
> +		INIT_HLIST_HEAD(&new_hash[h]);
> +
> +	spin_lock_bh(&inst->lock);
> +
> +	list_for_each_entry(entry, &inst->queue_list, list) {
> +		/* No hlist_del() since old_hash will be freed and we hold lock */
> +		h = nfqnl_packet_hash(entry->id, hash_mask);
> +		hlist_add_head(&entry->hash_node, &new_hash[h]);
> +	}
> +
> +	old_hash = inst->queue_hash;
> +	inst->queue_hash_size = hash_size;
> +	inst->queue_hash_mask = hash_mask;
> +	inst->queue_hash = new_hash;
> +
> +	spin_unlock_bh(&inst->lock);
> +
> +	kvfree(old_hash);
> +
> +	return 0;
> +}
> +
>  static struct nfqnl_instance *
> -instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
> +instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid,
> +		u32 hash_size)
>  {
>  	struct nfqnl_instance *inst;
> +	struct hlist_head *queue_hash;
>  	unsigned int h;
>  	int err;
>  
> +	hash_size = nfqnl_normalize_hash_size(hash_size);
> +
> +	/* GFP_ATOMIC required: called under rcu_read_lock in nfqnl_recv_config.
> +	 * Using GFP_KERNEL_ACCOUNT would require refactoring lock placement.
> +	 */
> +	queue_hash = kvmalloc_array(hash_size, sizeof(*queue_hash), GFP_ATOMIC);

If rhashtable is used, this can be allocate perns and then you avoid
this GFP_ATOMIC for each instance.

> +	if (!queue_hash)
> +		return ERR_PTR(-ENOMEM);
> +
> +	for (h = 0; h < hash_size; h++)
> +		INIT_HLIST_HEAD(&queue_hash[h]);
> +
>  	spin_lock(&q->instances_lock);
>  	if (instance_lookup(q, queue_num)) {
>  		err = -EEXIST;

next prev parent reply	other threads:[~2026-01-13  0:25 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-22  0:37 [PATCH v5] netfilter: nfnetlink_queue: optimize verdict lookup with hash table Scott Mitchell
2025-12-03 18:33 ` Scott Mitchell
2025-12-03 18:40   ` Florian Westphal
2025-12-03 21:07     ` Scott Mitchell
2026-01-13  0:25 ` Pablo Neira Ayuso [this message]
2026-01-14  1:32   ` Scott Mitchell
2026-01-15  0:50     ` Pablo Neira Ayuso
2026-01-15 17:07     ` Florian Westphal
2026-01-17 17:33       ` Scott Mitchell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aWWQ-ooAmTIEhdHO@chamomile \
    --to=pablo@netfilter.org \
    --cc=coreteam@netfilter.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=fw@strlen.de \
    --cc=horms@kernel.org \
    --cc=kadlec@netfilter.org \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=phil@nwl.cc \
    --cc=scott.k.mitch1@gmail.com \
    --cc=syzbot@syzkaller.appspotmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox