public inbox for netfilter-devel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH nf 0/2] netfilter: nft_set_pipapo: fix UaF during gc walk
@ 2026-03-03 19:02 Florian Westphal
  2026-03-03 19:02 ` [PATCH nf 1/2] netfilter: nft_set_pipapo: split gc in unlink and reclaim phase Florian Westphal
  2026-03-03 19:02 ` [PATCH nf 2/2] netfilter: nft_set_pipapo: prevent soft lockup during gc walk Florian Westphal
  0 siblings, 2 replies; 4+ messages in thread
From: Florian Westphal @ 2026-03-03 19:02 UTC (permalink / raw)
  To: netfilter-devel; +Cc: sbrivio, Florian Westphal

Yiming Qian reports Use-after-free in the pipapo set type:
  Under a large number of expired elements, commit-time GC can run for a very
  long time in a non-preemptible context, triggering soft lockup warnings and
  RCU stall reports (local denial of service).

As-is, elements are unlinked from the clone.  But the expired elements
are also reachable from the live copy.

Therefore, we must not queue them for freeing until after the clone
has been exposed to other CPUs and one grace period has elapsed.

Split gc into unlink + reclaim phase to resolve this bug.

Florian Westphal (2):
  netfilter: nft_set_pipapo: split gc in unlink and reclaim phase
  netfilter: nft_set_pipapo: prevent soft lockup during gc walk

 net/netfilter/nft_set_pipapo.c | 69 +++++++++++++++++++++++++---------
 net/netfilter/nft_set_pipapo.h |  4 ++
 2 files changed, 56 insertions(+), 17 deletions(-)
-- 
2.52.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH nf 1/2] netfilter: nft_set_pipapo: split gc in unlink and reclaim phase
  2026-03-03 19:02 [PATCH nf 0/2] netfilter: nft_set_pipapo: fix UaF during gc walk Florian Westphal
@ 2026-03-03 19:02 ` Florian Westphal
  2026-03-03 23:50   ` Florian Westphal
  2026-03-03 19:02 ` [PATCH nf 2/2] netfilter: nft_set_pipapo: prevent soft lockup during gc walk Florian Westphal
  1 sibling, 1 reply; 4+ messages in thread
From: Florian Westphal @ 2026-03-03 19:02 UTC (permalink / raw)
  To: netfilter-devel; +Cc: sbrivio, Florian Westphal, Yiming Qian

Yiming Qian reports Use-after-free in the pipapo set type:
  Under a large number of expired elements, commit-time GC can run for a very
  long time in a non-preemptible context, triggering soft lockup warnings and
  RCU stall reports (local denial of service).

We must split GC in an unlink and a reclaim phase.

We CANNOT queue elements for reaping by the GC engine until after
pointers have been swapped.  Expired elements are still fully exposed to
both the packet path and userspace dumpers via the live copy of the data
structure.

call_rcu() does NOT protect us: dump operations or element lookups starting
after call_rcu has fired can still observe the free'd element, unless the
commit phase has made enough progress to swap the clone and live pointers.

This a similar approach as done recently for the rbtree backend in commit
35f83a75529a ("netfilter: nft_set_rbtree: don't gc elements on insert").

Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nft_set_pipapo.c | 65 +++++++++++++++++++++++++---------
 net/netfilter/nft_set_pipapo.h |  4 +++
 2 files changed, 52 insertions(+), 17 deletions(-)

diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index c091898df710..d850166b8e45 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1680,22 +1680,17 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
 }
 
 /**
- * pipapo_gc() - Drop expired entries from set, destroy start and end elements
+ * pipapo_gc_scan() - Drop expired entries from set and link them to gc list
  * @set:	nftables API set representation
  * @m:		Matching data
  */
-static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
+static void pipapo_gc_scan(struct nft_set *set, struct nft_pipapo_match *m)
 {
 	struct nft_pipapo *priv = nft_set_priv(set);
 	struct net *net = read_pnet(&set->net);
 	unsigned int rules_f0, first_rule = 0;
 	u64 tstamp = nft_net_tstamp(net);
 	struct nft_pipapo_elem *e;
-	struct nft_trans_gc *gc;
-
-	gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
-	if (!gc)
-		return;
 
 	while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
 		union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
@@ -1724,13 +1719,11 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
 		 * NFT_SET_ELEM_DEAD_BIT.
 		 */
 		if (__nft_set_elem_expired(&e->ext, tstamp)) {
-			gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
-			if (!gc)
-				return;
-
 			nft_pipapo_gc_deactivate(net, set, e);
 			pipapo_drop(m, rulemap);
-			nft_trans_gc_elem_add(gc, e);
+
+			e->to_free = priv->to_free;
+			priv->to_free = e;
 
 			/* And check again current first rule, which is now the
 			 * first we haven't checked.
@@ -1740,11 +1733,39 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
 		}
 	}
 
-	gc = nft_trans_gc_catchall_sync(gc);
-	if (gc) {
-		nft_trans_gc_queue_sync_done(gc);
-		priv->last_gc = jiffies;
+	priv->last_gc = jiffies;
+}
+
+/**
+ * pipapo_gc_queue() - Free expired elements
+ * @set:	nftables API set representation
+ */
+static void pipapo_gc_queue(struct nft_set *set)
+{
+	struct nft_pipapo *priv = nft_set_priv(set);
+	struct nft_pipapo_elem *e = priv->to_free;
+	struct nft_trans_gc *gc;
+
+	if (!e)
+		return;
+
+	gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+	if (!gc)
+		return;
+
+	while (e) {
+		struct nft_pipapo_elem *next = e->to_free;
+
+		nft_trans_gc_elem_add(gc, e);
+		priv->to_free = next;
+		e = next;
+		gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
+		if (!gc)
+			return;
 	}
+
+	gc = nft_trans_gc_catchall_sync(gc);
+	nft_trans_gc_queue_sync_done(gc);
 }
 
 /**
@@ -1807,7 +1828,7 @@ static void nft_pipapo_commit(struct nft_set *set)
 		return;
 
 	if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
-		pipapo_gc(set, priv->clone);
+		pipapo_gc_scan(set, priv->clone);
 
 	old = rcu_replace_pointer(priv->match, priv->clone,
 				  nft_pipapo_transaction_mutex_held(set));
@@ -1815,6 +1836,8 @@ static void nft_pipapo_commit(struct nft_set *set)
 
 	if (old)
 		call_rcu(&old->rcu, pipapo_reclaim_match);
+
+	pipapo_gc_queue(set);
 }
 
 static void nft_pipapo_abort(const struct nft_set *set)
@@ -2279,6 +2302,7 @@ static int nft_pipapo_init(const struct nft_set *set,
 		f->mt = NULL;
 	}
 
+	priv->to_free = NULL;
 	rcu_assign_pointer(priv->match, m);
 
 	return 0;
@@ -2328,6 +2352,13 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
 	struct nft_pipapo *priv = nft_set_priv(set);
 	struct nft_pipapo_match *m;
 
+	while (priv->to_free) {
+		struct nft_pipapo_elem *e = priv->to_free;
+
+		priv->to_free = e->to_free;
+		nf_tables_set_elem_destroy(ctx, set, &e->priv);
+	}
+
 	m = rcu_dereference_protected(priv->match, true);
 
 	if (priv->clone) {
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index eaab422aa56a..4a5baebabaa5 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -156,12 +156,14 @@ struct nft_pipapo_match {
  * @clone:	Copy where pending insertions and deletions are kept
  * @width:	Total bytes to be matched for one packet, including padding
  * @last_gc:	Timestamp of last garbage collection run, jiffies
+ * @to_free:	single-linked list of elements to queue up for memory reclaim
  */
 struct nft_pipapo {
 	struct nft_pipapo_match __rcu *match;
 	struct nft_pipapo_match *clone;
 	int width;
 	unsigned long last_gc;
+	struct nft_pipapo_elem *to_free;
 };
 
 struct nft_pipapo_elem;
@@ -169,10 +171,12 @@ struct nft_pipapo_elem;
 /**
  * struct nft_pipapo_elem - API-facing representation of single set element
  * @priv:	element placeholder
+ * @to_free:	list of elements waiting for mem reclaim
  * @ext:	nftables API extensions
  */
 struct nft_pipapo_elem {
 	struct nft_elem_priv	priv;
+	struct nft_pipapo_elem  *to_free;
 	struct nft_set_ext	ext;
 };
 
-- 
2.52.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH nf 2/2] netfilter: nft_set_pipapo: prevent soft lockup during gc walk
  2026-03-03 19:02 [PATCH nf 0/2] netfilter: nft_set_pipapo: fix UaF during gc walk Florian Westphal
  2026-03-03 19:02 ` [PATCH nf 1/2] netfilter: nft_set_pipapo: split gc in unlink and reclaim phase Florian Westphal
@ 2026-03-03 19:02 ` Florian Westphal
  1 sibling, 0 replies; 4+ messages in thread
From: Florian Westphal @ 2026-03-03 19:02 UTC (permalink / raw)
  To: netfilter-devel; +Cc: sbrivio, Florian Westphal, Yiming Qian

The gc scan+shrinking process can take a very long time.

Add an upper ceiling: If we've queued up some elements for removal
already give up after spending up to 10s on gc compaction.

Note this intentionally doesn't add a call to cond_resched();
PREEMPT_NONE and _VOLUNTARY preemption models have been removed
recently.

Reported-by: Yiming Qian <yimingqian591@gmail.com>
Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nft_set_pipapo.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index d850166b8e45..0cd91f809655 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1686,6 +1686,7 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
  */
 static void pipapo_gc_scan(struct nft_set *set, struct nft_pipapo_match *m)
 {
+	unsigned long stop_time = jiffies + 10 * HZ;
 	struct nft_pipapo *priv = nft_set_priv(set);
 	struct net *net = read_pnet(&set->net);
 	unsigned int rules_f0, first_rule = 0;
@@ -1697,6 +1698,9 @@ static void pipapo_gc_scan(struct nft_set *set, struct nft_pipapo_match *m)
 		const struct nft_pipapo_field *f;
 		unsigned int i, start, rules_fx;
 
+		if (priv->to_free && time_after(jiffies, stop_time))
+			return;
+
 		start = first_rule;
 		rules_fx = rules_f0;
 
-- 
2.52.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH nf 1/2] netfilter: nft_set_pipapo: split gc in unlink and reclaim phase
  2026-03-03 19:02 ` [PATCH nf 1/2] netfilter: nft_set_pipapo: split gc in unlink and reclaim phase Florian Westphal
@ 2026-03-03 23:50   ` Florian Westphal
  0 siblings, 0 replies; 4+ messages in thread
From: Florian Westphal @ 2026-03-03 23:50 UTC (permalink / raw)
  To: netfilter-devel; +Cc: sbrivio, Yiming Qian

Florian Westphal <fw@strlen.de> wrote:
> index eaab422aa56a..4a5baebabaa5 100644
> --- a/net/netfilter/nft_set_pipapo.h
> +++ b/net/netfilter/nft_set_pipapo.h
> @@ -156,12 +156,14 @@ struct nft_pipapo_match {
>   * @clone:	Copy where pending insertions and deletions are kept
>   * @width:	Total bytes to be matched for one packet, including padding
>   * @last_gc:	Timestamp of last garbage collection run, jiffies
> + * @to_free:	single-linked list of elements to queue up for memory reclaim
>   */
>  struct nft_pipapo {
>  	struct nft_pipapo_match __rcu *match;
>  	struct nft_pipapo_match *clone;
>  	int width;
>  	unsigned long last_gc;
> +	struct nft_pipapo_elem *to_free;
>  };
>  
>  struct nft_pipapo_elem;
> @@ -169,10 +171,12 @@ struct nft_pipapo_elem;
>  /**
>   * struct nft_pipapo_elem - API-facing representation of single set element
>   * @priv:	element placeholder
> + * @to_free:	list of elements waiting for mem reclaim
>   * @ext:	nftables API extensions
>   */
>  struct nft_pipapo_elem {
>  	struct nft_elem_priv	priv;
> +	struct nft_pipapo_elem  *to_free;
>  	struct nft_set_ext	ext;

Pablo points out that we don't need this extra member.

Instead we'll chain nft_trans_gc containers via their list_head
members and just defer posting the gc containers to the gc engine.

So overall idea remains the same.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-03-03 23:50 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-03 19:02 [PATCH nf 0/2] netfilter: nft_set_pipapo: fix UaF during gc walk Florian Westphal
2026-03-03 19:02 ` [PATCH nf 1/2] netfilter: nft_set_pipapo: split gc in unlink and reclaim phase Florian Westphal
2026-03-03 23:50   ` Florian Westphal
2026-03-03 19:02 ` [PATCH nf 2/2] netfilter: nft_set_pipapo: prevent soft lockup during gc walk Florian Westphal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox