All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH nf v3 1/1] bridge: br_netfilter: pin bridge device while NFQUEUE holds fake dst
@ 2026-06-04 17:32 Ren Wei
  2026-06-04 23:36 ` Florian Westphal
  0 siblings, 1 reply; 2+ messages in thread
From: Ren Wei @ 2026-06-04 17:32 UTC (permalink / raw)
  To: netfilter-devel
  Cc: pablo, fw, phil, yuantan098, yifanwucs, tomapufckgml, bird,
	royenheart, n05ec

From: Haoze Xie <royenheart@gmail.com>

The bridge netfilter fake rtable is embedded in struct net_bridge and is
attached to bridged packets with skb_dst_set_noref(). If such a packet is
queued to NFQUEUE, __nf_queue() upgrades that fake dst with
skb_dst_force().

At that point the queued skb can hold a real dst reference after bridge
teardown has started. The problem is not that every bridged packet needs
its own dst reference. The problem is that NFQUEUE can keep the bridge
private fake dst alive after unregister begins.

Fix this by keeping the bridge fake dst model unchanged and pinning the
bridge master device only while the packet sits in NFQUEUE. Record the
bridge device in nf_queue_entry when the queued skb carries a bridge fake
dst, take a device reference for the queue lifetime, and drop it when the
queue entry is freed.

This keeps netdev_priv(br->dev) alive until verdict completion, so the
embedded fake rtable and its metrics backing storage cannot be freed out
from under dst_release(). It also avoids the constant refcount bump and
avoids using ipv4-specific dst helpers for IPv6 bridge traffic.

Fixes: 34666d467cbf ("netfilter: bridge: move br_netfilter out of the core")
Cc: stable@kernel.org
Reported-by: Yuan Tan <yuantan098@gmail.com>
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Reported-by: Xin Liu <bird@lzu.edu.cn>
Signed-off-by: Haoze Xie <royenheart@gmail.com>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
---
Changes in v3:
  - drop the per-packet fake dst refcounting from v2
  - stop using ipv4-specific dst helpers for the fake dst
  - keep the existing bridge fake rtable model unchanged on the fast path
  - pin the bridge master device only when NFQUEUE upgrades a fake dst
  into an asynchronous queued reference
  - v2 Link: https://lore.kernel.org/all/831936f111e6e1f435f4f6247d07fe6a6624d271.1779680014.git.royenheart@gmail.com/
changes in v2:
  - spell out how NFQUEUE upgrades the fake dst into a real reference
  - switch to rt_dst_alloc() instead of br_netfilter-private dst_ops state
  - detach the bridge device with dst_dev_put() during teardown
  - keep the ref-holding contract local to bridge_parent_rtable()
  - v1 Link: https://lore.kernel.org/all/783d76ac83917b7302c1ec647794bd773bb1875a.1778687139.git.royenheart@gmail.com/

 include/net/netfilter/nf_queue.h |  1 +
 net/netfilter/nf_queue.c         | 27 +++++++++++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 3978c3174cdb..fc3e81c07364 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -18,6 +18,7 @@ struct nf_queue_entry {
 	unsigned int		id;
 	unsigned int		hook_index;	/* index in hook_entries->hook[] */
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+	struct net_device	*bridge_dev;
 	struct net_device	*physin;
 	struct net_device	*physout;
 #endif
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 57b450024a99..ba9e2f3fbd94 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -24,6 +24,27 @@
 
 static const struct nf_queue_handler __rcu *nf_queue_handler;
 
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+static struct net_device *nf_queue_bridge_dev(const struct sk_buff *skb,
+					      const struct nf_hook_state *state)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct net_device *dev;
+
+	if (state->pf != NFPROTO_BRIDGE || !nf_bridge_info_exists(skb))
+		return NULL;
+
+	if (!dst || !(dst->flags & DST_FAKE_RTABLE))
+		return NULL;
+
+	dev = READ_ONCE(dst->dev);
+	if (!dev || dev == blackhole_netdev)
+		return NULL;
+
+	return dev;
+}
+#endif
+
 /*
  * Hook for nfnetlink_queue to register its queue handler.
  * We do this so that most of the NFQUEUE code can be modular.
@@ -68,6 +89,8 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
 		nf_queue_sock_put(state->sk);
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+	if (entry->bridge_dev)
+		dev_put(entry->bridge_dev);
 	dev_put(entry->physin);
 	dev_put(entry->physout);
 #endif
@@ -85,6 +108,7 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	const struct sk_buff *skb = entry->skb;
 
+	entry->bridge_dev = NULL;
 	if (nf_bridge_info_exists(skb)) {
 		entry->physin = nf_bridge_get_physindev(skb, entry->state.net);
 		entry->physout = nf_bridge_get_physoutdev(skb);
@@ -108,6 +132,9 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
 	dev_hold(state->out);
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+	entry->bridge_dev = nf_queue_bridge_dev(entry->skb, state);
+	if (entry->bridge_dev)
+		dev_hold(entry->bridge_dev);
 	dev_hold(entry->physin);
 	dev_hold(entry->physout);
 #endif
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH nf v3 1/1] bridge: br_netfilter: pin bridge device while NFQUEUE holds fake dst
  2026-06-04 17:32 [PATCH nf v3 1/1] bridge: br_netfilter: pin bridge device while NFQUEUE holds fake dst Ren Wei
@ 2026-06-04 23:36 ` Florian Westphal
  0 siblings, 0 replies; 2+ messages in thread
From: Florian Westphal @ 2026-06-04 23:36 UTC (permalink / raw)
  To: Ren Wei
  Cc: netfilter-devel, pablo, phil, yuantan098, yifanwucs, tomapufckgml,
	bird, royenheart

Ren Wei <n05ec@lzu.edu.cn> wrote:
> The bridge netfilter fake rtable is embedded in struct net_bridge and is
> attached to bridged packets with skb_dst_set_noref(). If such a packet is
> queued to NFQUEUE, __nf_queue() upgrades that fake dst with
> skb_dst_force().

Ok, I think I understand why this mess exists. Ideally we could rip out
the fake rtable and alloc it as separate object with distinct lifetime,
this FAKE_RTABLE crap needs to die...  But I understand its more
intrusive / harder to fix it that way (performance considerations don't
matter however, br_netfilter can be pessimized).

> +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> +static struct net_device *nf_queue_bridge_dev(const struct sk_buff *skb,
> +					      const struct nf_hook_state *state)
> +{
> +	struct dst_entry *dst = skb_dst(skb);
> +	struct net_device *dev;
> +
> +	if (state->pf != NFPROTO_BRIDGE || !nf_bridge_info_exists(skb))
> +		return NULL;
> +

I guess what you are saying is that if br_netfilter hack is on,
skb->dst can be fake rtable while packet is sent to nfnetlink_queue
from a *bridge* family hook where in/outdev are the physical ports
yet skb->dev isn't the bridge device either.  The forced ref on the
dst is useless in that case, because netdevice_removal frees the
net_device regardless of the fake rtable dst entries refcounts.

If thats correct, could you please streamline this patch slightly?

Something like this (totally untested and misses dev_put part); and
that comment might be a bit more verbose.

diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -84,6 +84,8 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
 {
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
        const struct sk_buff *skb = entry->skb;
+       struct dst_entry *dst = skb_dst(skb);
+       struct net_device *dev = NULL;
 
        if (nf_bridge_info_exists(skb)) {
                entry->physin = nf_bridge_get_physindev(skb, entry->state.net);
@@ -92,6 +94,17 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
                entry->physin = NULL;
                entry->physout = NULL;
        }
+
+       if (dst && (dst->flags & DST_FAKE_RTABLE)) {
+               dev = dst_dev_rcu(dst);
+               if (dev == blackhole_netdev) [ Q: Is that really needed? I don't think so ]
+                       dev = NULL;
+       }
+
+       /* Must hold reference on the bridge device: the fake rtable
+        * is embedded within, dst_hold() is not sufficient.
+        */
+       entry->br_dev = dev;
 #endif
 }
 
@@ -108,6 +121,7 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
        dev_hold(state->out);
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+       dev_hold(entry->br_dev);
        dev_hold(entry->physin);
        dev_hold(entry->physout);
 #endif

Thanks!

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-06-04 23:36 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-04 17:32 [PATCH nf v3 1/1] bridge: br_netfilter: pin bridge device while NFQUEUE holds fake dst Ren Wei
2026-06-04 23:36 ` Florian Westphal

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.