All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH nf 1/1] bridge: br_netfilter: give fake rtable its own lifetime
       [not found] <cover.1778687139.git.royenheart@gmail.com>
@ 2026-05-14  3:48 ` Ren Wei
  2026-05-19 19:20   ` Florian Westphal
  0 siblings, 1 reply; 3+ messages in thread
From: Ren Wei @ 2026-05-14  3:48 UTC (permalink / raw)
  To: netfilter-devel, bridge
  Cc: pablo, fw, phil, razor, idosch, stephen, sw, davem, yuantan098,
	yifanwucs, tomapufckgml, bird, royenheart, n05ec

From: Haoze Xie <royenheart@gmail.com>

The bridge netfilter fake rtable is currently embedded in struct
net_bridge even though packets can keep using it after bridge teardown.

Give the fake rtable its own allocated lifetime and make
bridge_parent_rtable() return a referenced dst. This way the bridge and
any packets that still carry the fake dst each hold their own reference,
so bridge teardown no longer leaves a dangling fake dst behind.

We are sending this to the nf tree because the patch touches
br_netfilter paths; if the net tree is preferred, please let us know.

Fixes: 4adf0af6818f ("bridge: send correct MTU value in PMTU (revised)")
Cc: stable@kernel.org
Reported-by: Yuan Tan <yuantan098@gmail.com>
Reported-by: Yifan Wu <yifanwucs@gmail.com>
Reported-by: Juefei Pu <tomapufckgml@gmail.com>
Reported-by: Xin Liu <bird@lzu.edu.cn>
Signed-off-by: Haoze Xie <royenheart@gmail.com>
Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
---
 include/net/netfilter/br_netfilter.h | 14 +++++++-
 net/bridge/br_device.c               | 18 ++++++++--
 net/bridge/br_netfilter_hooks.c      |  2 +-
 net/bridge/br_netfilter_ipv6.c       |  2 +-
 net/bridge/br_nf_core.c              | 49 +++++++++++++++++++++++-----
 net/bridge/br_private.h              | 12 +++----
 6 files changed, 76 insertions(+), 21 deletions(-)

diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index 371696ec11b2..ad09dad09da8 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -3,6 +3,7 @@
 #define _BR_NETFILTER_H_
 
 #include <linux/netfilter.h>
+#include <net/dst.h>
 
 #include "../../../net/bridge/br_private.h"
 
@@ -44,9 +45,20 @@ static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
 {
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	struct net_bridge_port *port;
+	struct rtable *rt;
 
+	rt = NULL;
+	rcu_read_lock();
 	port = br_port_get_rcu(dev);
-	return port ? &port->br->fake_rtable : NULL;
+	if (!port)
+		goto out;
+
+	rt = rcu_dereference(port->br->fake_rtable);
+	if (rt && !dst_hold_safe(&rt->dst))
+		rt = NULL;
+out:
+	rcu_read_unlock();
+	return rt;
 #else
 	return NULL;
 #endif
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index a35ceae0a6f2..ca2e76929377 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -146,6 +146,15 @@ static int br_dev_init(struct net_device *dev)
 		return err;
 	}
 
+	err = br_netfilter_rtable_init(br);
+	if (err) {
+		br_multicast_uninit_stats(br);
+		br_vlan_flush(br);
+		br_mdb_hash_fini(br);
+		br_fdb_hash_fini(br);
+		return err;
+	}
+
 	netdev_lockdep_set_classes(dev);
 	return 0;
 }
@@ -154,6 +163,7 @@ static void br_dev_uninit(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
 
+	br_netfilter_rtable_fini(br);
 	br_multicast_dev_del(br);
 	br_multicast_uninit_stats(br);
 	br_vlan_flush(br);
@@ -210,8 +220,11 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
 	/* this flag will be cleared if the MTU was automatically adjusted */
 	br_opt_toggle(br, BROPT_MTU_SET_BY_USER, true);
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	/* remember the MTU in the rtable for PMTU */
-	dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
+	struct rtable *rt;
+
+	rt = rcu_dereference_protected(br->fake_rtable, lockdep_rtnl_is_held());
+	if (rt)
+		dst_metric_set(&rt->dst, RTAX_MTU, new_mtu);
 #endif
 
 	return 0;
@@ -529,7 +542,6 @@ void br_dev_setup(struct net_device *dev)
 	br->bridge_ageing_time = br->ageing_time = BR_DEFAULT_AGEING_TIME;
 	dev->max_mtu = ETH_MAX_MTU;
 
-	br_netfilter_rtable_init(br);
 	br_stp_timer_init(br);
 	br_multicast_init(br);
 	INIT_DELAYED_WORK(&br->gc_work, br_fdb_cleanup);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 0ab1c94db4b9..8b3b2fb48334 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -417,7 +417,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
 			return 0;
 		}
 		skb_dst_drop(skb);
-		skb_dst_set_noref(skb, &rt->dst);
+		skb_dst_set(skb, &rt->dst);
 	}
 
 	skb->dev = br_indev;
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index d8548428929e..4e245645f7e6 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -144,7 +144,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
 			return 0;
 		}
 		skb_dst_drop(skb);
-		skb_dst_set_noref(skb, &rt->dst);
+		skb_dst_set(skb, &rt->dst);
 	}
 
 	skb->dev = br_indev;
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
index a8c67035e23c..89a0a3d107de 100644
--- a/net/bridge/br_nf_core.c
+++ b/net/bridge/br_nf_core.c
@@ -14,6 +14,8 @@
 #include <linux/kernel.h>
 #include <linux/in_route.h>
 #include <linux/inetdevice.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>
 #include <net/route.h>
 
 #include "br_private.h"
@@ -49,6 +51,11 @@ static unsigned int fake_mtu(const struct dst_entry *dst)
 	return dst->dev->mtu;
 }
 
+struct br_fake_rtable {
+	struct rtable	rt;
+	u32		metrics[RTAX_MAX];
+};
+
 static struct dst_ops fake_dst_ops = {
 	.family		= AF_INET,
 	.update_pmtu	= fake_update_pmtu,
@@ -65,24 +72,50 @@ static struct dst_ops fake_dst_ops = {
  * ipt_REJECT needs it.  Future netfilter modules might
  * require us to fill additional fields.
  */
-void br_netfilter_rtable_init(struct net_bridge *br)
+int br_netfilter_rtable_init(struct net_bridge *br)
 {
-	struct rtable *rt = &br->fake_rtable;
+	struct br_fake_rtable *fake_rt;
+	struct rtable *rt;
+
+	fake_rt = kmem_cache_zalloc(fake_dst_ops.kmem_cachep, GFP_KERNEL);
+	if (!fake_rt)
+		return -ENOMEM;
 
-	rcuref_init(&rt->dst.__rcuref, 1);
-	rt->dst.dev = br->dev;
-	dst_init_metrics(&rt->dst, br->metrics, false);
+	rt = &fake_rt->rt;
+	dst_init(&rt->dst, &fake_dst_ops, br->dev, DST_OBSOLETE_NONE,
+		 DST_NOXFRM | DST_FAKE_RTABLE);
+	dst_init_metrics(&rt->dst, fake_rt->metrics, false);
 	dst_metric_set(&rt->dst, RTAX_MTU, br->dev->mtu);
-	rt->dst.flags	= DST_NOXFRM | DST_FAKE_RTABLE;
-	rt->dst.ops = &fake_dst_ops;
+	rcu_assign_pointer(br->fake_rtable, rt);
+
+	return 0;
+}
+
+void br_netfilter_rtable_fini(struct net_bridge *br)
+{
+	struct rtable *rt;
+
+	rt = rcu_replace_pointer(br->fake_rtable, NULL, lockdep_rtnl_is_held());
+	if (rt)
+		dst_release(&rt->dst);
 }
 
 int __init br_nf_core_init(void)
 {
-	return dst_entries_init(&fake_dst_ops);
+	int err;
+
+	fake_dst_ops.kmem_cachep =
+		KMEM_CACHE(br_fake_rtable, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+	err = dst_entries_init(&fake_dst_ops);
+	if (err)
+		fake_dst_ops.kmem_cachep = NULL;
+
+	return err;
 }
 
 void br_nf_core_fini(void)
 {
 	dst_entries_destroy(&fake_dst_ops);
+	kmem_cache_destroy(fake_dst_ops.kmem_cachep);
+	fake_dst_ops.kmem_cachep = NULL;
 }
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index bed1b1d9b282..bb4aa408f232 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -508,11 +508,7 @@ struct net_bridge {
 	struct rhashtable		fdb_hash_tbl;
 	struct list_head		port_list;
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	union {
-		struct rtable		fake_rtable;
-		struct rt6_info		fake_rt6_info;
-	};
-	u32				metrics[RTAX_MAX];
+	struct rtable __rcu		*fake_rtable;
 #endif
 	u16				group_fwd_mask;
 	u16				group_fwd_mask_required;
@@ -2018,11 +2014,13 @@ extern const struct nf_br_ops __rcu *nf_br_ops;
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 int br_nf_core_init(void);
 void br_nf_core_fini(void);
-void br_netfilter_rtable_init(struct net_bridge *);
+int br_netfilter_rtable_init(struct net_bridge *br);
+void br_netfilter_rtable_fini(struct net_bridge *br);
 #else
 static inline int br_nf_core_init(void) { return 0; }
 static inline void br_nf_core_fini(void) {}
-#define br_netfilter_rtable_init(x)
+static inline int br_netfilter_rtable_init(struct net_bridge *br) { return 0; }
+static inline void br_netfilter_rtable_fini(struct net_bridge *br) {}
 #endif
 
 /* br_stp.c */
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH nf 1/1] bridge: br_netfilter: give fake rtable its own lifetime
  2026-05-14  3:48 ` [PATCH nf 1/1] bridge: br_netfilter: give fake rtable its own lifetime Ren Wei
@ 2026-05-19 19:20   ` Florian Westphal
  2026-05-26  1:37     ` Haoze Xie
  0 siblings, 1 reply; 3+ messages in thread
From: Florian Westphal @ 2026-05-19 19:20 UTC (permalink / raw)
  To: Ren Wei
  Cc: netfilter-devel, bridge, pablo, phil, razor, idosch, stephen, sw,
	davem, yuantan098, yifanwucs, tomapufckgml, bird, royenheart

Ren Wei <n05ec@lzu.edu.cn> wrote:
> From: Haoze Xie <royenheart@gmail.com>
> 
> The bridge netfilter fake rtable is currently embedded in struct
> net_bridge even though packets can keep using it after bridge teardown.

How?  Please elaborate a bit, it is unexpected.

> Give the fake rtable its own allocated lifetime and make
> bridge_parent_rtable() return a referenced dst. This way the bridge and
> any packets that still carry the fake dst each hold their own reference,
> so bridge teardown no longer leaves a dangling fake dst behind.

If we have to do this it would be better to move this kludge into
br_netfilter.c completely and get rid of the fake rtable hack in bridge
for good.

Please also see various AI comments at
https://sashiko.dev/#/patchset/783d76ac83917b7302c1ec647794bd773bb1875a.1778687139.git.royenheart%40gmail.com

[ I would like to zap bridge_netfilter but it seems its too popular ... ]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH nf 1/1] bridge: br_netfilter: give fake rtable its own lifetime
  2026-05-19 19:20   ` Florian Westphal
@ 2026-05-26  1:37     ` Haoze Xie
  0 siblings, 0 replies; 3+ messages in thread
From: Haoze Xie @ 2026-05-26  1:37 UTC (permalink / raw)
  To: Florian Westphal, Ren Wei
  Cc: netfilter-devel, bridge, pablo, phil, razor, idosch, stephen, sw,
	davem, yuantan098, yifanwucs, tomapufckgml, bird

On 5/20/2026 3:20 AM, Florian Westphal wrote:
> Ren Wei <n05ec@lzu.edu.cn> wrote:
>> From: Haoze Xie <royenheart@gmail.com>
>>
>> The bridge netfilter fake rtable is currently embedded in struct
>> net_bridge even though packets can keep using it after bridge teardown.
> 
> How?  Please elaborate a bit, it is unexpected.
> 

The fake rtable is attached to bridged skbs in the br_netfilter
prerouting restore path via bridge_parent_rtable() +
skb_dst_set_noref().

If such a packet is queued to NFQUEUE, __nf_queue() upgrades that fake
dst with skb_dst_force(). From that point on, the queued skb can hold a
real dst reference even after bridge teardown starts freeing the
backing struct net_bridge storage. When the verdict path later drops or
reinjects the skb, dst_release() can still touch that freed fake dst.

>> Give the fake rtable its own allocated lifetime and make
>> bridge_parent_rtable() return a referenced dst. This way the bridge and
>> any packets that still carry the fake dst each hold their own reference,
>> so bridge teardown no longer leaves a dangling fake dst behind.
> 
> If we have to do this it would be better to move this kludge into
> br_netfilter.c completely and get rid of the fake rtable hack in bridge
> for good.
> 

I reworked the patch for v2 to keep the lifetime handling in
br_netfilter and to address the issues from the previous version.

Instead of adding a separate br_netfilter-private dst lifetime scheme,
v2 moves the fake rtable out of struct net_bridge, makes
bridge_parent_rtable() return a held dst reference, and switches the
callers to skb_dst_set().

To avoid the extra lifetime issues in the previous version, the new
fake dst is allocated with rt_dst_alloc(), so it reuses the core IPv4
rtable lifecycle instead of custom br_netfilter dst_ops state. During
teardown, br_netfilter first detaches the fake rtable from the bridge,
then calls dst_dev_put() before dropping the bridge-owned dst
reference.

> Please also see various AI comments at
> https://sashiko.dev/#/patchset/783d76ac83917b7302c1ec647794bd773bb1875a.1778687139.git.royenheart%40gmail.com
> 

I also went through the Sashiko comments you pointed out. The v2 change
is specifically meant to avoid both the bridge-private storage UAF and
the follow-up lifetime problems from the previous approach.

I will send v2.

Thanks,
Haoze

> [ I would like to zap bridge_netfilter but it seems its too popular ... ]


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-05-26  1:37 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <cover.1778687139.git.royenheart@gmail.com>
2026-05-14  3:48 ` [PATCH nf 1/1] bridge: br_netfilter: give fake rtable its own lifetime Ren Wei
2026-05-19 19:20   ` Florian Westphal
2026-05-26  1:37     ` Haoze Xie

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.