Netdev List
 help / color / mirror / Atom feed
* [PATCH net] ipv4: protect egress device access in the output path with rcu_read_lock
@ 2026-05-30  6:49 Hyunwoo Kim
  2026-05-30  9:48 ` Hyunwoo Kim
  0 siblings, 1 reply; 2+ messages in thread
From: Hyunwoo Kim @ 2026-05-30  6:49 UTC (permalink / raw)
  To: dsahern, idosch, davem, edumazet, kuba, pabeni, horms,
	steffen.klassert, herbert
  Cc: netdev, imv4bel

ip_mc_output(), __ip_local_out() and xfrm4_output() read the egress
network device from rt->dst.dev / skb_dst_dev(skb) and then pass that
pointer as the 'out' device argument of NF_HOOK without holding
rcu_read_lock.

dst->dev is a field protected by RCU. When a device is unregistered its
value is replaced with blackhole_netdev, and the previous device is
freed after an RCU grace period. A section that reads dst->dev and uses
that pointer must therefore hold rcu_read_lock. The three functions
above are missing this protection, so when the egress device is
unregistered concurrently with transmission, a LOCAL_OUT / POST_ROUTING
hook (nft meta oif, selinux_ip_postroute_compat, etc.) can reference a
device pointer that is no longer valid.

In all three leaves, wrap the section that reads the device and runs
NF_HOOK in rcu_read_lock(), and read the device through the RCU
accessors (skb_dst_dev_rcu() / dst_dev_rcu()). As a result, the RCU
grace period cannot complete while a transmission is in flight, so the
egress device is not freed and the hook always references a valid
device.

Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
---
 net/ipv4/ip_output.c    | 27 +++++++++++++++++++--------
 net/ipv4/xfrm4_output.c | 13 +++++++++----
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5bcd73cbdb41..92a70b7e74a6 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -102,6 +102,7 @@ EXPORT_SYMBOL(ip_send_check);
 int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct iphdr *iph = ip_hdr(skb);
+	int err;
 
 	IP_INC_STATS(net, IPSTATS_MIB_OUTREQUESTS);
 
@@ -117,9 +118,12 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 	skb->protocol = htons(ETH_P_IP);
 
-	return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
-		       net, sk, skb, NULL, skb_dst_dev(skb),
-		       dst_output);
+	rcu_read_lock();
+	err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
+		      net, sk, skb, NULL, skb_dst_dev_rcu(skb),
+		      dst_output);
+	rcu_read_unlock();
+	return err;
 }
 
 int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -368,7 +372,11 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk,
 int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct rtable *rt = skb_rtable(skb);
-	struct net_device *dev = rt->dst.dev;
+	struct net_device *dev;
+	int ret;
+
+	rcu_read_lock();
+	dev = dst_dev_rcu(&rt->dst);
 
 	/*
 	 *	If the indicated interface is up and running, send the packet.
@@ -407,6 +415,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 		if (ip_hdr(skb)->ttl == 0) {
 			kfree_skb(skb);
+			rcu_read_unlock();
 			return 0;
 		}
 	}
@@ -419,10 +428,12 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 				ip_mc_finish_output);
 	}
 
-	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
-			    net, sk, skb, NULL, skb->dev,
-			    ip_finish_output,
-			    !(IPCB(skb)->flags & IPSKB_REROUTED));
+	ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+			   net, sk, skb, NULL, skb->dev,
+			   ip_finish_output,
+			   !(IPCB(skb)->flags & IPSKB_REROUTED));
+	rcu_read_unlock();
+	return ret;
 }
 
 int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 0ae67d537499..d5dc084777a3 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -30,10 +30,15 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
-			    net, sk, skb, skb->dev, skb_dst_dev(skb),
-			    __xfrm4_output,
-			    !(IPCB(skb)->flags & IPSKB_REROUTED));
+	int ret;
+
+	rcu_read_lock();
+	ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+			   net, sk, skb, skb->dev, skb_dst_dev_rcu(skb),
+			   __xfrm4_output,
+			   !(IPCB(skb)->flags & IPSKB_REROUTED));
+	rcu_read_unlock();
+	return ret;
 }
 
 void xfrm4_local_error(struct sk_buff *skb, u32 mtu)
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-05-30  9:48 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-30  6:49 [PATCH net] ipv4: protect egress device access in the output path with rcu_read_lock Hyunwoo Kim
2026-05-30  9:48 ` Hyunwoo Kim

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox