All of lore.kernel.org
 help / color / mirror / Atom feed
From: Hyunwoo Kim <imv4bel@gmail.com>
To: dsahern@kernel.org, idosch@nvidia.com, davem@davemloft.net,
	edumazet@google.com, kuba@kernel.org, pabeni@redhat.com,
	horms@kernel.org, steffen.klassert@secunet.com,
	herbert@gondor.apana.org.au, andrew+netdev@lunn.ch,
	kuniyu@google.com, jlayton@kernel.org
Cc: netdev@vger.kernel.org, imv4bel@gmail.com
Subject: [PATCH net v2] net: protect egress device access in the output path with rcu_read_lock
Date: Sun, 31 May 2026 16:06:50 +0900	[thread overview]
Message-ID: <ahveCu-zzFlpeVut@v4bel> (raw)

ip_mc_output(), __ip_local_out(), xfrm4_output(), raw_send_hdrinc(),
xfrm_output_resume() and vrf_output() read the egress network device
from rt->dst.dev / skb_dst(skb)->dev / skb_dst_dev(skb) and then pass
that pointer as the 'out' device argument of NF_HOOK without holding
rcu_read_lock.

dst->dev is a field protected by RCU. When a device is unregistered its
value is replaced with blackhole_netdev, and the previous device is
freed after an RCU grace period. A section that reads dst->dev and uses
that pointer must therefore hold rcu_read_lock. The functions above are
missing this protection, so when the egress device is unregistered
concurrently with transmission, a LOCAL_OUT / POST_ROUTING hook (nft
meta oif, selinux_ip_postroute_compat, etc.) can reference a device
pointer that is no longer valid.

In the leaves above, wrap the section that reads the device and runs
NF_HOOK in rcu_read_lock(), and read the device through the RCU
accessors (skb_dst_dev_rcu() / dst_dev_rcu()). As a result, the RCU
grace period cannot complete while a transmission is in flight, so the
egress device is not freed and the hook always references a valid
device.

Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
---
Changes in v2:
- Changed to a net-wide patch that also fixes the issue in
raw_send_hdrinc(), xfrm_output_resume() and vrf_output().
- v1: https://lore.kernel.org/all/ahqIg6vURwYI0LJ5@v4bel/
---
 drivers/net/vrf.c       | 16 +++++++++++-----
 net/ipv4/ip_output.c    | 27 +++++++++++++++++++--------
 net/ipv4/raw.c          |  4 +++-
 net/ipv4/xfrm4_output.c | 13 +++++++++----
 net/xfrm/xfrm_output.c  |  4 +++-
 5 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 46209917ae4d..e9a1dd961805 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -833,17 +833,23 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
 
 static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct net_device *dev = skb_dst(skb)->dev;
+	struct net_device *dev;
+	int ret;
+
+	rcu_read_lock();
+	dev = skb_dst_dev_rcu(skb);
 
 	IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
 
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_IP);
 
-	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
-			    net, sk, skb, NULL, dev,
-			    vrf_finish_output,
-			    !(IPCB(skb)->flags & IPSKB_REROUTED));
+	ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+			   net, sk, skb, NULL, dev,
+			   vrf_finish_output,
+			   !(IPCB(skb)->flags & IPSKB_REROUTED));
+	rcu_read_unlock();
+	return ret;
 }
 
 /* set dst on skb to send packet to us via dev_xmit path. Allows
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5bcd73cbdb41..92a70b7e74a6 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -102,6 +102,7 @@ EXPORT_SYMBOL(ip_send_check);
 int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct iphdr *iph = ip_hdr(skb);
+	int err;
 
 	IP_INC_STATS(net, IPSTATS_MIB_OUTREQUESTS);
 
@@ -117,9 +118,12 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 	skb->protocol = htons(ETH_P_IP);
 
-	return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
-		       net, sk, skb, NULL, skb_dst_dev(skb),
-		       dst_output);
+	rcu_read_lock();
+	err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
+		      net, sk, skb, NULL, skb_dst_dev_rcu(skb),
+		      dst_output);
+	rcu_read_unlock();
+	return err;
 }
 
 int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -368,7 +372,11 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk,
 int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct rtable *rt = skb_rtable(skb);
-	struct net_device *dev = rt->dst.dev;
+	struct net_device *dev;
+	int ret;
+
+	rcu_read_lock();
+	dev = dst_dev_rcu(&rt->dst);
 
 	/*
 	 *	If the indicated interface is up and running, send the packet.
@@ -407,6 +415,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 		if (ip_hdr(skb)->ttl == 0) {
 			kfree_skb(skb);
+			rcu_read_unlock();
 			return 0;
 		}
 	}
@@ -419,10 +428,12 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 				ip_mc_finish_output);
 	}
 
-	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
-			    net, sk, skb, NULL, skb->dev,
-			    ip_finish_output,
-			    !(IPCB(skb)->flags & IPSKB_REROUTED));
+	ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+			   net, sk, skb, NULL, skb->dev,
+			   ip_finish_output,
+			   !(IPCB(skb)->flags & IPSKB_REROUTED));
+	rcu_read_unlock();
+	return ret;
 }
 
 int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 68e88cb3e55c..555e62eacdc6 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -410,9 +410,11 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 				skb_transport_header(skb))->type);
 	}
 
+	rcu_read_lock();
 	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
-		      net, sk, skb, NULL, rt->dst.dev,
+		      net, sk, skb, NULL, skb_dst_dev_rcu(skb),
 		      dst_output);
+	rcu_read_unlock();
 	if (err > 0)
 		err = net_xmit_errno(err);
 	if (err)
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 0ae67d537499..d5dc084777a3 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -30,10 +30,15 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
-			    net, sk, skb, skb->dev, skb_dst_dev(skb),
-			    __xfrm4_output,
-			    !(IPCB(skb)->flags & IPSKB_REROUTED));
+	int ret;
+
+	rcu_read_lock();
+	ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+			   net, sk, skb, skb->dev, skb_dst_dev_rcu(skb),
+			   __xfrm4_output,
+			   !(IPCB(skb)->flags & IPSKB_REROUTED));
+	rcu_read_unlock();
+	return ret;
 }
 
 void xfrm4_local_error(struct sk_buff *skb, u32 mtu)
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index cc35c2fcbbe0..2b69ac55c8a5 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -604,9 +604,11 @@ int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
 		if (!skb_dst(skb)->xfrm)
 			return dst_output(net, sk, skb);
 
+		rcu_read_lock();
 		err = nf_hook(skb_dst(skb)->ops->family,
 			      NF_INET_POST_ROUTING, net, sk, skb,
-			      NULL, skb_dst(skb)->dev, xfrm_output2);
+			      NULL, skb_dst_dev_rcu(skb), xfrm_output2);
+		rcu_read_unlock();
 		if (unlikely(err != 1))
 			goto out;
 	}
-- 
2.43.0


             reply	other threads:[~2026-05-31  7:06 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-31  7:06 Hyunwoo Kim [this message]
2026-06-02 15:45 ` [PATCH net v2] net: protect egress device access in the output path with rcu_read_lock Ido Schimmel
2026-06-02 16:24   ` David Ahern
2026-06-03 12:58     ` Hyunwoo Kim
2026-06-03 14:52       ` David Ahern
2026-06-04  1:28       ` Herbert Xu
2026-06-04  2:25         ` Herbert Xu
2026-06-04 12:27       ` Ido Schimmel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ahveCu-zzFlpeVut@v4bel \
    --to=imv4bel@gmail.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=herbert@gondor.apana.org.au \
    --cc=horms@kernel.org \
    --cc=idosch@nvidia.com \
    --cc=jlayton@kernel.org \
    --cc=kuba@kernel.org \
    --cc=kuniyu@google.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=steffen.klassert@secunet.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.