Netdev List
 help / color / mirror / Atom feed
* [PATCH net v2] net: protect egress device access in the output path with rcu_read_lock
@ 2026-05-31  7:06 Hyunwoo Kim
  2026-06-02 15:45 ` Ido Schimmel
  0 siblings, 1 reply; 5+ messages in thread
From: Hyunwoo Kim @ 2026-05-31  7:06 UTC (permalink / raw)
  To: dsahern, idosch, davem, edumazet, kuba, pabeni, horms,
	steffen.klassert, herbert, andrew+netdev, kuniyu, jlayton
  Cc: netdev, imv4bel

ip_mc_output(), __ip_local_out(), xfrm4_output(), raw_send_hdrinc(),
xfrm_output_resume() and vrf_output() read the egress network device
from rt->dst.dev / skb_dst(skb)->dev / skb_dst_dev(skb) and then pass
that pointer as the 'out' device argument of NF_HOOK without holding
rcu_read_lock.

dst->dev is a field protected by RCU. When a device is unregistered its
value is replaced with blackhole_netdev, and the previous device is
freed after an RCU grace period. A section that reads dst->dev and uses
that pointer must therefore hold rcu_read_lock. The functions above are
missing this protection, so when the egress device is unregistered
concurrently with transmission, a LOCAL_OUT / POST_ROUTING hook (nft
meta oif, selinux_ip_postroute_compat, etc.) can reference a device
pointer that is no longer valid.

In the leaves above, wrap the section that reads the device and runs
NF_HOOK in rcu_read_lock(), and read the device through the RCU
accessors (skb_dst_dev_rcu() / dst_dev_rcu()). As a result, the RCU
grace period cannot complete while a transmission is in flight, so the
egress device is not freed and the hook always references a valid
device.

Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
---
Changes in v2:
- Changed to a net-wide patch that also fixes the issue in
raw_send_hdrinc(), xfrm_output_resume() and vrf_output().
- v1: https://lore.kernel.org/all/ahqIg6vURwYI0LJ5@v4bel/
---
 drivers/net/vrf.c       | 16 +++++++++++-----
 net/ipv4/ip_output.c    | 27 +++++++++++++++++++--------
 net/ipv4/raw.c          |  4 +++-
 net/ipv4/xfrm4_output.c | 13 +++++++++----
 net/xfrm/xfrm_output.c  |  4 +++-
 5 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 46209917ae4d..e9a1dd961805 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -833,17 +833,23 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
 
 static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct net_device *dev = skb_dst(skb)->dev;
+	struct net_device *dev;
+	int ret;
+
+	rcu_read_lock();
+	dev = skb_dst_dev_rcu(skb);
 
 	IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
 
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_IP);
 
-	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
-			    net, sk, skb, NULL, dev,
-			    vrf_finish_output,
-			    !(IPCB(skb)->flags & IPSKB_REROUTED));
+	ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+			   net, sk, skb, NULL, dev,
+			   vrf_finish_output,
+			   !(IPCB(skb)->flags & IPSKB_REROUTED));
+	rcu_read_unlock();
+	return ret;
 }
 
 /* set dst on skb to send packet to us via dev_xmit path. Allows
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5bcd73cbdb41..92a70b7e74a6 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -102,6 +102,7 @@ EXPORT_SYMBOL(ip_send_check);
 int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct iphdr *iph = ip_hdr(skb);
+	int err;
 
 	IP_INC_STATS(net, IPSTATS_MIB_OUTREQUESTS);
 
@@ -117,9 +118,12 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 	skb->protocol = htons(ETH_P_IP);
 
-	return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
-		       net, sk, skb, NULL, skb_dst_dev(skb),
-		       dst_output);
+	rcu_read_lock();
+	err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
+		      net, sk, skb, NULL, skb_dst_dev_rcu(skb),
+		      dst_output);
+	rcu_read_unlock();
+	return err;
 }
 
 int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -368,7 +372,11 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk,
 int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct rtable *rt = skb_rtable(skb);
-	struct net_device *dev = rt->dst.dev;
+	struct net_device *dev;
+	int ret;
+
+	rcu_read_lock();
+	dev = dst_dev_rcu(&rt->dst);
 
 	/*
 	 *	If the indicated interface is up and running, send the packet.
@@ -407,6 +415,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 		if (ip_hdr(skb)->ttl == 0) {
 			kfree_skb(skb);
+			rcu_read_unlock();
 			return 0;
 		}
 	}
@@ -419,10 +428,12 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 				ip_mc_finish_output);
 	}
 
-	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
-			    net, sk, skb, NULL, skb->dev,
-			    ip_finish_output,
-			    !(IPCB(skb)->flags & IPSKB_REROUTED));
+	ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+			   net, sk, skb, NULL, skb->dev,
+			   ip_finish_output,
+			   !(IPCB(skb)->flags & IPSKB_REROUTED));
+	rcu_read_unlock();
+	return ret;
 }
 
 int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 68e88cb3e55c..555e62eacdc6 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -410,9 +410,11 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 				skb_transport_header(skb))->type);
 	}
 
+	rcu_read_lock();
 	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
-		      net, sk, skb, NULL, rt->dst.dev,
+		      net, sk, skb, NULL, skb_dst_dev_rcu(skb),
 		      dst_output);
+	rcu_read_unlock();
 	if (err > 0)
 		err = net_xmit_errno(err);
 	if (err)
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 0ae67d537499..d5dc084777a3 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -30,10 +30,15 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
-			    net, sk, skb, skb->dev, skb_dst_dev(skb),
-			    __xfrm4_output,
-			    !(IPCB(skb)->flags & IPSKB_REROUTED));
+	int ret;
+
+	rcu_read_lock();
+	ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+			   net, sk, skb, skb->dev, skb_dst_dev_rcu(skb),
+			   __xfrm4_output,
+			   !(IPCB(skb)->flags & IPSKB_REROUTED));
+	rcu_read_unlock();
+	return ret;
 }
 
 void xfrm4_local_error(struct sk_buff *skb, u32 mtu)
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index cc35c2fcbbe0..2b69ac55c8a5 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -604,9 +604,11 @@ int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
 		if (!skb_dst(skb)->xfrm)
 			return dst_output(net, sk, skb);
 
+		rcu_read_lock();
 		err = nf_hook(skb_dst(skb)->ops->family,
 			      NF_INET_POST_ROUTING, net, sk, skb,
-			      NULL, skb_dst(skb)->dev, xfrm_output2);
+			      NULL, skb_dst_dev_rcu(skb), xfrm_output2);
+		rcu_read_unlock();
 		if (unlikely(err != 1))
 			goto out;
 	}
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net v2] net: protect egress device access in the output path with rcu_read_lock
  2026-05-31  7:06 [PATCH net v2] net: protect egress device access in the output path with rcu_read_lock Hyunwoo Kim
@ 2026-06-02 15:45 ` Ido Schimmel
  2026-06-02 16:24   ` David Ahern
  0 siblings, 1 reply; 5+ messages in thread
From: Ido Schimmel @ 2026-06-02 15:45 UTC (permalink / raw)
  To: Hyunwoo Kim
  Cc: dsahern, davem, edumazet, kuba, pabeni, horms, steffen.klassert,
	herbert, andrew+netdev, kuniyu, jlayton, netdev

On Sun, May 31, 2026 at 04:06:50PM +0900, Hyunwoo Kim wrote:
>  drivers/net/vrf.c       | 16 +++++++++++-----
>  net/ipv4/ip_output.c    | 27 +++++++++++++++++++--------
>  net/ipv4/raw.c          |  4 +++-
>  net/ipv4/xfrm4_output.c | 13 +++++++++----
>  net/xfrm/xfrm_output.c  |  4 +++-
>  5 files changed, 45 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
> index 46209917ae4d..e9a1dd961805 100644
> --- a/drivers/net/vrf.c
> +++ b/drivers/net/vrf.c
> @@ -833,17 +833,23 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
>  
>  static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>  {
> -	struct net_device *dev = skb_dst(skb)->dev;
> +	struct net_device *dev;
> +	int ret;
> +
> +	rcu_read_lock();
> +	dev = skb_dst_dev_rcu(skb);
>  
>  	IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
>  
>  	skb->dev = dev;
>  	skb->protocol = htons(ETH_P_IP);
>  
> -	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
> -			    net, sk, skb, NULL, dev,
> -			    vrf_finish_output,
> -			    !(IPCB(skb)->flags & IPSKB_REROUTED));
> +	ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
> +			   net, sk, skb, NULL, dev,
> +			   vrf_finish_output,
> +			   !(IPCB(skb)->flags & IPSKB_REROUTED));
> +	rcu_read_unlock();
> +	return ret;
>  }

Patch LGTM, thanks, but what about the IPv6 counterpart (vrf_output6())
and the other similar existing issues that Sashiko is flagging?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net v2] net: protect egress device access in the output path with rcu_read_lock
  2026-06-02 15:45 ` Ido Schimmel
@ 2026-06-02 16:24   ` David Ahern
  2026-06-03 12:58     ` Hyunwoo Kim
  0 siblings, 1 reply; 5+ messages in thread
From: David Ahern @ 2026-06-02 16:24 UTC (permalink / raw)
  To: Ido Schimmel, Hyunwoo Kim
  Cc: davem, edumazet, kuba, pabeni, horms, steffen.klassert, herbert,
	andrew+netdev, kuniyu, jlayton, netdev

On 6/2/26 9:45 AM, Ido Schimmel wrote:
> On Sun, May 31, 2026 at 04:06:50PM +0900, Hyunwoo Kim wrote:
>>  drivers/net/vrf.c       | 16 +++++++++++-----
>>  net/ipv4/ip_output.c    | 27 +++++++++++++++++++--------
>>  net/ipv4/raw.c          |  4 +++-
>>  net/ipv4/xfrm4_output.c | 13 +++++++++----
>>  net/xfrm/xfrm_output.c  |  4 +++-
>>  5 files changed, 45 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
>> index 46209917ae4d..e9a1dd961805 100644
>> --- a/drivers/net/vrf.c
>> +++ b/drivers/net/vrf.c
>> @@ -833,17 +833,23 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
>>  
>>  static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>>  {
>> -	struct net_device *dev = skb_dst(skb)->dev;
>> +	struct net_device *dev;
>> +	int ret;
>> +
>> +	rcu_read_lock();
>> +	dev = skb_dst_dev_rcu(skb);
>>  
>>  	IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
>>  
>>  	skb->dev = dev;
>>  	skb->protocol = htons(ETH_P_IP);
>>  
>> -	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
>> -			    net, sk, skb, NULL, dev,
>> -			    vrf_finish_output,
>> -			    !(IPCB(skb)->flags & IPSKB_REROUTED));
>> +	ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
>> +			   net, sk, skb, NULL, dev,
>> +			   vrf_finish_output,
>> +			   !(IPCB(skb)->flags & IPSKB_REROUTED));
>> +	rcu_read_unlock();
>> +	return ret;
>>  }
> 
> Patch LGTM, thanks, but what about the IPv6 counterpart (vrf_output6())
> and the other similar existing issues that Sashiko is flagging?

Common locking at a higher level than dst->output seems like a better
approach. This function is called under rcu_read_lock for some paths.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net v2] net: protect egress device access in the output path with rcu_read_lock
  2026-06-02 16:24   ` David Ahern
@ 2026-06-03 12:58     ` Hyunwoo Kim
  2026-06-03 14:52       ` David Ahern
  0 siblings, 1 reply; 5+ messages in thread
From: Hyunwoo Kim @ 2026-06-03 12:58 UTC (permalink / raw)
  To: David Ahern, idosch
  Cc: davem, edumazet, kuba, pabeni, horms, steffen.klassert, herbert,
	andrew+netdev, kuniyu, jlayton, netdev, imv4bel

On Tue, Jun 02, 2026 at 10:24:09AM -0600, David Ahern wrote:
> On 6/2/26 9:45 AM, Ido Schimmel wrote:
> > On Sun, May 31, 2026 at 04:06:50PM +0900, Hyunwoo Kim wrote:
> >>  drivers/net/vrf.c       | 16 +++++++++++-----
> >>  net/ipv4/ip_output.c    | 27 +++++++++++++++++++--------
> >>  net/ipv4/raw.c          |  4 +++-
> >>  net/ipv4/xfrm4_output.c | 13 +++++++++----
> >>  net/xfrm/xfrm_output.c  |  4 +++-
> >>  5 files changed, 45 insertions(+), 19 deletions(-)
> >>
> >> diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
> >> index 46209917ae4d..e9a1dd961805 100644
> >> --- a/drivers/net/vrf.c
> >> +++ b/drivers/net/vrf.c
> >> @@ -833,17 +833,23 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
> >>  
> >>  static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
> >>  {
> >> -	struct net_device *dev = skb_dst(skb)->dev;
> >> +	struct net_device *dev;
> >> +	int ret;
> >> +
> >> +	rcu_read_lock();
> >> +	dev = skb_dst_dev_rcu(skb);
> >>  
> >>  	IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
> >>  
> >>  	skb->dev = dev;
> >>  	skb->protocol = htons(ETH_P_IP);
> >>  
> >> -	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
> >> -			    net, sk, skb, NULL, dev,
> >> -			    vrf_finish_output,
> >> -			    !(IPCB(skb)->flags & IPSKB_REROUTED));
> >> +	ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
> >> +			   net, sk, skb, NULL, dev,
> >> +			   vrf_finish_output,
> >> +			   !(IPCB(skb)->flags & IPSKB_REROUTED));
> >> +	rcu_read_unlock();
> >> +	return ret;
> >>  }
> > 
> > Patch LGTM, thanks, but what about the IPv6 counterpart (vrf_output6())

I left ipv6 out because its callers (ip6_send_skb, ip6_xmit, etc.)
already hold rcu. Though some niche paths may not. (maybe rxe_send?)

> > and the other similar existing issues that Sashiko is flagging?

I couldn't find Sashiko's review. Could you share a url?

> 
> Common locking at a higher level than dst->output seems like a better
> approach. This function is called under rcu_read_lock for some paths.

Adding rcu to ip_local_out would protect all functions reached via
dst_output in one place; raw_send_hdrinc and xfrm_output_resume still
need their own patches. What do you think of this diff?

As a follow-up, the rcu in ip_output etc. then becomes redundant and
should be removed.


Best regards,
Hyunwoo Kim

---

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5bcd73cbdb41..26b51ef0763f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -126,9 +126,11 @@ int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	int err;

+	rcu_read_lock();
 	err = __ip_local_out(net, sk, skb);
 	if (likely(err == 1))
 		err = dst_output(net, sk, skb);
+	rcu_read_unlock();

 	return err;
 }
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 68e88cb3e55c..555e62eacdc6 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -410,9 +410,11 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 				skb_transport_header(skb))->type);
 	}

+	rcu_read_lock();
 	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
-		      net, sk, skb, NULL, rt->dst.dev,
+		      net, sk, skb, NULL, skb_dst_dev_rcu(skb),
 		      dst_output);
+	rcu_read_unlock();
 	if (err > 0)
 		err = net_xmit_errno(err);
 	if (err)
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index cc35c2fcbbe0..c5ac360da38d 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -594,6 +594,7 @@ int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
 {
 	struct net *net = xs_net(skb_dst(skb)->xfrm);

+	rcu_read_lock();
 	while (likely((err = xfrm_output_one(skb, err)) == 0)) {
 		nf_reset_ct(skb);

@@ -601,12 +602,14 @@ int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
 		if (unlikely(err != 1))
 			goto out;

-		if (!skb_dst(skb)->xfrm)
-			return dst_output(net, sk, skb);
+		if (!skb_dst(skb)->xfrm) {
+			err = dst_output(net, sk, skb);
+			goto out;
+		}

 		err = nf_hook(skb_dst(skb)->ops->family,
 			      NF_INET_POST_ROUTING, net, sk, skb,
-			      NULL, skb_dst(skb)->dev, xfrm_output2);
+			      NULL, skb_dst_dev_rcu(skb), xfrm_output2);
 		if (unlikely(err != 1))
 			goto out;
 	}
@@ -615,6 +618,7 @@ int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
 		err = 0;

 out:
+	rcu_read_unlock();
 	return err;
 }
 EXPORT_SYMBOL_GPL(xfrm_output_resume);

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net v2] net: protect egress device access in the output path with rcu_read_lock
  2026-06-03 12:58     ` Hyunwoo Kim
@ 2026-06-03 14:52       ` David Ahern
  0 siblings, 0 replies; 5+ messages in thread
From: David Ahern @ 2026-06-03 14:52 UTC (permalink / raw)
  To: Hyunwoo Kim, idosch
  Cc: davem, edumazet, kuba, pabeni, horms, steffen.klassert, herbert,
	andrew+netdev, kuniyu, jlayton, netdev

On 6/3/26 6:58 AM, Hyunwoo Kim wrote:
> On Tue, Jun 02, 2026 at 10:24:09AM -0600, David Ahern wrote:
>> On 6/2/26 9:45 AM, Ido Schimmel wrote:
>>> On Sun, May 31, 2026 at 04:06:50PM +0900, Hyunwoo Kim wrote:
>>>>  drivers/net/vrf.c       | 16 +++++++++++-----
>>>>  net/ipv4/ip_output.c    | 27 +++++++++++++++++++--------
>>>>  net/ipv4/raw.c          |  4 +++-
>>>>  net/ipv4/xfrm4_output.c | 13 +++++++++----
>>>>  net/xfrm/xfrm_output.c  |  4 +++-
>>>>  5 files changed, 45 insertions(+), 19 deletions(-)
>>>>
>>>> diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
>>>> index 46209917ae4d..e9a1dd961805 100644
>>>> --- a/drivers/net/vrf.c
>>>> +++ b/drivers/net/vrf.c
>>>> @@ -833,17 +833,23 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
>>>>  
>>>>  static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>>>>  {
>>>> -	struct net_device *dev = skb_dst(skb)->dev;
>>>> +	struct net_device *dev;
>>>> +	int ret;
>>>> +
>>>> +	rcu_read_lock();
>>>> +	dev = skb_dst_dev_rcu(skb);
>>>>  
>>>>  	IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
>>>>  
>>>>  	skb->dev = dev;
>>>>  	skb->protocol = htons(ETH_P_IP);
>>>>  
>>>> -	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
>>>> -			    net, sk, skb, NULL, dev,
>>>> -			    vrf_finish_output,
>>>> -			    !(IPCB(skb)->flags & IPSKB_REROUTED));
>>>> +	ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
>>>> +			   net, sk, skb, NULL, dev,
>>>> +			   vrf_finish_output,
>>>> +			   !(IPCB(skb)->flags & IPSKB_REROUTED));
>>>> +	rcu_read_unlock();
>>>> +	return ret;
>>>>  }
>>>
>>> Patch LGTM, thanks, but what about the IPv6 counterpart (vrf_output6())
> 
> I left ipv6 out because its callers (ip6_send_skb, ip6_xmit, etc.)
> already hold rcu. Though some niche paths may not. (maybe rxe_send?)
> 
>>> and the other similar existing issues that Sashiko is flagging?
> 
> I couldn't find Sashiko's review. Could you share a url?
> 
>>
>> Common locking at a higher level than dst->output seems like a better
>> approach. This function is called under rcu_read_lock for some paths.
> 
> Adding rcu to ip_local_out would protect all functions reached via
> dst_output in one place; raw_send_hdrinc and xfrm_output_resume still
> need their own patches. What do you think of this diff?
> 
> As a follow-up, the rcu in ip_output etc. then becomes redundant and
> should be removed.
> 
> 
> Best regards,
> Hyunwoo Kim
> 
> ---
> 
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index 5bcd73cbdb41..26b51ef0763f 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -126,9 +126,11 @@ int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
>  {
>  	int err;
> 
> +	rcu_read_lock();
>  	err = __ip_local_out(net, sk, skb);
>  	if (likely(err == 1))
>  		err = dst_output(net, sk, skb);
> +	rcu_read_unlock();
> 
>  	return err;
>  }
> diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
> index 68e88cb3e55c..555e62eacdc6 100644
> --- a/net/ipv4/raw.c
> +++ b/net/ipv4/raw.c
> @@ -410,9 +410,11 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
>  				skb_transport_header(skb))->type);
>  	}
> 
> +	rcu_read_lock();
>  	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
> -		      net, sk, skb, NULL, rt->dst.dev,
> +		      net, sk, skb, NULL, skb_dst_dev_rcu(skb),
>  		      dst_output);
> +	rcu_read_unlock();
>  	if (err > 0)
>  		err = net_xmit_errno(err);
>  	if (err)
> diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
> index cc35c2fcbbe0..c5ac360da38d 100644
> --- a/net/xfrm/xfrm_output.c
> +++ b/net/xfrm/xfrm_output.c
> @@ -594,6 +594,7 @@ int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
>  {
>  	struct net *net = xs_net(skb_dst(skb)->xfrm);
> 
> +	rcu_read_lock();
>  	while (likely((err = xfrm_output_one(skb, err)) == 0)) {
>  		nf_reset_ct(skb);
> 
> @@ -601,12 +602,14 @@ int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
>  		if (unlikely(err != 1))
>  			goto out;
> 
> -		if (!skb_dst(skb)->xfrm)
> -			return dst_output(net, sk, skb);
> +		if (!skb_dst(skb)->xfrm) {
> +			err = dst_output(net, sk, skb);
> +			goto out;
> +		}
> 
>  		err = nf_hook(skb_dst(skb)->ops->family,
>  			      NF_INET_POST_ROUTING, net, sk, skb,
> -			      NULL, skb_dst(skb)->dev, xfrm_output2);
> +			      NULL, skb_dst_dev_rcu(skb), xfrm_output2);
>  		if (unlikely(err != 1))
>  			goto out;
>  	}
> @@ -615,6 +618,7 @@ int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
>  		err = 0;
> 
>  out:
> +	rcu_read_unlock();
>  	return err;
>  }
>  EXPORT_SYMBOL_GPL(xfrm_output_resume);

I prefer this over locking in each of the output functions. IPv6 should
be updated as well.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-06-03 14:52 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-31  7:06 [PATCH net v2] net: protect egress device access in the output path with rcu_read_lock Hyunwoo Kim
2026-06-02 15:45 ` Ido Schimmel
2026-06-02 16:24   ` David Ahern
2026-06-03 12:58     ` Hyunwoo Kim
2026-06-03 14:52       ` David Ahern

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox