* [PATCH net v2] net: protect egress device access in the output path with rcu_read_lock
@ 2026-05-31 7:06 Hyunwoo Kim
2026-06-02 15:45 ` Ido Schimmel
0 siblings, 1 reply; 3+ messages in thread
From: Hyunwoo Kim @ 2026-05-31 7:06 UTC (permalink / raw)
To: dsahern, idosch, davem, edumazet, kuba, pabeni, horms,
steffen.klassert, herbert, andrew+netdev, kuniyu, jlayton
Cc: netdev, imv4bel
ip_mc_output(), __ip_local_out(), xfrm4_output(), raw_send_hdrinc(),
xfrm_output_resume() and vrf_output() read the egress network device
from rt->dst.dev / skb_dst(skb)->dev / skb_dst_dev(skb) and then pass
that pointer as the 'out' device argument of NF_HOOK without holding
rcu_read_lock.
dst->dev is a field protected by RCU. When a device is unregistered its
value is replaced with blackhole_netdev, and the previous device is
freed after an RCU grace period. A section that reads dst->dev and uses
that pointer must therefore hold rcu_read_lock. The functions above are
missing this protection, so when the egress device is unregistered
concurrently with transmission, a LOCAL_OUT / POST_ROUTING hook (nft
meta oif, selinux_ip_postroute_compat, etc.) can reference a device
pointer that is no longer valid.
In the leaves above, wrap the section that reads the device and runs
NF_HOOK in rcu_read_lock(), and read the device through the RCU
accessors (skb_dst_dev_rcu() / dst_dev_rcu()). As a result, the RCU
grace period cannot complete while a transmission is in flight, so the
egress device is not freed and the hook always references a valid
device.
Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()")
Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com>
---
Changes in v2:
- Changed to a net-wide patch that also fixes the issue in
raw_send_hdrinc(), xfrm_output_resume() and vrf_output().
- v1: https://lore.kernel.org/all/ahqIg6vURwYI0LJ5@v4bel/
---
drivers/net/vrf.c | 16 +++++++++++-----
net/ipv4/ip_output.c | 27 +++++++++++++++++++--------
net/ipv4/raw.c | 4 +++-
net/ipv4/xfrm4_output.c | 13 +++++++++----
net/xfrm/xfrm_output.c | 4 +++-
5 files changed, 45 insertions(+), 19 deletions(-)
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 46209917ae4d..e9a1dd961805 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -833,17 +833,23 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_device *dev = skb_dst(skb)->dev;
+ struct net_device *dev;
+ int ret;
+
+ rcu_read_lock();
+ dev = skb_dst_dev_rcu(skb);
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, dev,
- vrf_finish_output,
- !(IPCB(skb)->flags & IPSKB_REROUTED));
+ ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+ net, sk, skb, NULL, dev,
+ vrf_finish_output,
+ !(IPCB(skb)->flags & IPSKB_REROUTED));
+ rcu_read_unlock();
+ return ret;
}
/* set dst on skb to send packet to us via dev_xmit path. Allows
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5bcd73cbdb41..92a70b7e74a6 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -102,6 +102,7 @@ EXPORT_SYMBOL(ip_send_check);
int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);
+ int err;
IP_INC_STATS(net, IPSTATS_MIB_OUTREQUESTS);
@@ -117,9 +118,12 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->protocol = htons(ETH_P_IP);
- return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
- net, sk, skb, NULL, skb_dst_dev(skb),
- dst_output);
+ rcu_read_lock();
+ err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
+ net, sk, skb, NULL, skb_dst_dev_rcu(skb),
+ dst_output);
+ rcu_read_unlock();
+ return err;
}
int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -368,7 +372,11 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk,
int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct rtable *rt = skb_rtable(skb);
- struct net_device *dev = rt->dst.dev;
+ struct net_device *dev;
+ int ret;
+
+ rcu_read_lock();
+ dev = dst_dev_rcu(&rt->dst);
/*
* If the indicated interface is up and running, send the packet.
@@ -407,6 +415,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (ip_hdr(skb)->ttl == 0) {
kfree_skb(skb);
+ rcu_read_unlock();
return 0;
}
}
@@ -419,10 +428,12 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
ip_mc_finish_output);
}
- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, skb->dev,
- ip_finish_output,
- !(IPCB(skb)->flags & IPSKB_REROUTED));
+ ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+ net, sk, skb, NULL, skb->dev,
+ ip_finish_output,
+ !(IPCB(skb)->flags & IPSKB_REROUTED));
+ rcu_read_unlock();
+ return ret;
}
int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 68e88cb3e55c..555e62eacdc6 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -410,9 +410,11 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
skb_transport_header(skb))->type);
}
+ rcu_read_lock();
err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
- net, sk, skb, NULL, rt->dst.dev,
+ net, sk, skb, NULL, skb_dst_dev_rcu(skb),
dst_output);
+ rcu_read_unlock();
if (err > 0)
err = net_xmit_errno(err);
if (err)
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 0ae67d537499..d5dc084777a3 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -30,10 +30,15 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
- net, sk, skb, skb->dev, skb_dst_dev(skb),
- __xfrm4_output,
- !(IPCB(skb)->flags & IPSKB_REROUTED));
+ int ret;
+
+ rcu_read_lock();
+ ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+ net, sk, skb, skb->dev, skb_dst_dev_rcu(skb),
+ __xfrm4_output,
+ !(IPCB(skb)->flags & IPSKB_REROUTED));
+ rcu_read_unlock();
+ return ret;
}
void xfrm4_local_error(struct sk_buff *skb, u32 mtu)
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index cc35c2fcbbe0..2b69ac55c8a5 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -604,9 +604,11 @@ int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err)
if (!skb_dst(skb)->xfrm)
return dst_output(net, sk, skb);
+ rcu_read_lock();
err = nf_hook(skb_dst(skb)->ops->family,
NF_INET_POST_ROUTING, net, sk, skb,
- NULL, skb_dst(skb)->dev, xfrm_output2);
+ NULL, skb_dst_dev_rcu(skb), xfrm_output2);
+ rcu_read_unlock();
if (unlikely(err != 1))
goto out;
}
--
2.43.0
^ permalink raw reply related [flat|nested] 3+ messages in thread* Re: [PATCH net v2] net: protect egress device access in the output path with rcu_read_lock
2026-05-31 7:06 [PATCH net v2] net: protect egress device access in the output path with rcu_read_lock Hyunwoo Kim
@ 2026-06-02 15:45 ` Ido Schimmel
2026-06-02 16:24 ` David Ahern
0 siblings, 1 reply; 3+ messages in thread
From: Ido Schimmel @ 2026-06-02 15:45 UTC (permalink / raw)
To: Hyunwoo Kim
Cc: dsahern, davem, edumazet, kuba, pabeni, horms, steffen.klassert,
herbert, andrew+netdev, kuniyu, jlayton, netdev
On Sun, May 31, 2026 at 04:06:50PM +0900, Hyunwoo Kim wrote:
> drivers/net/vrf.c | 16 +++++++++++-----
> net/ipv4/ip_output.c | 27 +++++++++++++++++++--------
> net/ipv4/raw.c | 4 +++-
> net/ipv4/xfrm4_output.c | 13 +++++++++----
> net/xfrm/xfrm_output.c | 4 +++-
> 5 files changed, 45 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
> index 46209917ae4d..e9a1dd961805 100644
> --- a/drivers/net/vrf.c
> +++ b/drivers/net/vrf.c
> @@ -833,17 +833,23 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
>
> static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
> {
> - struct net_device *dev = skb_dst(skb)->dev;
> + struct net_device *dev;
> + int ret;
> +
> + rcu_read_lock();
> + dev = skb_dst_dev_rcu(skb);
>
> IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
>
> skb->dev = dev;
> skb->protocol = htons(ETH_P_IP);
>
> - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
> - net, sk, skb, NULL, dev,
> - vrf_finish_output,
> - !(IPCB(skb)->flags & IPSKB_REROUTED));
> + ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
> + net, sk, skb, NULL, dev,
> + vrf_finish_output,
> + !(IPCB(skb)->flags & IPSKB_REROUTED));
> + rcu_read_unlock();
> + return ret;
> }
Patch LGTM, thanks, but what about the IPv6 counterpart (vrf_output6())
and the other similar existing issues that Sashiko is flagging?
^ permalink raw reply [flat|nested] 3+ messages in thread* Re: [PATCH net v2] net: protect egress device access in the output path with rcu_read_lock
2026-06-02 15:45 ` Ido Schimmel
@ 2026-06-02 16:24 ` David Ahern
0 siblings, 0 replies; 3+ messages in thread
From: David Ahern @ 2026-06-02 16:24 UTC (permalink / raw)
To: Ido Schimmel, Hyunwoo Kim
Cc: davem, edumazet, kuba, pabeni, horms, steffen.klassert, herbert,
andrew+netdev, kuniyu, jlayton, netdev
On 6/2/26 9:45 AM, Ido Schimmel wrote:
> On Sun, May 31, 2026 at 04:06:50PM +0900, Hyunwoo Kim wrote:
>> drivers/net/vrf.c | 16 +++++++++++-----
>> net/ipv4/ip_output.c | 27 +++++++++++++++++++--------
>> net/ipv4/raw.c | 4 +++-
>> net/ipv4/xfrm4_output.c | 13 +++++++++----
>> net/xfrm/xfrm_output.c | 4 +++-
>> 5 files changed, 45 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
>> index 46209917ae4d..e9a1dd961805 100644
>> --- a/drivers/net/vrf.c
>> +++ b/drivers/net/vrf.c
>> @@ -833,17 +833,23 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
>>
>> static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>> {
>> - struct net_device *dev = skb_dst(skb)->dev;
>> + struct net_device *dev;
>> + int ret;
>> +
>> + rcu_read_lock();
>> + dev = skb_dst_dev_rcu(skb);
>>
>> IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
>>
>> skb->dev = dev;
>> skb->protocol = htons(ETH_P_IP);
>>
>> - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
>> - net, sk, skb, NULL, dev,
>> - vrf_finish_output,
>> - !(IPCB(skb)->flags & IPSKB_REROUTED));
>> + ret = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
>> + net, sk, skb, NULL, dev,
>> + vrf_finish_output,
>> + !(IPCB(skb)->flags & IPSKB_REROUTED));
>> + rcu_read_unlock();
>> + return ret;
>> }
>
> Patch LGTM, thanks, but what about the IPv6 counterpart (vrf_output6())
> and the other similar existing issues that Sashiko is flagging?
Common locking at a higher level than dst->output seems like a better
approach. This function is called under rcu_read_lock for some paths.
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-06-02 16:24 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-31 7:06 [PATCH net v2] net: protect egress device access in the output path with rcu_read_lock Hyunwoo Kim
2026-06-02 15:45 ` Ido Schimmel
2026-06-02 16:24 ` David Ahern
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox