* [PATCH net-next 1/2] ip_tunnel: use WRITE_ONCE in ip_tunnel_encap_setup
2026-07-01 15:51 [PATCH net-next 0/2] sit: prepare for RTNL-less link dumping Eric Dumazet
@ 2026-07-01 15:51 ` Eric Dumazet
2026-07-02 3:11 ` Kuniyuki Iwashima
2026-07-01 15:51 ` [PATCH net-next 2/2] sit: no longer rely on RTNL in ipip6_fill_info() Eric Dumazet
1 sibling, 1 reply; 5+ messages in thread
From: Eric Dumazet @ 2026-07-01 15:51 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Simon Horman, Kuniyuki Iwashima, Ido Schimmel, David Ahern,
netdev, eric.dumazet, Eric Dumazet
Update ip_tunnel_encap_setup() to use WRITE_ONCE() when writing
to encap fields (type, sport, dport, flags) and hlen fields.
This ensures that concurrent lockless readers (like fill_info)
do not see torn writes.
Also remove the unsafe memset() on t->encap which could cause
concurrent readers to transiently see zeroed fields.
Removing it also fixes a bug where t->encap was left zeroed when
ip_encap_hlen() failed, leaving a partial configuration (encap cleared
while encap_hlen and hlen kept their previous values).
Fixes: 56328486539d ("net: Changes to ip_tunnel to support foo-over-udp encapsulation")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv4/ip_tunnel.c | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 9d114bd575f928b0ab46ef3007e456692d82b497..f7bbdd1bd323b1973ec1ea1d93f0ed6ab703bcd3 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -491,19 +491,17 @@ int ip_tunnel_encap_setup(struct ip_tunnel *t,
{
int hlen;
- memset(&t->encap, 0, sizeof(t->encap));
-
hlen = ip_encap_hlen(ipencap);
if (hlen < 0)
return hlen;
- t->encap.type = ipencap->type;
- t->encap.sport = ipencap->sport;
- t->encap.dport = ipencap->dport;
- t->encap.flags = ipencap->flags;
+ WRITE_ONCE(t->encap.type, ipencap->type);
+ WRITE_ONCE(t->encap.sport, ipencap->sport);
+ WRITE_ONCE(t->encap.dport, ipencap->dport);
+ WRITE_ONCE(t->encap.flags, ipencap->flags);
- t->encap_hlen = hlen;
- t->hlen = t->encap_hlen + t->tun_hlen;
+ WRITE_ONCE(t->encap_hlen, hlen);
+ WRITE_ONCE(t->hlen, hlen + t->tun_hlen);
return 0;
}
--
2.55.0.rc0.799.gd6f94ed593-goog
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH net-next 2/2] sit: no longer rely on RTNL in ipip6_fill_info()
2026-07-01 15:51 [PATCH net-next 0/2] sit: prepare for RTNL-less link dumping Eric Dumazet
2026-07-01 15:51 ` [PATCH net-next 1/2] ip_tunnel: use WRITE_ONCE in ip_tunnel_encap_setup Eric Dumazet
@ 2026-07-01 15:51 ` Eric Dumazet
2026-07-02 3:13 ` Kuniyuki Iwashima
1 sibling, 1 reply; 5+ messages in thread
From: Eric Dumazet @ 2026-07-01 15:51 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Simon Horman, Kuniyuki Iwashima, Ido Schimmel, David Ahern,
netdev, eric.dumazet, Eric Dumazet
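Update ipip6_fill_info() to read configuration fields (link, saddr,
daddr, ttl, tos, frag_off, proto, i_flags, fwmark, 6rd relay prefix and
prefix lengths, encap type/sport/dport/flags) locklessly using READ_ONCE().
Annotate the matching writers in ipip6_tunnel_update() and
ipip6_tunnel_update_6rd() with WRITE_ONCE().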
Annotate the bitmap reads for i_flags by copying the first element
atomically using READ_ONCE() into a local variable, as the whole
bitmap fits in one unsigned long.
This allows ipip6_fill_info() to run safely without RTNL.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv6/sit.c | 70 ++++++++++++++++++++++++++++----------------------
1 file changed, 40 insertions(+), 30 deletions(-)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index a38b24fb838424b6d3cb063d77aa85cf719ce6c5..c7abbb09bfd3dfada6fc1e1682ac42acb7248ad9 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1143,17 +1143,17 @@ static void ipip6_tunnel_update(struct ip_tunnel *t,
ipip6_tunnel_unlink(sitn, t);
synchronize_net();
- t->parms.iph.saddr = p->iph.saddr;
- t->parms.iph.daddr = p->iph.daddr;
+ WRITE_ONCE(t->parms.iph.saddr, p->iph.saddr);
+ WRITE_ONCE(t->parms.iph.daddr, p->iph.daddr);
__dev_addr_set(t->dev, &p->iph.saddr, 4);
memcpy(t->dev->broadcast, &p->iph.daddr, 4);
ipip6_tunnel_link(sitn, t);
- t->parms.iph.ttl = p->iph.ttl;
- t->parms.iph.tos = p->iph.tos;
- t->parms.iph.frag_off = p->iph.frag_off;
- if (t->parms.link != p->link || t->fwmark != fwmark) {
- t->parms.link = p->link;
- t->fwmark = fwmark;
+ WRITE_ONCE(t->parms.iph.ttl, p->iph.ttl);
+ WRITE_ONCE(t->parms.iph.tos, p->iph.tos);
+ WRITE_ONCE(t->parms.iph.frag_off, p->iph.frag_off);
+ if (READ_ONCE(t->parms.link) != p->link || READ_ONCE(t->fwmark) != fwmark) {
+ WRITE_ONCE(t->parms.link, p->link);
+ WRITE_ONCE(t->fwmark, fwmark);
ipip6_tunnel_bind_dev(t->dev);
}
dst_cache_reset(&t->dst_cache);
@@ -1184,9 +1184,9 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
return -EINVAL;
t->ip6rd.prefix = prefix;
- t->ip6rd.relay_prefix = relay_prefix;
- t->ip6rd.prefixlen = ip6rd->prefixlen;
- t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
+ WRITE_ONCE(t->ip6rd.relay_prefix, relay_prefix);
+ WRITE_ONCE(t->ip6rd.prefixlen, ip6rd->prefixlen);
+ WRITE_ONCE(t->ip6rd.relay_prefixlen, ip6rd->relay_prefixlen);
dst_cache_reset(&t->dst_cache);
netdev_state_change(t->dev);
return 0;
@@ -1693,42 +1693,52 @@ static size_t ipip6_get_size(const struct net_device *dev)
static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- struct ip_tunnel_parm_kern *parm = &tunnel->parms;
-
- if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
- nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
- nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
- nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
- nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
+ const struct ip_tunnel *tunnel = netdev_priv(dev);
+ const struct ip_tunnel_parm_kern *parm;
+ IP_TUNNEL_DECLARE_FLAGS(i_flags);
+ __be16 frag_off;
+ __be32 daddr;
+ __be32 saddr;
+
+ parm = &tunnel->parms;
+ i_flags[0] = READ_ONCE(parm->i_flags[0]);
+ frag_off = READ_ONCE(parm->iph.frag_off);
+ saddr = READ_ONCE(parm->iph.saddr);
+ daddr = READ_ONCE(parm->iph.daddr);
+
+ if (nla_put_u32(skb, IFLA_IPTUN_LINK, READ_ONCE(parm->link)) ||
+ nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, saddr) ||
+ nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, daddr) ||
+ nla_put_u8(skb, IFLA_IPTUN_TTL, READ_ONCE(parm->iph.ttl)) ||
+ nla_put_u8(skb, IFLA_IPTUN_TOS, READ_ONCE(parm->iph.tos)) ||
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
- !!(parm->iph.frag_off & htons(IP_DF))) ||
- nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
+ !!(frag_off & htons(IP_DF))) ||
+ nla_put_u8(skb, IFLA_IPTUN_PROTO, READ_ONCE(parm->iph.protocol)) ||
nla_put_be16(skb, IFLA_IPTUN_FLAGS,
- ip_tunnel_flags_to_be16(parm->i_flags)) ||
- nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
+ ip_tunnel_flags_to_be16(i_flags)) ||
+ nla_put_u32(skb, IFLA_IPTUN_FWMARK, READ_ONCE(tunnel->fwmark)))
goto nla_put_failure;
#ifdef CONFIG_IPV6_SIT_6RD
if (nla_put_in6_addr(skb, IFLA_IPTUN_6RD_PREFIX,
&tunnel->ip6rd.prefix) ||
nla_put_in_addr(skb, IFLA_IPTUN_6RD_RELAY_PREFIX,
- tunnel->ip6rd.relay_prefix) ||
+ READ_ONCE(tunnel->ip6rd.relay_prefix)) ||
nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN,
- tunnel->ip6rd.prefixlen) ||
+ READ_ONCE(tunnel->ip6rd.prefixlen)) ||
nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN,
- tunnel->ip6rd.relay_prefixlen))
+ READ_ONCE(tunnel->ip6rd.relay_prefixlen)))
goto nla_put_failure;
#endif
if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
- tunnel->encap.type) ||
+ READ_ONCE(tunnel->encap.type)) ||
nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
- tunnel->encap.sport) ||
+ READ_ONCE(tunnel->encap.sport)) ||
nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
- tunnel->encap.dport) ||
+ READ_ONCE(tunnel->encap.dport)) ||
nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
- tunnel->encap.flags))
+ READ_ONCE(tunnel->encap.flags)))
goto nla_put_failure;
return 0;
--
2.55.0.rc0.799.gd6f94ed593-goog
^ permalink raw reply related [flat|nested] 5+ messages in thread