From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org, netdev@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
patches@lists.linux.dev, syzbot <syzkaller@googlegroups.com>,
Eric Dumazet <edumazet@google.com>,
"David S. Miller" <davem@davemloft.net>,
Kazunori Kobayashi <kazunori.kobayashi@miraclelinux.com>
Subject: [PATCH 5.4 185/189] ipv6: annotate some data-races around sk->sk_prot
Date: Wed, 3 Jul 2024 12:40:46 +0200 [thread overview]
Message-ID: <20240703102848.454013596@linuxfoundation.org> (raw)
In-Reply-To: <20240703102841.492044697@linuxfoundation.org>
5.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Eric Dumazet <edumazet@google.com>
commit 086d49058cd8471046ae9927524708820f5fd1c7 upstream.
IPv6 has this hack changing sk->sk_prot when an IPv6 socket
is 'converted' to an IPv4 one with IPV6_ADDRFORM option.
This operation is only performed for TCP and UDP, knowing
their 'struct proto' for the two network families are populated
in the same way, and can not disappear while a reader
might use and dereference sk->sk_prot.
If we think about it all reads of sk->sk_prot while
either socket lock or RTNL is not acquired should be using READ_ONCE().
Also note that other layers like MPTCP, XFRM, CHELSIO_TLS also
write over sk->sk_prot.
BUG: KCSAN: data-race in inet6_recvmsg / ipv6_setsockopt
write to 0xffff8881386f7aa8 of 8 bytes by task 26932 on cpu 0:
do_ipv6_setsockopt net/ipv6/ipv6_sockglue.c:492 [inline]
ipv6_setsockopt+0x3758/0x3910 net/ipv6/ipv6_sockglue.c:1019
udpv6_setsockopt+0x85/0x90 net/ipv6/udp.c:1649
sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3489
__sys_setsockopt+0x209/0x2a0 net/socket.c:2180
__do_sys_setsockopt net/socket.c:2191 [inline]
__se_sys_setsockopt net/socket.c:2188 [inline]
__x64_sys_setsockopt+0x62/0x70 net/socket.c:2188
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
read to 0xffff8881386f7aa8 of 8 bytes by task 26911 on cpu 1:
inet6_recvmsg+0x7a/0x210 net/ipv6/af_inet6.c:659
____sys_recvmsg+0x16c/0x320
___sys_recvmsg net/socket.c:2674 [inline]
do_recvmmsg+0x3f5/0xae0 net/socket.c:2768
__sys_recvmmsg net/socket.c:2847 [inline]
__do_sys_recvmmsg net/socket.c:2870 [inline]
__se_sys_recvmmsg net/socket.c:2863 [inline]
__x64_sys_recvmmsg+0xde/0x160 net/socket.c:2863
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
value changed: 0xffffffff85e0e980 -> 0xffffffff85e01580
Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 26911 Comm: syz-executor.3 Not tainted 5.17.0-rc2-syzkaller-00316-g0457e5153e0e-dirty #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Kazunori Kobayashi <kazunori.kobayashi@miraclelinux.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/ipv6/af_inet6.c | 24 ++++++++++++++++++------
net/ipv6/ipv6_sockglue.c | 6 ++++--
2 files changed, 22 insertions(+), 8 deletions(-)
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -441,11 +441,14 @@ out_unlock:
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
+ const struct proto *prot;
int err = 0;
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ prot = READ_ONCE(sk->sk_prot);
/* If the socket has its own bind function then use it. */
- if (sk->sk_prot->bind)
- return sk->sk_prot->bind(sk, uaddr, addr_len);
+ if (prot->bind)
+ return prot->bind(sk, uaddr, addr_len);
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
@@ -554,6 +557,7 @@ int inet6_ioctl(struct socket *sock, uns
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
+ const struct proto *prot;
switch (cmd) {
case SIOCADDRT:
@@ -568,9 +572,11 @@ int inet6_ioctl(struct socket *sock, uns
case SIOCSIFDSTADDR:
return addrconf_set_dstaddr(net, (void __user *) arg);
default:
- if (!sk->sk_prot->ioctl)
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ prot = READ_ONCE(sk->sk_prot);
+ if (!prot->ioctl)
return -ENOIOCTLCMD;
- return sk->sk_prot->ioctl(sk, cmd, arg);
+ return prot->ioctl(sk, cmd, arg);
}
/*NOTREACHED*/
return 0;
@@ -582,11 +588,14 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_send
int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
+ const struct proto *prot;
if (unlikely(inet_send_prepare(sk)))
return -EAGAIN;
- return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udpv6_sendmsg,
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ prot = READ_ONCE(sk->sk_prot);
+ return INDIRECT_CALL_2(prot->sendmsg, tcp_sendmsg, udpv6_sendmsg,
sk, msg, size);
}
@@ -596,13 +605,16 @@ int inet6_recvmsg(struct socket *sock, s
int flags)
{
struct sock *sk = sock->sk;
+ const struct proto *prot;
int addr_len = 0;
int err;
if (likely(!(flags & MSG_ERRQUEUE)))
sock_rps_record_flow(sk);
- err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udpv6_recvmsg,
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ prot = READ_ONCE(sk->sk_prot);
+ err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg,
sk, msg, size, flags & MSG_DONTWAIT,
flags & ~MSG_DONTWAIT, &addr_len);
if (err >= 0)
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -222,7 +222,8 @@ static int do_ipv6_setsockopt(struct soc
sock_prot_inuse_add(net, sk->sk_prot, -1);
sock_prot_inuse_add(net, &tcp_prot, 1);
local_bh_enable();
- sk->sk_prot = &tcp_prot;
+ /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */
+ WRITE_ONCE(sk->sk_prot, &tcp_prot);
icsk->icsk_af_ops = &ipv4_specific;
sk->sk_socket->ops = &inet_stream_ops;
sk->sk_family = PF_INET;
@@ -236,7 +237,8 @@ static int do_ipv6_setsockopt(struct soc
sock_prot_inuse_add(net, sk->sk_prot, -1);
sock_prot_inuse_add(net, prot, 1);
local_bh_enable();
- sk->sk_prot = prot;
+ /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */
+ WRITE_ONCE(sk->sk_prot, prot);
sk->sk_socket->ops = &inet_dgram_ops;
sk->sk_family = PF_INET;
}
next parent reply other threads:[~2024-07-03 10:58 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20240703102841.492044697@linuxfoundation.org>
2024-07-03 10:40 ` Greg Kroah-Hartman [this message]
2024-07-03 10:40 ` [PATCH 5.4 186/189] ipv6: Fix data races around sk->sk_prot Greg Kroah-Hartman
2024-07-03 10:40 ` [PATCH 5.4 187/189] tcp: Fix data races around icsk->icsk_af_ops Greg Kroah-Hartman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240703102848.454013596@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kazunori.kobayashi@miraclelinux.com \
--cc=netdev@vger.kernel.org \
--cc=patches@lists.linux.dev \
--cc=stable@vger.kernel.org \
--cc=syzkaller@googlegroups.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).