From: Ziyu Zhang <ziyuzhang201@gmail.com>
To: Stefano Garzarella <sgarzare@redhat.com>,
"David S . Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>, Andy King <acking@vmware.com>,
George Zhang <georgezhang@vmware.com>,
Dmitry Torokhov <dtor@vmware.com>,
virtualization@lists.linux.dev, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, baijiaju1990@gmail.com,
r33s3n6@gmail.com, gality369@gmail.com, zhenghaoran154@gmail.com,
hanguidong02@gmail.com, zzzccc427@gmail.com,
Ziyu Zhang <ziyuzhang201@gmail.com>
Subject: [PATCH net] vsock: keep poll shutdown state consistent
Date: Sat, 16 May 2026 11:47:45 +0800 [thread overview]
Message-ID: <20260516034745.260442-1-ziyuzhang201@gmail.com> (raw)
vsock_poll() reads vsk->peer_shutdown before taking the socket
lock to set EPOLLHUP and EPOLLRDHUP, then reads it again under the
lock to report EOF readability. A shutdown packet can update
peer_shutdown while poll is waiting for the lock, so one poll invocation
can report EPOLLIN without the corresponding HUP/RDHUP bits.
Keep non-connectible sockets on a single lockless READ_ONCE()
snapshot. For connectible sockets, defer shutdown-derived poll bits
until after lock_sock() and use one READ_ONCE() snapshot for both EOF
readability and HUP/RDHUP. This preserves shutdowns that arrive before
the lock is acquired and keeps all peer-shutdown-derived bits consistent
for a poll pass.
Fixes: d021c344051a ("VSOCK: Introduce VM Sockets")
Signed-off-by: Ziyu Zhang <ziyuzhang201@gmail.com>
---
net/vmw_vsock/af_vsock.c | 40 ++++++++++++++++++++++++++--------------
1 file changed, 26 insertions(+), 14 deletions(-)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index adcba1b7b..bed42347b 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1122,6 +1122,25 @@ static int vsock_shutdown(struct socket *sock, int mode)
return err;
}
+static __poll_t vsock_poll_shutdown(struct sock *sk, u32 peer_shutdown)
+{
+ __poll_t mask = 0;
+
+ /* INET sockets treat local write shutdown and peer write shutdown as a
+ * case of EPOLLHUP set.
+ */
+ if (sk->sk_shutdown == SHUTDOWN_MASK ||
+ ((sk->sk_shutdown & SEND_SHUTDOWN) &&
+ (peer_shutdown & SEND_SHUTDOWN)))
+ mask |= EPOLLHUP;
+
+ if (sk->sk_shutdown & RCV_SHUTDOWN ||
+ peer_shutdown & SEND_SHUTDOWN)
+ mask |= EPOLLRDHUP;
+
+ return mask;
+}
+
static __poll_t vsock_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
@@ -1139,19 +1158,9 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
/* Signify that there has been an error on this socket. */
mask |= EPOLLERR;
- /* INET sockets treat local write shutdown and peer write shutdown as a
- * case of EPOLLHUP set.
- */
- if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
- ((sk->sk_shutdown & SEND_SHUTDOWN) &&
- (vsk->peer_shutdown & SEND_SHUTDOWN))) {
- mask |= EPOLLHUP;
- }
-
- if (sk->sk_shutdown & RCV_SHUTDOWN ||
- vsk->peer_shutdown & SEND_SHUTDOWN) {
- mask |= EPOLLRDHUP;
- }
+ if (!sock_type_connectible(sk->sk_type))
+ mask |= vsock_poll_shutdown(sk,
+ READ_ONCE(vsk->peer_shutdown));
if (sk_is_readable(sk))
mask |= EPOLLIN | EPOLLRDNORM;
@@ -1171,6 +1180,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
} else if (sock_type_connectible(sk->sk_type)) {
const struct vsock_transport *transport;
+ u32 peer_shutdown;
lock_sock(sk);
@@ -1203,10 +1213,12 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
* terminated should also be considered read, and we check the
* shutdown flag for that.
*/
+ peer_shutdown = READ_ONCE(vsk->peer_shutdown);
if (sk->sk_shutdown & RCV_SHUTDOWN ||
- vsk->peer_shutdown & SEND_SHUTDOWN) {
+ peer_shutdown & SEND_SHUTDOWN) {
mask |= EPOLLIN | EPOLLRDNORM;
}
+ mask |= vsock_poll_shutdown(sk, peer_shutdown);
/* Connected sockets that can produce data can be written. */
if (transport && sk->sk_state == TCP_ESTABLISHED) {
--
2.43.0
reply other threads:[~2026-05-16 3:48 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260516034745.260442-1-ziyuzhang201@gmail.com \
--to=ziyuzhang201@gmail.com \
--cc=acking@vmware.com \
--cc=baijiaju1990@gmail.com \
--cc=davem@davemloft.net \
--cc=dtor@vmware.com \
--cc=edumazet@google.com \
--cc=gality369@gmail.com \
--cc=georgezhang@vmware.com \
--cc=hanguidong02@gmail.com \
--cc=horms@kernel.org \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=r33s3n6@gmail.com \
--cc=sgarzare@redhat.com \
--cc=virtualization@lists.linux.dev \
--cc=zhenghaoran154@gmail.com \
--cc=zzzccc427@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox