netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Kuniyuki Iwashima <kuniyu@amazon.com>
To: "David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Cc: Kuniyuki Iwashima <kuniyu@amazon.com>,
	Kuniyuki Iwashima <kuni1840@gmail.com>, <netdev@vger.kernel.org>
Subject: [PATCH v1 net 04/15] af_unix: Annotate data-races around sk->sk_state in unix_write_space() and poll().
Date: Mon, 3 Jun 2024 07:32:20 -0700	[thread overview]
Message-ID: <20240603143231.62085-5-kuniyu@amazon.com> (raw)
In-Reply-To: <20240603143231.62085-1-kuniyu@amazon.com>

unix_poll() and unix_dgram_poll() read sk->sk_state locklessly and
calls unix_writable() which also reads sk->sk_state without holding
unix_state_lock().

Let's use READ_ONCE() in unix_poll() and unix_dgram_poll() and pass
it to unix_writable().

While at it, we remove TCP_SYN_SENT check in unix_dgram_poll() as
that state does not exist for AF_UNIX socket since the code was added.

Fixes: 1586a5877db9 ("af_unix: do not report POLLOUT on listeners")
Fixes: 3c73419c09a5 ("af_unix: fix 'poll for write'/ connected DGRAM sockets")
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 net/unix/af_unix.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dfb74822ed2e..e7a756e8711d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -530,9 +530,9 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 	return 0;
 }
 
-static int unix_writable(const struct sock *sk)
+static int unix_writable(const struct sock *sk, unsigned char state)
 {
-	return sk->sk_state != TCP_LISTEN &&
+	return state != TCP_LISTEN &&
 	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 }
 
@@ -541,7 +541,7 @@ static void unix_write_space(struct sock *sk)
 	struct socket_wq *wq;
 
 	rcu_read_lock();
-	if (unix_writable(sk)) {
+	if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
 		wq = rcu_dereference(sk->sk_wq);
 		if (skwq_has_sleeper(wq))
 			wake_up_interruptible_sync_poll(&wq->wait,
@@ -3129,12 +3129,14 @@ static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
 	struct sock *sk = sock->sk;
+	unsigned char state;
 	__poll_t mask;
 	u8 shutdown;
 
 	sock_poll_wait(file, sock, wait);
 	mask = 0;
 	shutdown = READ_ONCE(sk->sk_shutdown);
+	state = READ_ONCE(sk->sk_state);
 
 	/* exceptional events? */
 	if (READ_ONCE(sk->sk_err))
@@ -3156,14 +3158,14 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
 
 	/* Connection-based need to check for termination and startup */
 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
-	    sk->sk_state == TCP_CLOSE)
+	    state == TCP_CLOSE)
 		mask |= EPOLLHUP;
 
 	/*
 	 * we set writable also when the other side has shut down the
 	 * connection. This prevents stuck sockets.
 	 */
-	if (unix_writable(sk))
+	if (unix_writable(sk, state))
 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
 
 	return mask;
@@ -3174,12 +3176,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
 {
 	struct sock *sk = sock->sk, *other;
 	unsigned int writable;
+	unsigned char state;
 	__poll_t mask;
 	u8 shutdown;
 
 	sock_poll_wait(file, sock, wait);
 	mask = 0;
 	shutdown = READ_ONCE(sk->sk_shutdown);
+	state = READ_ONCE(sk->sk_state);
 
 	/* exceptional events? */
 	if (READ_ONCE(sk->sk_err) ||
@@ -3199,19 +3203,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/* Connection-based need to check for termination and startup */
-	if (sk->sk_type == SOCK_SEQPACKET) {
-		if (sk->sk_state == TCP_CLOSE)
-			mask |= EPOLLHUP;
-		/* connection hasn't started yet? */
-		if (sk->sk_state == TCP_SYN_SENT)
-			return mask;
-	}
+	if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
+		mask |= EPOLLHUP;
 
 	/* No write status requested, avoid expensive OUT tests. */
 	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
 		return mask;
 
-	writable = unix_writable(sk);
+	writable = unix_writable(sk, state);
 	if (writable) {
 		unix_state_lock(sk);
 
-- 
2.30.2


  parent reply	other threads:[~2024-06-03 14:34 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-03 14:32 [PATCH v1 net 00/15] af_unix: Fix lockless access of sk->sk_state and others fields Kuniyuki Iwashima
2024-06-03 14:32 ` [PATCH v1 net 01/15] af_unix: Set sk->sk_state under unix_state_lock() for truly disconencted peer Kuniyuki Iwashima
2024-06-03 16:26   ` Cong Wang
2024-06-03 16:32     ` Kuniyuki Iwashima
2024-06-03 14:32 ` [PATCH v1 net 02/15] af_unix: Annodate data-races around sk->sk_state for writers Kuniyuki Iwashima
2024-06-03 14:32 ` [PATCH v1 net 03/15] af_unix: Annotate data-race of sk->sk_state in unix_inq_len() Kuniyuki Iwashima
2024-06-03 14:32 ` Kuniyuki Iwashima [this message]
2024-06-03 14:32 ` [PATCH v1 net 05/15] af_unix: Annotate data-race of sk->sk_state in unix_stream_connect() Kuniyuki Iwashima
2024-06-03 14:32 ` [PATCH v1 net 06/15] af_unix: Annotate data-race of sk->sk_state in unix_accept() Kuniyuki Iwashima
2024-06-03 14:32 ` [PATCH v1 net 07/15] af_unix: Annotate data-races around sk->sk_state in sendmsg() and recvmsg() Kuniyuki Iwashima
2024-06-03 14:32 ` [PATCH v1 net 08/15] af_unix: Annotate data-race of sk->sk_state in unix_stream_read_skb() Kuniyuki Iwashima
2024-06-03 14:32 ` [PATCH v1 net 09/15] af_unix: Annotate data-races around sk->sk_state in UNIX_DIAG Kuniyuki Iwashima
2024-06-03 14:32 ` [PATCH v1 net 10/15] af_unix: Annotate data-races around sk->sk_sndbuf Kuniyuki Iwashima
2024-06-03 14:32 ` [PATCH v1 net 11/15] af_unix: Annotate data-race of net->unx.sysctl_max_dgram_qlen Kuniyuki Iwashima
2024-06-03 14:32 ` [PATCH v1 net 12/15] af_unix: Use unix_recvq_full_lockless() in unix_stream_connect() Kuniyuki Iwashima
2024-06-03 14:32 ` [PATCH v1 net 13/15] af_unix: Use skb_queue_empty_lockless() in unix_release_sock() Kuniyuki Iwashima
2024-06-03 14:32 ` [PATCH v1 net 14/15] af_unix: Use skb_queue_len_lockless() in sk_diag_show_rqlen() Kuniyuki Iwashima
2024-06-03 14:32 ` [PATCH v1 net 15/15] af_unix: Annotate data-race of sk->sk_shutdown in sk_diag_fill() Kuniyuki Iwashima

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240603143231.62085-5-kuniyu@amazon.com \
    --to=kuniyu@amazon.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=kuni1840@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).