From: Jann Horn <jannh@google.com>
To: Kuniyuki Iwashima <kuniyu@google.com>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>,
Paolo Abeni <pabeni@redhat.com>, Simon Horman <horms@kernel.org>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
Jann Horn <jannh@google.com>
Subject: [PATCH 2/3] af_unix: Simplify unix_stream_data_wait()
Date: Fri, 15 May 2026 20:54:09 +0200
Message-ID: <20260515-unix-recv-wait-v1-2-76adb5f063d5@google.com>
In-Reply-To: <20260515-unix-recv-wait-v1-0-76adb5f063d5@google.com>

The current implementation of unix_stream_data_wait() works like this:

- unix_stream_read_generic() grabs locks
- unix_stream_read_generic() determines that the read must block
- unix_stream_read_generic() drops locks
- unix_stream_data_wait() sets up a wait_queue_entry
- unix_stream_data_wait() rechecks that the read must still block, with
  less locking protection than unix_stream_read_generic()
- unix_stream_data_wait() waits, then loops back to recheck again
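
For reference, the loop being removed boils down to this (condensed
from the diff below):

    unix_state_lock(sk);
    for (;;) {
            prepare_to_wait(sk_sleep(sk), &wait, state);
            /* recheck under sk->lock only; u->iolock was already dropped */
            if (skb_peek_tail(&sk->sk_receive_queue) != last ||
                sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) ||
                signal_pending(current) || !timeo)
                    break;
            sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
            unix_state_unlock(sk);
            timeo = schedule_timeout(timeo);
            unix_state_lock(sk);
            if (sock_flag(sk, SOCK_DEAD))
                    break;
            sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
    }
    finish_wait(sk_sleep(sk), &wait);
    unix_state_unlock(sk);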
This is needlessly complicated, and the recheck also involves an ugly
comparison between a potentially-dangling `last` pointer and an equally
potentially-dangling `tail` pointer.

Instead, let's set up the wait_queue_entry while the locks grabbed by
unix_stream_read_generic() are still held, and then, after the wait,
directly retry the read.
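
Condensed from the diff below, the wait path then becomes (the caller
now enters with both u->iolock and sk->lock held, and this function
drops them):

    prepare_to_wait(sk_sleep(sk), &wait, state);
    unix_state_unlock(sk);
    mutex_unlock(&unix_sk(sk)->iolock);

    sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
    timeo = schedule_timeout(timeo);
    sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);

    finish_wait(sk_sleep(sk), &wait);

A wakeup that races with dropping the locks is not lost:
prepare_to_wait() runs before the unlocks, so such a wakeup makes
schedule_timeout() return immediately, and the caller then rechecks the
receive queue under the reacquired locks.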
Signed-off-by: Jann Horn <jannh@google.com>
---
net/unix/af_unix.c | 49 ++++++++++++++-----------------------------------
1 file changed, 14 insertions(+), 35 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dc71ed79be4a..b38804e2c5ac 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2710,39 +2710,22 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
/*
* Sleep until more data has arrived. But check for races..
*/
-static long unix_stream_data_wait(struct sock *sk, long timeo,
- struct sk_buff *last, bool freezable)
+static long unix_stream_data_wait(struct sock *sk, long timeo, bool freezable)
+__releases(&unix_sk(sk)->iolock)
+__releases(&unix_sk(sk)->lock)
{
unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
- struct sk_buff *tail;
DEFINE_WAIT(wait);
- unix_state_lock(sk);
-
- for (;;) {
- prepare_to_wait(sk_sleep(sk), &wait, state);
-
- tail = skb_peek_tail(&sk->sk_receive_queue);
- if (tail != last ||
- sk->sk_err ||
- (sk->sk_shutdown & RCV_SHUTDOWN) ||
- signal_pending(current) ||
- !timeo)
- break;
-
- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- unix_state_unlock(sk);
- timeo = schedule_timeout(timeo);
- unix_state_lock(sk);
-
- if (sock_flag(sk, SOCK_DEAD))
- break;
+ prepare_to_wait(sk_sleep(sk), &wait, state);
+ unix_state_unlock(sk);
+ mutex_unlock(&unix_sk(sk)->iolock);
- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- }
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ timeo = schedule_timeout(timeo);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
finish_wait(sk_sleep(sk), &wait);
- unix_state_unlock(sk);
return timeo;
}
@@ -2955,7 +2938,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
skip = max(sk_peek_offset(sk, flags), 0);
do {
- struct sk_buff *skb, *last;
+ struct sk_buff *skb;
int chunk;
unix_state_lock(sk);
@@ -2963,7 +2946,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
err = -ECONNRESET;
goto unlock;
}
- last = skb = skb_peek(&sk->sk_receive_queue);
+ skb = skb_peek(&sk->sk_receive_queue);
again:
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
@@ -2989,15 +2972,13 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
if (sk->sk_shutdown & RCV_SHUTDOWN)
goto unlock;
- unix_state_unlock(sk);
if (!timeo) {
err = -EAGAIN;
- break;
+ goto unlock;
}
- mutex_unlock(&u->iolock);
-
- timeo = unix_stream_data_wait(sk, timeo, last, freezable);
+ /* does unix_state_unlock() and drops u->iolock */
+ timeo = unix_stream_data_wait(sk, timeo, freezable);
if (signal_pending(current)) {
err = sock_intr_errno(timeo);
@@ -3013,7 +2994,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
while (skip >= unix_skb_len(skb)) {
skip -= unix_skb_len(skb);
- last = skb;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (!skb)
goto again;
@@ -3087,7 +3067,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
break;
skip = 0;
- last = skb;
unix_state_lock(sk);
skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (skb)
--
2.54.0.563.g4f69b47b94-goog