All of lore.kernel.org
 help / color / mirror / Atom feed
From: Josh Triplett <josh@joshtriplett.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>,
	Akemi Yagi <toracat@elrepo.org>, DJ Delorie <dj@redhat.com>,
	David Sterba <dsterba@suse.cz>,
	David Howells <dhowells@redhat.com>,
	Eric Biggers <ebiggers@kernel.org>,
	Al Viro <viro@zeniv.linux.org.uk>,
	linux-fsdevel <linux-fsdevel@vger.kernel.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@kernel.org>,
	Vincent Guittot <vincent.guittot@linaro.org>
Subject: Re: [PATCH 0/2] pipe: Fixes [ver #2]
Date: Wed, 18 Dec 2019 16:14:46 -0800	[thread overview]
Message-ID: <20191219001446.GA49812@localhost> (raw)
In-Reply-To: <20191219000013.GB13065@localhost>

[-- Attachment #1: Type: text/plain, Size: 540 bytes --]

On Wed, Dec 18, 2019 at 04:03:18PM -0800, Josh Triplett wrote:
> On Wed, Dec 18, 2019 at 02:51:27PM -0800, Linus Torvalds wrote:
> > That's my latest version, but you'll have to tweak it a tiny bit
> > because of d1c6a2aa02af ("pipe: simplify signal handling in
> > pipe_read() and add comments") which I did after that patch.
> 
> That's what I encountered, and I ended up manually fixing it up,
> resulting in the attached patch. Does that look reasonable?

Er, wrong file. That's the original patch; the attached patch is the
right one.

[-- Attachment #2: linux-pipe-fix.patch --]
[-- Type: text/x-diff, Size: 9621 bytes --]

diff --git a/fs/coredump.c b/fs/coredump.c
index b1ea7dfbd149..f8296a82d01d 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -517,7 +517,7 @@ static void wait_for_dump_helpers(struct file *file)
 	pipe_lock(pipe);
 	pipe->readers++;
 	pipe->writers--;
-	wake_up_interruptible_sync(&pipe->wait);
+	wake_up_interruptible_sync(&pipe->rd_wait);
 	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 	pipe_unlock(pipe);
 
@@ -525,7 +525,7 @@ static void wait_for_dump_helpers(struct file *file)
 	 * We actually want wait_event_freezable() but then we need
 	 * to clear TIF_SIGPENDING and improve dump_interrupted().
 	 */
-	wait_event_interruptible(pipe->wait, pipe->readers == 1);
+	wait_event_interruptible(pipe->rd_wait, pipe->readers == 1);
 
 	pipe_lock(pipe);
 	pipe->readers--;
diff --git a/fs/pipe.c b/fs/pipe.c
index 04d004ee2e8c..5ca6446ca2b1 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -108,16 +108,19 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
 /* Drop the inode semaphore and wait for a pipe event, atomically */
 void pipe_wait(struct pipe_inode_info *pipe)
 {
-	DEFINE_WAIT(wait);
+	DEFINE_WAIT(rdwait);
+	DEFINE_WAIT(wrwait);
 
 	/*
 	 * Pipes are system-local resources, so sleeping on them
 	 * is considered a noninteractive wait:
 	 */
-	prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
 	pipe_unlock(pipe);
 	schedule();
-	finish_wait(&pipe->wait, &wait);
+	finish_wait(&pipe->rd_wait, &rdwait);
+	finish_wait(&pipe->wr_wait, &wrwait);
 	pipe_lock(pipe);
 }
 
@@ -286,7 +289,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 	size_t total_len = iov_iter_count(to);
 	struct file *filp = iocb->ki_filp;
 	struct pipe_inode_info *pipe = filp->private_data;
-	bool was_full;
+	bool was_full, wake_next_reader = false;
 	ssize_t ret;
 
 	/* Null read succeeds. */
@@ -344,10 +347,10 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 
 			if (!buf->len) {
 				pipe_buf_release(pipe, buf);
-				spin_lock_irq(&pipe->wait.lock);
+				spin_lock_irq(&pipe->rd_wait.lock);
 				tail++;
 				pipe->tail = tail;
-				spin_unlock_irq(&pipe->wait.lock);
+				spin_unlock_irq(&pipe->rd_wait.lock);
 			}
 			total_len -= chars;
 			if (!total_len)
@@ -384,7 +387,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 		 * no data.
 		 */
 		if (unlikely(was_full)) {
-			wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM);
+			wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
 			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 		}
 
@@ -394,18 +397,23 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 		 * since we've done any required wakeups and there's no need
 		 * to mark anything accessed. And we've dropped the lock.
 		 */
-		if (wait_event_interruptible(pipe->wait, pipe_readable(pipe)) < 0)
+		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
 			return -ERESTARTSYS;
 
 		__pipe_lock(pipe);
 		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
+		wake_next_reader = true;
 	}
+	if (pipe_empty(pipe->head, pipe->tail))
+		wake_next_reader = false;
 	__pipe_unlock(pipe);
 
 	if (was_full) {
-		wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM);
+		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
 		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 	}
+	if (wake_next_reader)
+		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
 	if (ret > 0)
 		file_accessed(filp);
 	return ret;
@@ -437,6 +445,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 	size_t total_len = iov_iter_count(from);
 	ssize_t chars;
 	bool was_empty = false;
+	bool wake_next_writer = false;
 
 	/* Null write succeeds. */
 	if (unlikely(total_len == 0))
@@ -515,16 +524,16 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 			 * it, either the reader will consume it or it'll still
 			 * be there for the next write.
 			 */
-			spin_lock_irq(&pipe->wait.lock);
+			spin_lock_irq(&pipe->rd_wait.lock);
 
 			head = pipe->head;
 			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
-				spin_unlock_irq(&pipe->wait.lock);
+				spin_unlock_irq(&pipe->rd_wait.lock);
 				continue;
 			}
 
 			pipe->head = head + 1;
-			spin_unlock_irq(&pipe->wait.lock);
+			spin_unlock_irq(&pipe->rd_wait.lock);
 
 			/* Insert it into the buffer array */
 			buf = &pipe->bufs[head & mask];
@@ -576,14 +585,17 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 		 */
 		__pipe_unlock(pipe);
 		if (was_empty) {
-			wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM);
+			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
 			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 		}
-		wait_event_interruptible(pipe->wait, pipe_writable(pipe));
+		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
 		__pipe_lock(pipe);
 		was_empty = pipe_empty(head, pipe->tail);
+		wake_next_writer = true;
 	}
 out:
+	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+		wake_next_writer = false;
 	__pipe_unlock(pipe);
 
 	/*
@@ -596,9 +608,11 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 	 * wake up pending jobs
 	 */
 	if (was_empty) {
-		wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM);
+		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 	}
+	if (wake_next_writer)
+		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
 	if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
 		int err = file_update_time(filp);
 		if (err)
@@ -642,12 +656,15 @@ pipe_poll(struct file *filp, poll_table *wait)
 	unsigned int head, tail;
 
 	/*
-	 * Reading only -- no need for acquiring the semaphore.
+	 * Reading pipe state only -- no need for acquiring the semaphore.
 	 *
 	 * But because this is racy, the code has to add the
 	 * entry to the poll table _first_ ..
 	 */
-	poll_wait(filp, &pipe->wait, wait);
+	if (filp->f_mode & FMODE_READ)
+		poll_wait(filp, &pipe->rd_wait, wait);
+	if (filp->f_mode & FMODE_WRITE)
+		poll_wait(filp, &pipe->wr_wait, wait);
 
 	/*
 	 * .. and only then can you do the racy tests. That way,
@@ -706,7 +723,8 @@ pipe_release(struct inode *inode, struct file *file)
 		pipe->writers--;
 
 	if (pipe->readers || pipe->writers) {
-		wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLOUT | EPOLLRDNORM | EPOLLWRNORM | EPOLLERR | EPOLLHUP);
+		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLHUP);
+		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM | EPOLLERR | EPOLLHUP);
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 	}
@@ -789,7 +807,8 @@ struct pipe_inode_info *alloc_pipe_info(void)
 			     GFP_KERNEL_ACCOUNT);
 
 	if (pipe->bufs) {
-		init_waitqueue_head(&pipe->wait);
+		init_waitqueue_head(&pipe->rd_wait);
+		init_waitqueue_head(&pipe->wr_wait);
 		pipe->r_counter = pipe->w_counter = 1;
 		pipe->max_usage = pipe_bufs;
 		pipe->ring_size = pipe_bufs;
@@ -1007,7 +1026,8 @@ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
 
 static void wake_up_partner(struct pipe_inode_info *pipe)
 {
-	wake_up_interruptible(&pipe->wait);
+	wake_up_interruptible(&pipe->rd_wait);
+	wake_up_interruptible(&pipe->wr_wait);
 }
 
 static int fifo_open(struct inode *inode, struct file *filp)
@@ -1118,13 +1138,13 @@ static int fifo_open(struct inode *inode, struct file *filp)
 
 err_rd:
 	if (!--pipe->readers)
-		wake_up_interruptible(&pipe->wait);
+		wake_up_interruptible(&pipe->wr_wait);
 	ret = -ERESTARTSYS;
 	goto err;
 
 err_wr:
 	if (!--pipe->writers)
-		wake_up_interruptible(&pipe->wait);
+		wake_up_interruptible(&pipe->rd_wait);
 	ret = -ERESTARTSYS;
 	goto err;
 
@@ -1251,7 +1271,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
 	pipe->max_usage = nr_slots;
 	pipe->tail = tail;
 	pipe->head = head;
-	wake_up_interruptible_all(&pipe->wait);
+	wake_up_interruptible_all(&pipe->rd_wait);
+	wake_up_interruptible_all(&pipe->wr_wait);
 	return pipe->max_usage * PAGE_SIZE;
 
 out_revert_acct:
diff --git a/fs/splice.c b/fs/splice.c
index 3009652a41c8..d671936d0aad 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -165,8 +165,8 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
 static void wakeup_pipe_readers(struct pipe_inode_info *pipe)
 {
 	smp_mb();
-	if (waitqueue_active(&pipe->wait))
-		wake_up_interruptible(&pipe->wait);
+	if (waitqueue_active(&pipe->rd_wait))
+		wake_up_interruptible(&pipe->rd_wait);
 	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 }
 
@@ -462,8 +462,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
 static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
 {
 	smp_mb();
-	if (waitqueue_active(&pipe->wait))
-		wake_up_interruptible(&pipe->wait);
+	if (waitqueue_active(&pipe->wr_wait))
+		wake_up_interruptible(&pipe->wr_wait);
 	kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 }
 
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index dbcfa6892384..d5765039652a 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -47,7 +47,7 @@ struct pipe_buffer {
  **/
 struct pipe_inode_info {
 	struct mutex mutex;
-	wait_queue_head_t wait;
+	wait_queue_head_t rd_wait, wr_wait;
 	unsigned int head;
 	unsigned int tail;
 	unsigned int max_usage;

  reply	other threads:[~2019-12-19  0:15 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-05 22:30 [PATCH 0/2] pipe: Fixes [ver #2] David Howells
2019-12-05 22:30 ` [PATCH 1/2] pipe: Remove assertion from pipe_poll() " David Howells
2019-12-05 22:30 ` [PATCH 2/2] pipe: Fix missing mask update after pipe_wait() " David Howells
2019-12-05 23:58   ` Linus Torvalds
2019-12-06 13:56 ` [PATCH 0/2] pipe: Fixes " David Sterba
2019-12-06 17:09   ` Linus Torvalds
2019-12-06 17:42     ` Linus Torvalds
2019-12-06 18:59       ` Linus Torvalds
2019-12-07 21:31         ` Akemi Yagi
2019-12-07 21:31           ` Akemi Yagi
2019-12-08 16:45           ` Akemi Yagi
2019-12-08 18:04             ` Linus Torvalds
2019-12-09  3:07               ` Linus Torvalds
2019-12-06 20:28   ` Linus Torvalds
2019-12-06 21:04     ` Linus Torvalds
2019-12-07  3:50       ` Linus Torvalds
2019-12-07  4:01         ` Linus Torvalds
2019-12-07 22:47         ` Linus Torvalds
2019-12-09  9:53           ` Vincent Guittot
2019-12-09 17:48             ` Linus Torvalds
2019-12-09 17:57               ` Akemi Yagi
2019-12-09 18:18                 ` Linus Torvalds
2019-12-09 18:24                   ` Linus Torvalds
2019-12-18 20:59                   ` Josh Triplett
2019-12-10  2:58               ` DJ Delorie
2019-12-10 14:38               ` Vincent Guittot
2019-12-10 17:39                 ` Linus Torvalds
2019-12-11 18:09               ` DJ Delorie
2019-12-11 18:59                 ` Linus Torvalds
2019-12-11 20:55           ` David Howells
2019-12-12  1:28             ` Linus Torvalds
2019-12-12  7:34               ` David Howells
2019-12-12 10:18           ` Konstantin Khlebnikov
2019-12-18 22:51             ` Linus Torvalds
2019-12-19  0:03               ` Josh Triplett
2019-12-19  0:14                 ` Josh Triplett [this message]
2019-12-19  0:51                   ` Linus Torvalds
2019-12-19  0:54                     ` Linus Torvalds
2019-12-19  7:56                     ` David Howells
2019-12-19 16:35                       ` Linus Torvalds
2019-12-09 14:55       ` David Sterba
2019-12-06 21:26     ` David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191219001446.GA49812@localhost \
    --to=josh@joshtriplett.org \
    --cc=dhowells@redhat.com \
    --cc=dj@redhat.com \
    --cc=dsterba@suse.cz \
    --cc=ebiggers@kernel.org \
    --cc=khlebnikov@yandex-team.ru \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=toracat@elrepo.org \
    --cc=torvalds@linux-foundation.org \
    --cc=vincent.guittot@linaro.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.