From: Oleg Nesterov <oleg@redhat.com>
To: "Sapkal, Swapnil" <swapnil.sapkal@amd.com>
Cc: Mateusz Guzik <mjguzik@gmail.com>,
Manfred Spraul <manfred@colorfullife.com>,
Linus Torvalds <torvalds@linux-foundation.org>,
Christian Brauner <brauner@kernel.org>,
David Howells <dhowells@redhat.com>,
WangYuli <wangyuli@uniontech.com>,
linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
K Prateek Nayak <kprateek.nayak@amd.com>,
"Shenoy, Gautham Ranjal" <gautham.shenoy@amd.com>,
Neeraj.Upadhyay@amd.com
Subject: Re: [PATCH] pipe_read: don't wake up the writer if the pipe is still full
Date: Thu, 27 Feb 2025 22:12:29 +0100 [thread overview]
Message-ID: <20250227211229.GD25639@redhat.com> (raw)
In-Reply-To: <c63cc8e8-424f-43e2-834f-fc449b24787e@amd.com>
Sapkal, first of all, thanks again!
On 02/27, Sapkal, Swapnil wrote:
>
> >1. with 1 fd instead of 20:
> >
> >/usr/bin/hackbench -g 16 -f 1 --threads --pipe -l 100000 -s 100
>
> With this I was not able to reproduce the issue. I tried almost 5000
> iterations.
OK,
> >2. with a size which divides 4096 evenly (e.g., 128):
...
> When I retain the number of
> groups to 16 and change the message size to 128, it took me around 150
> iterations to reproduce this issue (with 100 bytes it was 20 iterations).
> The exact command was
>
> /usr/bin/hackbench -g 16 -f 20 --threads --pipe -l 100000 -s 128
Ah, good. This is good ;)
> I will try to sprinkle some trace_printk's in the code where the state of
> the pipe changes. I will report here if I find something.
Great! but...
Sapkal, I was going to finish (and test! ;) the patch below tomorrow, after
you test the previous debugging patch I sent in this thread. But since you
are going to change the kernel...
For the moment, please forget about that patch (it is buggy, as Mateusz pointed out).
Could you apply the patch below and reproduce the problem ?
If yes, please do prctl(666) after the hang and send us the output from
dmesg, between "DUMP START" and "DUMP END". You can just do
$ perl -e 'syscall 157,666'
to call prctl(666) and trigger the dump.
Oleg.
---
diff --git a/fs/pipe.c b/fs/pipe.c
index b0641f75b1ba..566c75a0ff81 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -376,6 +376,8 @@ anon_pipe_read(struct kiocb *iocb, struct iov_iter *to)
}
if (pipe_empty(pipe->head, pipe->tail))
wake_next_reader = false;
+ if (ret > 0)
+ pipe->r_cnt++;
mutex_unlock(&pipe->mutex);
if (wake_writer)
@@ -565,6 +567,8 @@ anon_pipe_write(struct kiocb *iocb, struct iov_iter *from)
out:
if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
wake_next_writer = false;
+ if (ret > 0)
+ pipe->w_cnt++;
mutex_unlock(&pipe->mutex);
/*
@@ -695,6 +699,42 @@ pipe_poll(struct file *filp, poll_table *wait)
return mask;
}
+static DEFINE_MUTEX(PI_MUTEX);
+static LIST_HEAD(PI_LIST);
+
+void pi_dump(void);
+/*
+ * pi_dump - debugging aid: log the state of every pipe linked on PI_LIST.
+ *
+ * Walks PI_LIST with PI_MUTEX held and, for each pipe (with pipe->mutex
+ * held), prints via pr_crit():
+ *   E / F   - pipe_empty() / pipe_full() for the sampled head and tail
+ *   W / R   - the pipe's w_cnt / r_cnt counters
+ *   RD / WR - waitqueue_active() for rd_wait / wr_wait
+ *   buf     - offset and len of each buffer from tail up to head
+ *
+ * The whole report is bracketed by "DUMP START" / "DUMP END" marker lines.
+ */
+void pi_dump(void)
+{
+	struct pipe_inode_info *pipe;
+
+	pr_crit("---------- DUMP START ----------\n");
+	mutex_lock(&PI_MUTEX);
+	list_for_each_entry(pipe, &PI_LIST, pi_list) {
+		unsigned head, tail;
+
+		mutex_lock(&pipe->mutex);
+		head = pipe->head;
+		tail = pipe->tail;
+		/* w_cnt and r_cnt are declared unsigned, hence %u */
+		pr_crit("E=%d F=%d; W=%u R=%u\n",
+			pipe_empty(head, tail), pipe_full(head, tail, pipe->max_usage),
+			pipe->w_cnt, pipe->r_cnt);
+
+		// INCOMPLETE
+		pr_crit("RD=%d WR=%d\n",
+			waitqueue_active(&pipe->rd_wait),
+			waitqueue_active(&pipe->wr_wait));
+
+		/*
+		 * head and tail are unsigned indices: compare with != rather
+		 * than <, so the walk does not rely on head being numerically
+		 * larger than tail (it would print nothing after head wraps).
+		 */
+		for (; tail != head; tail++) {
+			struct pipe_buffer *buf = pipe_buf(pipe, tail);
+
+			WARN_ON(buf->ops != &anon_pipe_buf_ops);
+			pr_crit("buf: o=%u l=%u\n", buf->offset, buf->len);
+		}
+		pr_crit("\n");
+
+		mutex_unlock(&pipe->mutex);
+	}
+	mutex_unlock(&PI_MUTEX);
+	pr_crit("---------- DUMP END ------------\n");
+}
+
static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
{
int kill = 0;
@@ -706,8 +746,14 @@ static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
}
spin_unlock(&inode->i_lock);
- if (kill)
+ if (kill) {
+ if (!list_empty(&pipe->pi_list)) {
+ mutex_lock(&PI_MUTEX);
+ list_del_init(&pipe->pi_list);
+ mutex_unlock(&PI_MUTEX);
+ }
free_pipe_info(pipe);
+ }
}
static int
@@ -790,6 +836,13 @@ struct pipe_inode_info *alloc_pipe_info(void)
if (pipe == NULL)
goto out_free_uid;
+ INIT_LIST_HEAD(&pipe->pi_list);
+ if (!strcmp(current->comm, "hackbench")) {
+ mutex_lock(&PI_MUTEX);
+ list_add_tail(&pipe->pi_list, &PI_LIST);
+ mutex_unlock(&PI_MUTEX);
+ }
+
if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
pipe_bufs = max_size >> PAGE_SHIFT;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 8ff23bf5a819..48d9bf5171dc 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -80,6 +80,9 @@ struct pipe_inode_info {
#ifdef CONFIG_WATCH_QUEUE
struct watch_queue *watch_queue;
#endif
+
+ struct list_head pi_list;
+ unsigned w_cnt, r_cnt;
};
/*
diff --git a/kernel/sys.c b/kernel/sys.c
index 4efca8a97d62..a85e34861b2e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2483,6 +2483,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
error = 0;
switch (option) {
+ case 666: {
+ extern void pi_dump(void);
+ pi_dump();
+ break;
+ }
case PR_SET_PDEATHSIG:
if (!valid_signal(arg2)) {
error = -EINVAL;
next prev parent reply other threads:[~2025-02-27 21:13 UTC|newest]
Thread overview: 109+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-02 14:07 [PATCH] pipe_read: don't wake up the writer if the pipe is still full Oleg Nesterov
2025-01-02 16:20 ` WangYuli
2025-01-02 16:46 ` Oleg Nesterov
2025-01-04 8:42 ` Christian Brauner
2025-01-31 9:49 ` K Prateek Nayak
2025-01-31 13:23 ` Oleg Nesterov
2025-01-31 20:06 ` Linus Torvalds
2025-02-02 17:01 ` Oleg Nesterov
2025-02-02 18:39 ` Linus Torvalds
2025-02-02 19:32 ` Oleg Nesterov
2025-02-04 11:17 ` Christian Brauner
2025-02-03 9:05 ` K Prateek Nayak
2025-02-04 13:49 ` Oleg Nesterov
2025-02-24 9:26 ` Sapkal, Swapnil
2025-02-24 14:24 ` Oleg Nesterov
2025-02-24 18:36 ` Linus Torvalds
2025-02-25 14:26 ` Oleg Nesterov
2025-02-25 11:57 ` Oleg Nesterov
2025-02-26 5:55 ` Sapkal, Swapnil
2025-02-26 11:38 ` Oleg Nesterov
2025-02-26 17:56 ` Sapkal, Swapnil
2025-02-26 18:12 ` Oleg Nesterov
2025-03-03 13:00 ` Alexey Gladkov
2025-03-03 15:46 ` K Prateek Nayak
2025-03-03 17:18 ` Alexey Gladkov
2025-02-26 13:18 ` Mateusz Guzik
2025-02-26 13:21 ` Mateusz Guzik
2025-02-26 17:16 ` Oleg Nesterov
2025-02-27 16:18 ` Sapkal, Swapnil
2025-02-27 16:34 ` Mateusz Guzik
2025-02-27 21:12 ` Oleg Nesterov [this message]
2025-02-28 5:58 ` Sapkal, Swapnil
2025-02-28 14:30 ` Oleg Nesterov
2025-02-28 16:33 ` Oleg Nesterov
2025-03-03 9:46 ` Sapkal, Swapnil
2025-03-03 14:37 ` Mateusz Guzik
2025-03-03 14:51 ` Mateusz Guzik
2025-03-03 15:31 ` K Prateek Nayak
2025-03-03 17:54 ` Mateusz Guzik
2025-03-03 18:11 ` Linus Torvalds
2025-03-03 18:33 ` Mateusz Guzik
2025-03-03 18:55 ` Linus Torvalds
2025-03-03 19:06 ` Mateusz Guzik
2025-03-03 20:27 ` Oleg Nesterov
2025-03-03 20:46 ` Linus Torvalds
2025-03-04 5:31 ` K Prateek Nayak
2025-03-04 6:32 ` Linus Torvalds
2025-03-04 12:54 ` Oleg Nesterov
2025-03-04 13:25 ` Oleg Nesterov
2025-03-04 18:28 ` Linus Torvalds
2025-03-04 22:11 ` Oleg Nesterov
2025-03-05 4:40 ` K Prateek Nayak
2025-03-05 4:52 ` Linus Torvalds
2025-03-04 13:51 ` [PATCH] fs/pipe: Read pipe->{head,tail} atomically outside pipe->mutex K Prateek Nayak
2025-03-04 18:36 ` Alexey Gladkov
2025-03-04 19:03 ` Linus Torvalds
2025-03-05 15:31 ` [PATCH] pipe_read: don't wake up the writer if the pipe is still full Rasmus Villemoes
2025-03-05 16:50 ` Linus Torvalds
2025-03-06 9:48 ` Rasmus Villemoes
2025-03-06 14:42 ` Rasmus Villemoes
2025-03-05 16:40 ` Linus Torvalds
2025-03-06 8:35 ` Rasmus Villemoes
2025-03-06 17:59 ` Linus Torvalds
2025-03-06 9:28 ` Rasmus Villemoes
2025-03-06 11:39 ` [RFC PATCH 0/3] pipe: Convert pipe->{head,tail} to unsigned short K Prateek Nayak
2025-03-06 11:39 ` [RFC PATCH 1/3] fs/pipe: Limit the slots in pipe_resize_ring() K Prateek Nayak
2025-03-06 12:28 ` Oleg Nesterov
2025-03-06 15:26 ` K Prateek Nayak
2025-03-06 11:39 ` [RFC PATCH 2/3] fs/splice: Atomically read pipe->{head,tail} in opipe_prep() K Prateek Nayak
2025-03-06 11:39 ` [RFC PATCH 3/3] treewide: pipe: Convert all references to pipe->{head,tail,max_usage,ring_size} to unsigned short K Prateek Nayak
2025-03-06 12:32 ` Oleg Nesterov
2025-03-06 12:41 ` Oleg Nesterov
2025-03-06 15:33 ` K Prateek Nayak
2025-03-06 18:04 ` Linus Torvalds
2025-03-06 14:27 ` Rasmus Villemoes
2025-03-03 18:32 ` [PATCH] pipe_read: don't wake up the writer if the pipe is still full K Prateek Nayak
2025-03-04 5:22 ` K Prateek Nayak
2025-03-03 16:49 ` Oleg Nesterov
2025-03-04 5:06 ` Hillf Danton
2025-03-04 5:35 ` K Prateek Nayak
2025-03-04 10:29 ` Hillf Danton
2025-03-04 12:34 ` Oleg Nesterov
2025-03-04 23:35 ` Hillf Danton
2025-03-04 23:49 ` Oleg Nesterov
2025-03-05 4:56 ` Hillf Danton
2025-03-05 11:44 ` Oleg Nesterov
2025-03-05 22:46 ` Hillf Danton
2025-03-06 9:30 ` Oleg Nesterov
2025-03-07 6:08 ` Hillf Danton
2025-03-07 6:24 ` K Prateek Nayak
2025-03-07 10:46 ` Hillf Danton
2025-03-07 11:29 ` Oleg Nesterov
2025-03-07 12:34 ` Oleg Nesterov
2025-03-07 23:56 ` Hillf Danton
2025-03-09 14:01 ` K Prateek Nayak
2025-03-09 17:02 ` Oleg Nesterov
2025-03-10 10:49 ` Hillf Danton
2025-03-10 11:09 ` Oleg Nesterov
2025-03-10 11:37 ` Hillf Danton
2025-03-10 12:43 ` Oleg Nesterov
2025-03-10 23:33 ` Hillf Danton
2025-03-11 0:26 ` Linus Torvalds
2025-03-11 6:54 ` Oleg Nesterov
[not found] ` <20250311112922.3342-1-hdanton@sina.com>
2025-03-11 11:53 ` Oleg Nesterov
2025-03-07 11:26 ` Oleg Nesterov
2025-02-27 12:50 ` Oleg Nesterov
2025-02-27 13:52 ` Oleg Nesterov
2025-02-27 15:59 ` Mateusz Guzik
2025-02-27 16:28 ` Oleg Nesterov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250227211229.GD25639@redhat.com \
--to=oleg@redhat.com \
--cc=Neeraj.Upadhyay@amd.com \
--cc=brauner@kernel.org \
--cc=dhowells@redhat.com \
--cc=gautham.shenoy@amd.com \
--cc=kprateek.nayak@amd.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=manfred@colorfullife.com \
--cc=mjguzik@gmail.com \
--cc=swapnil.sapkal@amd.com \
--cc=torvalds@linux-foundation.org \
--cc=wangyuli@uniontech.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).