Linux Perf Users
 help / color / mirror / Atom feed
* [PATCH v2] perf bench: add --write-size option to sched pipe
@ 2026-05-21 16:15 Breno Leitao
  2026-05-21 16:39 ` sashiko-bot
  2026-05-26  2:17 ` Namhyung Kim
  0 siblings, 2 replies; 4+ messages in thread
From: Breno Leitao @ 2026-05-21 16:15 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, James Clark
  Cc: linux-perf-users, linux-kernel, kernel-team, Breno Leitao

The default ping-pong uses sizeof(int) (4 bytes) per iteration, which
exercises only the pipe-buffer merge path and keeps allocation entirely
out of the picture. That makes the bench a useful scheduler / context-
switch latency probe but unable to surface anything from the pipe
page-allocation hot path.

Add a -s/--write-size option that sets the bytes written and read per
ping-pong iteration. The buffer is allocated for each side via
struct thread_data and replaces the on-stack int previously used. The
default remains sizeof(int) so existing invocations are unchanged.

With --write-size set above PAGE_SIZE the bench drives anon_pipe_write()
through alloc_page() (or the bulk pre-alloc, if the relevant patch is
applied), which is what we want when measuring pipe locking and page
allocation work.

The bench is a ping-pong: both sides call write() before read(), so a
single write_size payload must fit entirely in the pipe buffer or both
sides deadlock waiting for the other to drain. Resize the pipe via
F_SETPIPE_SZ to match write_size (skipped at the sizeof(int) default),
and error out cleanly when the request exceeds
/proc/sys/fs/pipe-max-size.

Signed-off-by: Breno Leitao <leitao@debian.org>
---
This patch has been valuable for testing and verifying the pipe
enhancements currently under discussion at
https://lore.kernel.org/all/20260515-fix_pipe-v1-0-b14c840c7555@debian.org/
---
Changes in v2:
- Reject --write-size == 0 to avoid a zero-byte ping-pong that spins
  (blocking mode) or hangs on epoll_wait (non-blocking mode).
- Validate --write-size <= INT_MAX and drop the (int) casts in the
  read/write BUG_ON and fcntl(F_SETPIPE_SZ) checks, so the comparisons
  are unambiguous regardless of the requested size.
- Fix "acommodate" typo in the pipe-resize comment.
- Link to v1: https://patch.msgid.link/20260515-perf_bench_pipe-v1-1-3c5b805ba178@debian.org

To: Peter Zijlstra <peterz@infradead.org>
To: Ingo Molnar <mingo@redhat.com>
To: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Namhyung Kim <namhyung@kernel.org>
To: Mark Rutland <mark.rutland@arm.com>
To: Alexander Shishkin <alexander.shishkin@linux.intel.com>
To: Jiri Olsa <jolsa@kernel.org>
To: Ian Rogers <irogers@google.com>
To: Adrian Hunter <adrian.hunter@intel.com>
To: James Clark <james.clark@linaro.org>
Cc: linux-perf-users@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 tools/perf/bench/sched-pipe.c | 47 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 41 insertions(+), 6 deletions(-)

diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 70139036d68f0..216d3121d438d 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -22,6 +22,7 @@
 #include <string.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <limits.h>
 #include <assert.h>
 #include <sys/epoll.h>
 #include <sys/time.h>
@@ -39,6 +40,7 @@ struct thread_data {
 	int			epoll_fd;
 	bool			cgroup_failed;
 	pthread_t		pthread;
+	char			*buf;
 };
 
 #define LOOPS_DEFAULT 1000000
@@ -48,6 +50,7 @@ static	int			loops = LOOPS_DEFAULT;
 static bool			threaded;
 
 static bool			nonblocking;
+static unsigned int		write_size = sizeof(int);
 static char			*cgrp_names[2];
 static struct cgroup		*cgrps[2];
 
@@ -88,6 +91,8 @@ static const struct option options[] = {
 	OPT_BOOLEAN('n', "nonblocking",	&nonblocking,	"Use non-blocking operations"),
 	OPT_INTEGER('l', "loop",	&loops,		"Specify number of loops"),
 	OPT_BOOLEAN('T', "threaded",	&threaded,	"Specify threads/process based task setup"),
+	OPT_UINTEGER('s', "write-size", &write_size,
+		     "Bytes per ping-pong write (default 4-bytes). Use larger values to exercise the pipe page-allocation path."),
 	OPT_CALLBACK('G', "cgroups", NULL, "SEND,RECV",
 		     "Put sender and receivers in given cgroups",
 		     parse_two_cgroups),
@@ -172,14 +177,14 @@ static void exit_cgroup(int nr)
 
 static inline int read_pipe(struct thread_data *td)
 {
-	int ret, m;
+	int ret;
 retry:
 	if (nonblocking) {
 		ret = epoll_wait(td->epoll_fd, &td->epoll_ev, 1, -1);
 		if (ret < 0)
 			return ret;
 	}
-	ret = read(td->pipe_read, &m, sizeof(int));
+	ret = read(td->pipe_read, td->buf, write_size);
 	if (nonblocking && ret < 0 && errno == EWOULDBLOCK)
 		goto retry;
 	return ret;
@@ -188,7 +193,7 @@ static inline int read_pipe(struct thread_data *td)
 static void *worker_thread(void *__tdata)
 {
 	struct thread_data *td = __tdata;
-	int i, ret, m = 0;
+	int i, ret;
 
 	ret = enter_cgroup(td->nr);
 	if (ret < 0) {
@@ -204,10 +209,10 @@ static void *worker_thread(void *__tdata)
 	}
 
 	for (i = 0; i < loops; i++) {
-		ret = write(td->pipe_write, &m, sizeof(int));
-		BUG_ON(ret != sizeof(int));
+		ret = write(td->pipe_write, td->buf, write_size);
+		BUG_ON(ret < 0 || (unsigned int)ret != write_size);
 		ret = read_pipe(td);
-		BUG_ON(ret != sizeof(int));
+		BUG_ON(ret < 0 || (unsigned int)ret != write_size);
 	}
 
 	return NULL;
@@ -233,12 +238,39 @@ int bench_sched_pipe(int argc, const char **argv)
 
 	argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0);
 
+	if (write_size == 0 || write_size > INT_MAX) {
+		fprintf(stderr, "--write-size must be in 1..%d\n", INT_MAX);
+		return -1;
+	}
+
 	if (nonblocking)
 		flags |= O_NONBLOCK;
 
 	BUG_ON(pipe2(pipe_1, flags));
 	BUG_ON(pipe2(pipe_2, flags));
 
+	/*
+	 * On a custom write_size, resize the pipes so a single payload fits.
+	 */
+	if (write_size > sizeof(int)) {
+		int r1 = fcntl(pipe_1[1], F_SETPIPE_SZ, write_size);
+		int r2 = fcntl(pipe_2[1], F_SETPIPE_SZ, write_size);
+
+		if (r1 < 0 || r2 < 0 ||
+		    (unsigned int)r1 < write_size ||
+		    (unsigned int)r2 < write_size) {
+			fprintf(stderr,
+				"--write-size %u exceeds /proc/sys/fs/pipe-max-size\n",
+				write_size);
+			return -1;
+		}
+	}
+
+	for (t = 0; t < nr_threads; t++) {
+		threads[t].buf = calloc(1, write_size);
+		BUG_ON(!threads[t].buf);
+	}
+
 	gettimeofday(&start, NULL);
 
 	for (t = 0; t < nr_threads; t++) {
@@ -287,6 +319,9 @@ int bench_sched_pipe(int argc, const char **argv)
 	gettimeofday(&stop, NULL);
 	timersub(&stop, &start, &diff);
 
+	for (t = 0; t < nr_threads; t++)
+		free(threads[t].buf);
+
 	exit_cgroup(0);
 	exit_cgroup(1);
 

---
base-commit: e98d21c170b01ddef366f023bbfcf6b31509fa83
change-id: 20260515-perf_bench_pipe-bae2ec777c4b

Best regards,
--  
Breno Leitao <leitao@debian.org>


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] perf bench: add --write-size option to sched pipe
  2026-05-21 16:15 [PATCH v2] perf bench: add --write-size option to sched pipe Breno Leitao
@ 2026-05-21 16:39 ` sashiko-bot
  2026-05-21 17:12   ` Breno Leitao
  2026-05-26  2:17 ` Namhyung Kim
  1 sibling, 1 reply; 4+ messages in thread
From: sashiko-bot @ 2026-05-21 16:39 UTC (permalink / raw)
  To: Breno Leitao; +Cc: linux-perf-users

Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
- [Medium] File descriptors allocated for pipes are leaked when returning early due to an fcntl() failure.
--

commit a15da46bb7bbcff2fe4b600fd4cd1fdf6d6d3ff0
Author: Breno Leitao <leitao@debian.org>

perf bench: add --write-size option to sched pipe

This commit adds a --write-size option to the sched pipe benchmark in perf to
allow testing the pipe page-allocation path. It updates the benchmark to
allocate a buffer per thread and resize the pipe capacity via fcntl() when a
larger write size is specified.

> diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
> index 70139036d68f0..216d3121d438d 100644
> --- a/tools/perf/bench/sched-pipe.c
> +++ b/tools/perf/bench/sched-pipe.c
[ ... ]
> @@ -233,12 +238,39 @@ int bench_sched_pipe(int argc, const char **argv)
[ ... ]
>  	BUG_ON(pipe2(pipe_1, flags));
>  	BUG_ON(pipe2(pipe_2, flags));
>  
> +	/*
> +	 * On a custom write_size, resize the pipes so a single payload fits.
> +	 */
> +	if (write_size > sizeof(int)) {
> +		int r1 = fcntl(pipe_1[1], F_SETPIPE_SZ, write_size);
> +		int r2 = fcntl(pipe_2[1], F_SETPIPE_SZ, write_size);
> +
> +		if (r1 < 0 || r2 < 0 ||
> +		    (unsigned int)r1 < write_size ||
> +		    (unsigned int)r2 < write_size) {
> +			fprintf(stderr,
> +				"--write-size %u exceeds /proc/sys/fs/pipe-max-size\n",
> +				write_size);
> +			return -1;

[Severity: Medium]
Are the file descriptors allocated for pipe_1 and pipe_2 leaked on this error
path?

The pipe2() calls just above this block allocate four file descriptors. If the
fcntl() operations fail and this path returns -1, those descriptors appear to
be left open.

> +		}
> +	}

-- 
Sashiko AI review · https://sashiko.dev/#/patchset/20260521-perf_bench_pipe-v2-1-720b6ff7f0fa@debian.org?part=1

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] perf bench: add --write-size option to sched pipe
  2026-05-21 16:39 ` sashiko-bot
@ 2026-05-21 17:12   ` Breno Leitao
  0 siblings, 0 replies; 4+ messages in thread
From: Breno Leitao @ 2026-05-21 17:12 UTC (permalink / raw)
  To: sashiko-reviews; +Cc: linux-perf-users

On Thu, May 21, 2026 at 04:39:50PM +0000, sashiko-bot@kernel.org wrote:
> [Severity: Medium]
> Are the file descriptors allocated for pipe_1 and pipe_2 leaked on this error
> path?
> 
> The pipe2() calls just above this block allocate four file descriptors. If the
> fcntl() operations fail and this path returns -1, those descriptors appear to
> be left open.

Technically true, but not a problem in practice: when bench_sched_pipe()
returns -1, the error bubbles up to perf bench and the process exits, so
the kernel reclaims the fds.

There's no actual leak in the running program; it's purely a static-analysis nit.

The rest of the function follows the same pattern — pipes are never explicitly
closed on the success path either, they're released at exit.

Not planning to fix in v3 unless a maintainer prefers otherwise.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] perf bench: add --write-size option to sched pipe
  2026-05-21 16:15 [PATCH v2] perf bench: add --write-size option to sched pipe Breno Leitao
  2026-05-21 16:39 ` sashiko-bot
@ 2026-05-26  2:17 ` Namhyung Kim
  1 sibling, 0 replies; 4+ messages in thread
From: Namhyung Kim @ 2026-05-26  2:17 UTC (permalink / raw)
  To: Breno Leitao
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Ian Rogers,
	Adrian Hunter, James Clark, linux-perf-users, linux-kernel,
	kernel-team

On Thu, May 21, 2026 at 09:15:37AM -0700, Breno Leitao wrote:
> The default ping-pong uses sizeof(int) (4 bytes) per iteration, which
> exercises only the pipe-buffer merge path and keeps allocation entirely
> out of the picture. That makes the bench a useful scheduler / context-
> switch latency probe but unable to surface anything from the pipe
> page-allocation hot path.
> 
> Add a -s/--write-size option that sets the bytes written and read per
> ping-pong iteration. The buffer is allocated for each side via
> struct thread_data and replaces the on-stack int previously used. The
> default remains sizeof(int) so existing invocations are unchanged.
> 
> With --write-size set above PAGE_SIZE the bench drives anon_pipe_write()
> through alloc_page() (or the bulk pre-alloc, if the relevant patch is
> applied), which is what we want when measuring pipe locking and page
> allocation work.
> 
> The bench is a ping-pong: both sides call write() before read(), so a
> single write_size payload must fit entirely in the pipe buffer or both
> sides deadlock waiting for the other to drain. Resize the pipe via
> F_SETPIPE_SZ to match write_size (skipped at the sizeof(int) default),
> and error out cleanly when the request exceeds
> /proc/sys/fs/pipe-max-size.
> 
> Signed-off-by: Breno Leitao <leitao@debian.org>
> ---
> This patch has been valuable for testing and verifying the pipe
> enhancements currently under discussion at
> https://lore.kernel.org/all/20260515-fix_pipe-v1-0-b14c840c7555@debian.org/
> ---
> Changes in v2:
> - Reject --write-size == 0 to avoid a zero-byte ping-pong that spins
>   (blocking mode) or hangs on epoll_wait (non-blocking mode).
> - Validate --write-size <= INT_MAX and drop the (int) casts in the
>   read/write BUG_ON and fcntl(F_SETPIPE_SZ) checks, so the comparisons
>   are unambiguous regardless of the requested size.
> - Fix "acommodate" typo in the pipe-resize comment.
> - Link to v1: https://patch.msgid.link/20260515-perf_bench_pipe-v1-1-3c5b805ba178@debian.org
> 
> To: Peter Zijlstra <peterz@infradead.org>
> To: Ingo Molnar <mingo@redhat.com>
> To: Arnaldo Carvalho de Melo <acme@kernel.org>
> To: Namhyung Kim <namhyung@kernel.org>
> To: Mark Rutland <mark.rutland@arm.com>
> To: Alexander Shishkin <alexander.shishkin@linux.intel.com>
> To: Jiri Olsa <jolsa@kernel.org>
> To: Ian Rogers <irogers@google.com>
> To: Adrian Hunter <adrian.hunter@intel.com>
> To: James Clark <james.clark@linaro.org>
> Cc: linux-perf-users@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> ---
>  tools/perf/bench/sched-pipe.c | 47 +++++++++++++++++++++++++++++++++++++------
>  1 file changed, 41 insertions(+), 6 deletions(-)
> 
> diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
> index 70139036d68f0..216d3121d438d 100644
> --- a/tools/perf/bench/sched-pipe.c
> +++ b/tools/perf/bench/sched-pipe.c
> @@ -22,6 +22,7 @@
>  #include <string.h>
>  #include <errno.h>
>  #include <fcntl.h>
> +#include <limits.h>
>  #include <assert.h>
>  #include <sys/epoll.h>
>  #include <sys/time.h>
> @@ -39,6 +40,7 @@ struct thread_data {
>  	int			epoll_fd;
>  	bool			cgroup_failed;
>  	pthread_t		pthread;
> +	char			*buf;
>  };
>  
>  #define LOOPS_DEFAULT 1000000
> @@ -48,6 +50,7 @@ static	int			loops = LOOPS_DEFAULT;
>  static bool			threaded;
>  
>  static bool			nonblocking;
> +static unsigned int		write_size = sizeof(int);
>  static char			*cgrp_names[2];
>  static struct cgroup		*cgrps[2];
>  
> @@ -88,6 +91,8 @@ static const struct option options[] = {
>  	OPT_BOOLEAN('n', "nonblocking",	&nonblocking,	"Use non-blocking operations"),
>  	OPT_INTEGER('l', "loop",	&loops,		"Specify number of loops"),
>  	OPT_BOOLEAN('T', "threaded",	&threaded,	"Specify threads/process based task setup"),
> +	OPT_UINTEGER('s', "write-size", &write_size,
> +		     "Bytes per ping-pong write (default 4-bytes). Use larger values to exercise the pipe page-allocation path."),
>  	OPT_CALLBACK('G', "cgroups", NULL, "SEND,RECV",
>  		     "Put sender and receivers in given cgroups",
>  		     parse_two_cgroups),
> @@ -172,14 +177,14 @@ static void exit_cgroup(int nr)
>  
>  static inline int read_pipe(struct thread_data *td)
>  {
> -	int ret, m;
> +	int ret;
>  retry:
>  	if (nonblocking) {
>  		ret = epoll_wait(td->epoll_fd, &td->epoll_ev, 1, -1);
>  		if (ret < 0)
>  			return ret;
>  	}
> -	ret = read(td->pipe_read, &m, sizeof(int));
> +	ret = read(td->pipe_read, td->buf, write_size);
>  	if (nonblocking && ret < 0 && errno == EWOULDBLOCK)
>  		goto retry;
>  	return ret;
> @@ -188,7 +193,7 @@ static inline int read_pipe(struct thread_data *td)
>  static void *worker_thread(void *__tdata)
>  {
>  	struct thread_data *td = __tdata;
> -	int i, ret, m = 0;
> +	int i, ret;
>  
>  	ret = enter_cgroup(td->nr);
>  	if (ret < 0) {
> @@ -204,10 +209,10 @@ static void *worker_thread(void *__tdata)
>  	}
>  
>  	for (i = 0; i < loops; i++) {
> -		ret = write(td->pipe_write, &m, sizeof(int));
> -		BUG_ON(ret != sizeof(int));
> +		ret = write(td->pipe_write, td->buf, write_size);
> +		BUG_ON(ret < 0 || (unsigned int)ret != write_size);
>  		ret = read_pipe(td);
> -		BUG_ON(ret != sizeof(int));
> +		BUG_ON(ret < 0 || (unsigned int)ret != write_size);

Is it possible for write() or read() to return fewer bytes than requested
here, e.g. due to a signal or something?

Thanks,
Namhyung


>  	}
>  
>  	return NULL;
> @@ -233,12 +238,39 @@ int bench_sched_pipe(int argc, const char **argv)
>  
>  	argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0);
>  
> +	if (write_size == 0 || write_size > INT_MAX) {
> +		fprintf(stderr, "--write-size must be in 1..%d\n", INT_MAX);
> +		return -1;
> +	}
> +
>  	if (nonblocking)
>  		flags |= O_NONBLOCK;
>  
>  	BUG_ON(pipe2(pipe_1, flags));
>  	BUG_ON(pipe2(pipe_2, flags));
>  
> +	/*
> +	 * On a custom write_size, resize the pipes so a single payload fits.
> +	 */
> +	if (write_size > sizeof(int)) {
> +		int r1 = fcntl(pipe_1[1], F_SETPIPE_SZ, write_size);
> +		int r2 = fcntl(pipe_2[1], F_SETPIPE_SZ, write_size);
> +
> +		if (r1 < 0 || r2 < 0 ||
> +		    (unsigned int)r1 < write_size ||
> +		    (unsigned int)r2 < write_size) {
> +			fprintf(stderr,
> +				"--write-size %u exceeds /proc/sys/fs/pipe-max-size\n",
> +				write_size);
> +			return -1;
> +		}
> +	}
> +
> +	for (t = 0; t < nr_threads; t++) {
> +		threads[t].buf = calloc(1, write_size);
> +		BUG_ON(!threads[t].buf);
> +	}
> +
>  	gettimeofday(&start, NULL);
>  
>  	for (t = 0; t < nr_threads; t++) {
> @@ -287,6 +319,9 @@ int bench_sched_pipe(int argc, const char **argv)
>  	gettimeofday(&stop, NULL);
>  	timersub(&stop, &start, &diff);
>  
> +	for (t = 0; t < nr_threads; t++)
> +		free(threads[t].buf);
> +
>  	exit_cgroup(0);
>  	exit_cgroup(1);
>  
> 
> ---
> base-commit: e98d21c170b01ddef366f023bbfcf6b31509fa83
> change-id: 20260515-perf_bench_pipe-bae2ec777c4b
> 
> Best regards,
> --  
> Breno Leitao <leitao@debian.org>
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-05-26  2:17 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-05-21 16:15 [PATCH v2] perf bench: add --write-size option to sched pipe Breno Leitao
2026-05-21 16:39 ` sashiko-bot
2026-05-21 17:12   ` Breno Leitao
2026-05-26  2:17 ` Namhyung Kim

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.