public inbox for linux-fsdevel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] pipe: don't update {a,c,m}time for anonymous pipes
@ 2025-02-04 13:21 Oleg Nesterov
  2025-02-04 14:21 ` Mateusz Guzik
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Oleg Nesterov @ 2025-02-04 13:21 UTC (permalink / raw)
  To: Christian Brauner, Linus Torvalds
  Cc: David Howells, Gautham R. Shenoy, K Prateek Nayak, Mateusz Guzik,
	Neeraj Upadhyay, Oliver Sang, Swapnil Sapkal, WangYuli,
	linux-fsdevel, linux-kernel

These numbers are visible in fstat() but hopefully nobody uses this
information and file_accessed/file_update_time are not that cheap.
Stupid test-case:

	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>
	#include <assert.h>
	#include <sys/ioctl.h>
	#include <sys/time.h>

	static char buf[17 * 4096];
	static struct timeval TW, TR;

	int wr(int fd, int size)
	{
		int c, r;
		struct timeval t0, t1;

		gettimeofday(&t0, NULL);
		for (c = 0; (r = write(fd, buf, size)) > 0; c += r);
		gettimeofday(&t1, NULL);
		timeradd(&TW, &t1, &TW);
		timersub(&TW, &t0, &TW);

		return c;
	}

	int rd(int fd, int size)
	{
		int c, r;
		struct timeval t0, t1;

		gettimeofday(&t0, NULL);
		for (c = 0; (r = read(fd, buf, size)) > 0; c += r);
		gettimeofday(&t1, NULL);
		timeradd(&TR, &t1, &TR);
		timersub(&TR, &t0, &TR);

		return c;
	}

	int main(int argc, const char *argv[])
	{
		int fd[2], nb = 1, loop, size;

		assert(argc == 3);
		loop = atoi(argv[1]);
		size = atoi(argv[2]);

		assert(pipe(fd) == 0);
		assert(ioctl(fd[0], FIONBIO, &nb) == 0);
		assert(ioctl(fd[1], FIONBIO, &nb) == 0);

		assert(size <= sizeof(buf));
		while (loop--)
			assert(wr(fd[1], size) == rd(fd[0], size));

		struct timeval tt;
		timeradd(&TW, &TR, &tt);
		printf("TW = %lu.%03lu TR = %lu.%03lu TT = %lu.%03lu\n",
			TW.tv_sec, TW.tv_usec/1000,
			TR.tv_sec, TR.tv_usec/1000,
			tt.tv_sec, tt.tv_usec/1000);

		return 0;
	}

Before:
	# for i in 1 2 3; do /host/tmp/test 10000 100; done
	TW = 8.047 TR = 5.845 TT = 13.893
	TW = 8.091 TR = 5.872 TT = 13.963
	TW = 8.083 TR = 5.885 TT = 13.969
After:
	# for i in 1 2 3; do /host/tmp/test 10000 100; done
	TW = 4.752 TR = 4.664 TT = 9.416
	TW = 4.684 TR = 4.608 TT = 9.293
	TW = 4.736 TR = 4.652 TT = 9.388

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 fs/pipe.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 94b59045ab44..baaa8c0817f1 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -247,6 +247,11 @@ static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe,
 	return tail;
 }
 
+static inline bool is_pipe_inode(struct inode *inode)
+{
+	return inode->i_sb->s_magic == PIPEFS_MAGIC;
+}
+
 static ssize_t
 pipe_read(struct kiocb *iocb, struct iov_iter *to)
 {
@@ -404,7 +409,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 	if (wake_next_reader)
 		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
 	kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
-	if (ret > 0)
+	if (ret > 0 && !is_pipe_inode(file_inode(filp)))
 		file_accessed(filp);
 	return ret;
 }
@@ -604,11 +609,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 	if (wake_next_writer)
 		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
-	if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
-		int err = file_update_time(filp);
-		if (err)
-			ret = err;
-		sb_end_write(file_inode(filp)->i_sb);
+	if (ret > 0 && !is_pipe_inode(file_inode(filp))) {
+		if (sb_start_write_trylock(file_inode(filp)->i_sb)) {
+			int err = file_update_time(filp);
+			if (err)
+				ret = err;
+			sb_end_write(file_inode(filp)->i_sb);
+		}
 	}
 	return ret;
 }
@@ -1108,7 +1115,7 @@ static void wake_up_partner(struct pipe_inode_info *pipe)
 static int fifo_open(struct inode *inode, struct file *filp)
 {
 	struct pipe_inode_info *pipe;
-	bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
+	bool is_pipe = is_pipe_inode(inode);
 	int ret;
 
 	filp->f_pipe = 0;
-- 
2.25.1.362.g51ebf55



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] pipe: don't update {a,c,m}time for anonymous pipes
  2025-02-04 13:21 [PATCH] pipe: don't update {a,c,m}time for anonymous pipes Oleg Nesterov
@ 2025-02-04 14:21 ` Mateusz Guzik
  2025-02-04 14:39   ` Oleg Nesterov
  2025-02-04 16:34   ` Christian Brauner
  2025-02-04 14:48 ` Jeff Layton
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 9+ messages in thread
From: Mateusz Guzik @ 2025-02-04 14:21 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Christian Brauner, Linus Torvalds, David Howells,
	Gautham R. Shenoy, K Prateek Nayak, Neeraj Upadhyay, Oliver Sang,
	Swapnil Sapkal, WangYuli, linux-fsdevel, linux-kernel

On Tue, Feb 4, 2025 at 2:22 PM Oleg Nesterov <oleg@redhat.com> wrote:
> These numbers are visible in fstat() but hopefully nobody uses this
> information and file_accessed/file_update_time are not that cheap.

I'll note majority of the problem most likely stems from
mnt_get_write_access rolling with:
        preempt_disable();
        mnt_inc_writers(mnt);
        /*
         * The store to mnt_inc_writers must be visible before we pass
         * MNT_WRITE_HOLD loop below, so that the slowpath can see our
         * incremented count after it has set MNT_WRITE_HOLD.
         */
        smp_mb();

Unfortunately as is the MNT_WRITE_HOLD thing gets set under a
spinlock, so it may be this will be hard to rework in the same vein in
which percpu rwsems are operating, but perhaps someone(tm) may take an
honest shot?

Would be of benefit everywhere and possibly obsolete this patch.

> +       if (ret > 0 && !is_pipe_inode(file_inode(filp))) {

Total nit in case there is a v2:

ret is typically > 0 and most of the time this is going to be an
anonymous pipe, so I would swap these conditions around.

A not nit is that "is_pipe_inode" is imo misleading -- a named pipe
inode is also a pipe inode. How about "is_anon_pipe"?

-- 
Mateusz Guzik <mjguzik gmail.com>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] pipe: don't update {a,c,m}time for anonymous pipes
  2025-02-04 14:21 ` Mateusz Guzik
@ 2025-02-04 14:39   ` Oleg Nesterov
  2025-02-04 16:34   ` Christian Brauner
  1 sibling, 0 replies; 9+ messages in thread
From: Oleg Nesterov @ 2025-02-04 14:39 UTC (permalink / raw)
  To: Mateusz Guzik
  Cc: Christian Brauner, Linus Torvalds, David Howells,
	Gautham R. Shenoy, K Prateek Nayak, Neeraj Upadhyay, Oliver Sang,
	Swapnil Sapkal, WangYuli, linux-fsdevel, linux-kernel

On 02/04, Mateusz Guzik wrote:
>
> > +       if (ret > 0 && !is_pipe_inode(file_inode(filp))) {
>
> Total nit in case there is a v2:
>
> ret is typically > 0 and most of the time this is going to be an
> anonymous pipe, so I would swap these conditions around.
>
> A not nit is that "is_pipe_inode" is imo misleading -- a named pipe
> inode is also a pipe inode. How about "is_anon_pipe"?

OK, I'll wait for other reviews and send v2 with the changes you suggest.

Oleg.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] pipe: don't update {a,c,m}time for anonymous pipes
  2025-02-04 13:21 [PATCH] pipe: don't update {a,c,m}time for anonymous pipes Oleg Nesterov
  2025-02-04 14:21 ` Mateusz Guzik
@ 2025-02-04 14:48 ` Jeff Layton
  2025-02-04 14:53 ` Jeff Layton
  2025-02-04 15:28 ` Linus Torvalds
  3 siblings, 0 replies; 9+ messages in thread
From: Jeff Layton @ 2025-02-04 14:48 UTC (permalink / raw)
  To: Oleg Nesterov, Christian Brauner, Linus Torvalds
  Cc: David Howells, Gautham R. Shenoy, K Prateek Nayak, Mateusz Guzik,
	Neeraj Upadhyay, Oliver Sang, Swapnil Sapkal, WangYuli,
	linux-fsdevel, linux-kernel

On Tue, 2025-02-04 at 14:21 +0100, Oleg Nesterov wrote:
> These numbers are visible in fstat() but hopefully nobody uses this
> information and file_accessed/file_update_time are not that cheap.
> Stupid test-case:
> 
> 	#include <stdio.h>
> 	#include <stdlib.h>
> 	#include <unistd.h>
> 	#include <assert.h>
> 	#include <sys/ioctl.h>
> 	#include <sys/time.h>
> 
> 	static char buf[17 * 4096];
> 	static struct timeval TW, TR;
> 
> 	int wr(int fd, int size)
> 	{
> 		int c, r;
> 		struct timeval t0, t1;
> 
> 		gettimeofday(&t0, NULL);
> 		for (c = 0; (r = write(fd, buf, size)) > 0; c += r);
> 		gettimeofday(&t1, NULL);
> 		timeradd(&TW, &t1, &TW);
> 		timersub(&TW, &t0, &TW);
> 
> 		return c;
> 	}
> 
> 	int rd(int fd, int size)
> 	{
> 		int c, r;
> 		struct timeval t0, t1;
> 
> 		gettimeofday(&t0, NULL);
> 		for (c = 0; (r = read(fd, buf, size)) > 0; c += r);
> 		gettimeofday(&t1, NULL);
> 		timeradd(&TR, &t1, &TR);
> 		timersub(&TR, &t0, &TR);
> 
> 		return c;
> 	}
> 
> 	int main(int argc, const char *argv[])
> 	{
> 		int fd[2], nb = 1, loop, size;
> 
> 		assert(argc == 3);
> 		loop = atoi(argv[1]);
> 		size = atoi(argv[2]);
> 
> 		assert(pipe(fd) == 0);
> 		assert(ioctl(fd[0], FIONBIO, &nb) == 0);
> 		assert(ioctl(fd[1], FIONBIO, &nb) == 0);
> 
> 		assert(size <= sizeof(buf));
> 		while (loop--)
> 			assert(wr(fd[1], size) == rd(fd[0], size));
> 
> 		struct timeval tt;
> 		timeradd(&TW, &TR, &tt);
> 		printf("TW = %lu.%03lu TR = %lu.%03lu TT = %lu.%03lu\n",
> 			TW.tv_sec, TW.tv_usec/1000,
> 			TR.tv_sec, TR.tv_usec/1000,
> 			tt.tv_sec, tt.tv_usec/1000);
> 
> 		return 0;
> 	}
> 
> Before:
> 	# for i in 1 2 3; do /host/tmp/test 10000 100; done
> 	TW = 8.047 TR = 5.845 TT = 13.893
> 	TW = 8.091 TR = 5.872 TT = 13.963
> 	TW = 8.083 TR = 5.885 TT = 13.969
> After:
> 	# for i in 1 2 3; do /host/tmp/test 10000 100; done
> 	TW = 4.752 TR = 4.664 TT = 9.416
> 	TW = 4.684 TR = 4.608 TT = 9.293
> 	TW = 4.736 TR = 4.652 TT = 9.388
> 
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> ---
>  fs/pipe.c | 21 ++++++++++++++-------
>  1 file changed, 14 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/pipe.c b/fs/pipe.c
> index 94b59045ab44..baaa8c0817f1 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -247,6 +247,11 @@ static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe,
>  	return tail;
>  }
>  
> +static inline bool is_pipe_inode(struct inode *inode)
> +{
> +	return inode->i_sb->s_magic == PIPEFS_MAGIC;
> +}
> +
>  static ssize_t
>  pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  {
> @@ -404,7 +409,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  	if (wake_next_reader)
>  		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
>  	kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
> -	if (ret > 0)
> +	if (ret > 0 && !is_pipe_inode(file_inode(filp)))

pipe_read and pipe_write are the read_iter / write_iter ops for pipefs
inodes. Is there ever a situation where is_pipe_inode() will be false
here?

>  		file_accessed(filp);
>  	return ret;
>  }
> @@ -604,11 +609,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>  	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
>  	if (wake_next_writer)
>  		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
> -	if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
> -		int err = file_update_time(filp);
> -		if (err)
> -			ret = err;
> -		sb_end_write(file_inode(filp)->i_sb);
> +	if (ret > 0 && !is_pipe_inode(file_inode(filp))) {

Ditto.

> +		if (sb_start_write_trylock(file_inode(filp)->i_sb)) {
> +			int err = file_update_time(filp);
> +			if (err)
> +				ret = err;
> +			sb_end_write(file_inode(filp)->i_sb);
> +		}
>  	}
>  	return ret;
>  }
> @@ -1108,7 +1115,7 @@ static void wake_up_partner(struct pipe_inode_info *pipe)
>  static int fifo_open(struct inode *inode, struct file *filp)
>  {
>  	struct pipe_inode_info *pipe;
> -	bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
> +	bool is_pipe = is_pipe_inode(inode);

Same here: In what case is is_pipe() ever false here?

>  	int ret;
>  
>  	filp->f_pipe = 0;

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] pipe: don't update {a,c,m}time for anonymous pipes
  2025-02-04 13:21 [PATCH] pipe: don't update {a,c,m}time for anonymous pipes Oleg Nesterov
  2025-02-04 14:21 ` Mateusz Guzik
  2025-02-04 14:48 ` Jeff Layton
@ 2025-02-04 14:53 ` Jeff Layton
  2025-02-04 15:28 ` Linus Torvalds
  3 siblings, 0 replies; 9+ messages in thread
From: Jeff Layton @ 2025-02-04 14:53 UTC (permalink / raw)
  To: Oleg Nesterov, Christian Brauner, Linus Torvalds
  Cc: David Howells, Gautham R. Shenoy, K Prateek Nayak, Mateusz Guzik,
	Neeraj Upadhyay, Oliver Sang, Swapnil Sapkal, WangYuli,
	linux-fsdevel, linux-kernel

On Tue, 2025-02-04 at 14:21 +0100, Oleg Nesterov wrote:
> These numbers are visible in fstat() but hopefully nobody uses this
> information and file_accessed/file_update_time are not that cheap.
> Stupid test-case:
> 
> 	#include <stdio.h>
> 	#include <stdlib.h>
> 	#include <unistd.h>
> 	#include <assert.h>
> 	#include <sys/ioctl.h>
> 	#include <sys/time.h>
> 
> 	static char buf[17 * 4096];
> 	static struct timeval TW, TR;
> 
> 	int wr(int fd, int size)
> 	{
> 		int c, r;
> 		struct timeval t0, t1;
> 
> 		gettimeofday(&t0, NULL);
> 		for (c = 0; (r = write(fd, buf, size)) > 0; c += r);
> 		gettimeofday(&t1, NULL);
> 		timeradd(&TW, &t1, &TW);
> 		timersub(&TW, &t0, &TW);
> 
> 		return c;
> 	}
> 
> 	int rd(int fd, int size)
> 	{
> 		int c, r;
> 		struct timeval t0, t1;
> 
> 		gettimeofday(&t0, NULL);
> 		for (c = 0; (r = read(fd, buf, size)) > 0; c += r);
> 		gettimeofday(&t1, NULL);
> 		timeradd(&TR, &t1, &TR);
> 		timersub(&TR, &t0, &TR);
> 
> 		return c;
> 	}
> 
> 	int main(int argc, const char *argv[])
> 	{
> 		int fd[2], nb = 1, loop, size;
> 
> 		assert(argc == 3);
> 		loop = atoi(argv[1]);
> 		size = atoi(argv[2]);
> 
> 		assert(pipe(fd) == 0);
> 		assert(ioctl(fd[0], FIONBIO, &nb) == 0);
> 		assert(ioctl(fd[1], FIONBIO, &nb) == 0);
> 
> 		assert(size <= sizeof(buf));
> 		while (loop--)
> 			assert(wr(fd[1], size) == rd(fd[0], size));
> 
> 		struct timeval tt;
> 		timeradd(&TW, &TR, &tt);
> 		printf("TW = %lu.%03lu TR = %lu.%03lu TT = %lu.%03lu\n",
> 			TW.tv_sec, TW.tv_usec/1000,
> 			TR.tv_sec, TR.tv_usec/1000,
> 			tt.tv_sec, tt.tv_usec/1000);
> 
> 		return 0;
> 	}
> 
> Before:
> 	# for i in 1 2 3; do /host/tmp/test 10000 100; done
> 	TW = 8.047 TR = 5.845 TT = 13.893
> 	TW = 8.091 TR = 5.872 TT = 13.963
> 	TW = 8.083 TR = 5.885 TT = 13.969
> After:
> 	# for i in 1 2 3; do /host/tmp/test 10000 100; done
> 	TW = 4.752 TR = 4.664 TT = 9.416
> 	TW = 4.684 TR = 4.608 TT = 9.293
> 	TW = 4.736 TR = 4.652 TT = 9.388
> 
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> ---
>  fs/pipe.c | 21 ++++++++++++++-------
>  1 file changed, 14 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/pipe.c b/fs/pipe.c
> index 94b59045ab44..baaa8c0817f1 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -247,6 +247,11 @@ static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe,
>  	return tail;
>  }
>  
> +static inline bool is_pipe_inode(struct inode *inode)
> +{
> +	return inode->i_sb->s_magic == PIPEFS_MAGIC;
> +}
> +
>  static ssize_t
>  pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  {
> @@ -404,7 +409,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  	if (wake_next_reader)
>  		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
>  	kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
> -	if (ret > 0)
> +	if (ret > 0 && !is_pipe_inode(file_inode(filp)))
>  		file_accessed(filp);
>  	return ret;
>  }
> @@ -604,11 +609,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>  	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
>  	if (wake_next_writer)
>  		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
> -	if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
> -		int err = file_update_time(filp);
> -		if (err)
> -			ret = err;
> -		sb_end_write(file_inode(filp)->i_sb);
> +	if (ret > 0 && !is_pipe_inode(file_inode(filp))) {
> +		if (sb_start_write_trylock(file_inode(filp)->i_sb)) {
> +			int err = file_update_time(filp);
> +			if (err)
> +				ret = err;
> +			sb_end_write(file_inode(filp)->i_sb);
> +		}
>  	}
>  	return ret;
>  }
> @@ -1108,7 +1115,7 @@ static void wake_up_partner(struct pipe_inode_info *pipe)
>  static int fifo_open(struct inode *inode, struct file *filp)
>  {
>  	struct pipe_inode_info *pipe;
> -	bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
> +	bool is_pipe = is_pipe_inode(inode);
>  	int ret;
>  
>  	filp->f_pipe = 0;

Oh _anonymous_ pipes. You can disregard my earlier questions:

Reviewed-by: Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] pipe: don't update {a,c,m}time for anonymous pipes
  2025-02-04 13:21 [PATCH] pipe: don't update {a,c,m}time for anonymous pipes Oleg Nesterov
                   ` (2 preceding siblings ...)
  2025-02-04 14:53 ` Jeff Layton
@ 2025-02-04 15:28 ` Linus Torvalds
  2025-02-04 15:55   ` Oleg Nesterov
  3 siblings, 1 reply; 9+ messages in thread
From: Linus Torvalds @ 2025-02-04 15:28 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Christian Brauner, David Howells, Gautham R. Shenoy,
	K Prateek Nayak, Mateusz Guzik, Neeraj Upadhyay, Oliver Sang,
	Swapnil Sapkal, WangYuli, linux-fsdevel, linux-kernel

On Tue, 4 Feb 2025 at 05:22, Oleg Nesterov <oleg@redhat.com> wrote:
>
> @@ -404,7 +409,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>         if (wake_next_reader)
>                 wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
>         kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
> -       if (ret > 0)
> +       if (ret > 0 && !is_pipe_inode(file_inode(filp)))
>                 file_accessed(filp);
>         return ret;
>  }

So I really don't kike that "is_pipe_inode()" thing.

Yes, it has superficial naming problems (it should be about
*anonymous* pipes, and that should be reflected in the name), but I
think the deeper problem is that I think this should just be a
different set of read/write functions entirely.

IOW, we should have "anon_pipe_read()" and "anon_pipe_write()", and
then the named pipes just do

    int ret = anon_pipe_read(...);
    if (ret > 0)
        file_accessed(filp);
    return ret;

instead. With no tests for "what kind of pipe is this" (same for the
write side, of course)

               Linus

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] pipe: don't update {a,c,m}time for anonymous pipes
  2025-02-04 15:28 ` Linus Torvalds
@ 2025-02-04 15:55   ` Oleg Nesterov
  0 siblings, 0 replies; 9+ messages in thread
From: Oleg Nesterov @ 2025-02-04 15:55 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Christian Brauner, David Howells, Gautham R. Shenoy,
	K Prateek Nayak, Mateusz Guzik, Neeraj Upadhyay, Oliver Sang,
	Swapnil Sapkal, WangYuli, linux-fsdevel, linux-kernel

On 02/04, Linus Torvalds wrote:
>
> IOW, we should have "anon_pipe_read()" and "anon_pipe_write()", and
> then the named pipes just do

OK, will do in v2.

Oleg.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] pipe: don't update {a,c,m}time for anonymous pipes
  2025-02-04 14:21 ` Mateusz Guzik
  2025-02-04 14:39   ` Oleg Nesterov
@ 2025-02-04 16:34   ` Christian Brauner
  2025-02-04 16:49     ` Linus Torvalds
  1 sibling, 1 reply; 9+ messages in thread
From: Christian Brauner @ 2025-02-04 16:34 UTC (permalink / raw)
  To: Mateusz Guzik, Oleg Nesterov, Linus Torvalds
  Cc: David Howells, Gautham R. Shenoy, K Prateek Nayak,
	Neeraj Upadhyay, Oliver Sang, Swapnil Sapkal, WangYuli,
	linux-fsdevel, linux-kernel

On Tue, Feb 04, 2025 at 03:21:18PM +0100, Mateusz Guzik wrote:
> On Tue, Feb 4, 2025 at 2:22 PM Oleg Nesterov <oleg@redhat.com> wrote:
> > These numbers are visible in fstat() but hopefully nobody uses this
> > information and file_accessed/file_update_time are not that cheap.
> 
> I'll note majority of the problem most likely stems from
> mnt_get_write_access rolling with:

So anonymous pipes are based on pipefs which isn't exposed to userspace
at all. Neither the superblock nor an individual mount can go read-only
or change mount properties. The superblock cannot be frozen etc.

So really that mnt_get_write_access() should be pointless for
anonymous pipes. In other words, couldn't this also just be:

diff --git a/fs/pipe.c b/fs/pipe.c
index 8df42e1ab3a3..e9989621362c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -604,12 +604,25 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
        kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
        if (wake_next_writer)
                wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
-       if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
-               int err = file_update_time(filp);
-               if (err)
-                       ret = err;
-               sb_end_write(file_inode(filp)->i_sb);
+       if (ret > 0) {
+               int err;
+
+               if (!is_anon_pipe(file)) {
+                       if (sb_start_write_trylock(file_inode(filp)->i_sb)) {
+                               err = file_update_time(filp);
+                               if (err)
+                                       ret = err;
+
+                               sb_end_write(file_inode(filp)->i_sb);
+                       }
+               } else {
+                       // Anonymous pipes don't need all the mount + sb bruah.
+                       err = inode_needs_update_time(inode);
+                       if (err > 0)
+                               ret = inode_update_time(file_inode(filp), err);
+               }
        }
+
        return ret;
 }

and then have zero regression risk?

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] pipe: don't update {a,c,m}time for anonymous pipes
  2025-02-04 16:34   ` Christian Brauner
@ 2025-02-04 16:49     ` Linus Torvalds
  0 siblings, 0 replies; 9+ messages in thread
From: Linus Torvalds @ 2025-02-04 16:49 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Mateusz Guzik, Oleg Nesterov, David Howells, Gautham R. Shenoy,
	K Prateek Nayak, Neeraj Upadhyay, Oliver Sang, Swapnil Sapkal,
	WangYuli, linux-fsdevel, linux-kernel

On Tue, 4 Feb 2025 at 08:34, Christian Brauner <brauner@kernel.org> wrote:
>
> So really that mnt_get_write_access() should be pointless for
> anonymous pipes. In other words, couldn't this also just be:

I bet you'll find that even just inode_update_times() isn't all that cheap.

Yes, the silly "get writability" thing is pointless for pipes, but the
"get current time" really isn't a no-op either. There's all the crazy
"coarse time vs fine time" crud, there's justv a *lot* of overhead.

That's not really noticeable on a "real" file where you never access
just one byte anyway, and where any write costs are elsewhere. But
pipes can be very cheap, so the silly overheads really show up.

                 Linus

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2025-02-04 16:50 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-02-04 13:21 [PATCH] pipe: don't update {a,c,m}time for anonymous pipes Oleg Nesterov
2025-02-04 14:21 ` Mateusz Guzik
2025-02-04 14:39   ` Oleg Nesterov
2025-02-04 16:34   ` Christian Brauner
2025-02-04 16:49     ` Linus Torvalds
2025-02-04 14:48 ` Jeff Layton
2025-02-04 14:53 ` Jeff Layton
2025-02-04 15:28 ` Linus Torvalds
2025-02-04 15:55   ` Oleg Nesterov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox