linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] fs: Propagate FMODE_NOCMTIME flag to user-facing O_NOCMTIME
@ 2025-10-03  9:32 Pavel Emelyanov
  2025-10-04  4:26 ` Christoph Hellwig
  0 siblings, 1 reply; 15+ messages in thread
From: Pavel Emelyanov @ 2025-10-03  9:32 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Raphael S . Carvalho, Christoph Hellwig, Pavel Emelyanov

The FMODE_NOCMTIME flag tells that ctime and mtime stamps are not
updated on IO. The flag was introduced long ago by 4d4be482a4 ("[XFS]
add a FMODE flag to make XFS invisible I/O less hacky"). Back then it
was suggested that this flag be propagated to an O_NOCMTIME one.
This patch does so.

It can be used by workloads that want to write a file but don't care
much about the precise timestamp on it and can update it later with
a utimens() call.

There's another reason for having this patch. When performing AIO write,
the file_modified_flags() function checks whether or not to update inode
times. In case update is needed and iocb carries the RWF_NOWAIT flag,
the check returns EINTR error that quickly propagates into cb completion
without doing any IO. This restriction effectively prevents doing AIO
writes with nowait flag, as file modifications really imply time update.

There was an attempt to mitigate this requirement [1] by a patch titled
"inode: Relax RWF_NOWAIT restriction for EINTR in file_modified_flags()"
It would require lazytime mount, but it's still probabilistic, as
marking inode dirty for future timestamp update is not guaranteed not to
block. A more bullet-proof approach would be not to update cmtime on
writes at all.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>

[1] https://marc.info/?l=linux-fsdevel&m=175768745515859&w=2
---
 arch/alpha/include/uapi/asm/fcntl.h  | 2 ++
 arch/parisc/include/uapi/asm/fcntl.h | 2 ++
 arch/sparc/include/uapi/asm/fcntl.h  | 2 ++
 fs/fcntl.c                           | 7 ++++---
 fs/inode.c                           | 2 +-
 fs/namei.c                           | 2 +-
 fs/xfs/xfs_exchrange.c               | 4 ++--
 fs/xfs/xfs_handle.c                  | 3 +--
 include/linux/fcntl.h                | 2 +-
 include/linux/fs.h                   | 8 --------
 include/trace/misc/fs.h              | 1 +
 include/uapi/asm-generic/fcntl.h     | 4 ++++
 12 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/arch/alpha/include/uapi/asm/fcntl.h b/arch/alpha/include/uapi/asm/fcntl.h
index 50bdc8e8a271..41b31ed1714d 100644
--- a/arch/alpha/include/uapi/asm/fcntl.h
+++ b/arch/alpha/include/uapi/asm/fcntl.h
@@ -35,6 +35,8 @@
 #define O_PATH		040000000
 #define __O_TMPFILE	0100000000
 
+#define O_NOCMTIME	0200000000
+
 #define F_GETLK		7
 #define F_SETLK		8
 #define F_SETLKW	9
diff --git a/arch/parisc/include/uapi/asm/fcntl.h b/arch/parisc/include/uapi/asm/fcntl.h
index 03dee816cb13..3c68f7918b70 100644
--- a/arch/parisc/include/uapi/asm/fcntl.h
+++ b/arch/parisc/include/uapi/asm/fcntl.h
@@ -20,6 +20,8 @@
 #define O_PATH		020000000
 #define __O_TMPFILE	040000000
 
+#define O_NOCMTIME	0100000000
+
 #define F_GETLK64	8
 #define F_SETLK64	9
 #define F_SETLKW64	10
diff --git a/arch/sparc/include/uapi/asm/fcntl.h b/arch/sparc/include/uapi/asm/fcntl.h
index 67dae75e5274..69590581b9f7 100644
--- a/arch/sparc/include/uapi/asm/fcntl.h
+++ b/arch/sparc/include/uapi/asm/fcntl.h
@@ -38,6 +38,8 @@
 #define O_PATH		0x1000000
 #define __O_TMPFILE	0x2000000
 
+#define O_NOCMTIME	0x4000000
+
 #define F_GETOWN	5	/*  for sockets. */
 #define F_SETOWN	6	/*  for sockets. */
 #define F_GETLK		7
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 72f8433d9109..e05129e7f658 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -34,7 +34,7 @@
 
 #include "internal.h"
 
-#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
+#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME | O_NOCMTIME)
 
 static int setfl(int fd, struct file * filp, unsigned int arg)
 {
@@ -49,7 +49,8 @@ static int setfl(int fd, struct file * filp, unsigned int arg)
 		return -EPERM;
 
 	/* O_NOATIME can only be set by the owner or superuser */
-	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
+	if (((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) ||
+			((arg & O_NOCMTIME) && !(filp->f_flags & O_NOCMTIME)))
 		if (!inode_owner_or_capable(file_mnt_idmap(filp), inode))
 			return -EPERM;
 
@@ -1156,7 +1157,7 @@ static int __init fcntl_init(void)
 	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
 	 * is defined as O_NONBLOCK on some platforms and not on others.
 	 */
-	BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ !=
+	BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
 		HWEIGHT32(
 			(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
 			__FMODE_EXEC));
diff --git a/fs/inode.c b/fs/inode.c
index ec9339024ac3..69b2faf6350b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2404,7 +2404,7 @@ static int file_modified_flags(struct file *file, int flags)
 	if (ret)
 		return ret;
 
-	if (unlikely(file->f_mode & FMODE_NOCMTIME))
+	if (unlikely(file->f_flags & O_NOCMTIME))
 		return 0;
 
 	ret = inode_needs_update_time(inode);
diff --git a/fs/namei.c b/fs/namei.c
index 507ca0d7878d..ba423dd12e48 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3577,7 +3577,7 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path,
 	}
 
 	/* O_NOATIME can only be set by the owner or superuser */
-	if (flag & O_NOATIME && !inode_owner_or_capable(idmap, inode))
+	if (flag & (O_NOATIME | O_NOCMTIME) && !inode_owner_or_capable(idmap, inode))
 		return -EPERM;
 
 	return 0;
diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c
index 0b41bdfecdfb..4b1eaa9db4ae 100644
--- a/fs/xfs/xfs_exchrange.c
+++ b/fs/xfs/xfs_exchrange.c
@@ -772,9 +772,9 @@ xfs_exchange_range(
 		return ret;
 
 	/* Update cmtime if the fd/inode don't forbid it. */
-	if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1))
+	if (!(fxr->file1->f_flags & O_NOCMTIME) && !IS_NOCMTIME(inode1))
 		fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME1;
-	if (!(fxr->file2->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode2))
+	if (!(fxr->file2->f_flags & O_NOCMTIME) && !IS_NOCMTIME(inode2))
 		fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME2;
 
 	file_start_write(fxr->file2);
diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c
index f19fce557354..0e8c84385f37 100644
--- a/fs/xfs/xfs_handle.c
+++ b/fs/xfs/xfs_handle.c
@@ -295,8 +295,7 @@ xfs_open_by_handle(
 	}
 
 	if (S_ISREG(inode->i_mode)) {
-		filp->f_flags |= O_NOATIME;
-		filp->f_mode |= FMODE_NOCMTIME;
+		filp->f_flags |= (O_NOATIME | O_NOCMTIME);
 	}
 
 	fd_install(fd, filp);
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index a332e79b3207..1105a0bd5847 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -10,7 +10,7 @@
 	(O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | \
 	 O_APPEND | O_NDELAY | O_NONBLOCK | __O_SYNC | O_DSYNC | \
 	 FASYNC	| O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \
-	 O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE)
+	 O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE | O_NOCMTIME)
 
 /* List of all valid flags for the how->resolve argument: */
 #define VALID_RESOLVE_FLAGS \
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 75fb216b0f7a..3f84e6a42e6e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -136,14 +136,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* 64bit hashes as llseek() offset (for directories) */
 #define FMODE_64BITHASH         ((__force fmode_t)(1 << 10))
 
-/*
- * Don't update ctime and mtime.
- *
- * Currently a special hack for the XFS open_by_handle ioctl, but we'll
- * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
- */
-#define FMODE_NOCMTIME		((__force fmode_t)(1 << 11))
-
 /* Expect random access pattern */
 #define FMODE_RANDOM		((__force fmode_t)(1 << 12))
 
diff --git a/include/trace/misc/fs.h b/include/trace/misc/fs.h
index 0406ebe2a80a..aa8cf481dcc0 100644
--- a/include/trace/misc/fs.h
+++ b/include/trace/misc/fs.h
@@ -37,6 +37,7 @@
 		{ O_DIRECTORY,		"O_DIRECTORY" }, \
 		{ O_NOFOLLOW,		"O_NOFOLLOW" }, \
 		{ O_NOATIME,		"O_NOATIME" }, \
+		{ O_NOCMTIME,		"O_NOCMTIME" }, \
 		{ O_CLOEXEC,		"O_CLOEXEC" })
 
 #define __fmode_flag(x)	{ (__force unsigned long)FMODE_##x, #x }
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 613475285643..39f637bfb19a 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -95,6 +95,10 @@
 #define O_NDELAY	O_NONBLOCK
 #endif
 
+#ifndef O_NOCMTIME
+#define O_NOCMTIME	040000000
+#endif
+
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
 #define F_SETFD		2	/* set/clear close_on_exec */
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2025-10-11  4:04 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2025-10-03  9:32 [PATCH] fs: Propagate FMODE_NOCMTIME flag to user-facing O_NOCMTIME Pavel Emelyanov
2025-10-04  4:26 ` Christoph Hellwig
2025-10-04 16:08   ` Andy Lutomirski
2025-10-07  5:08     ` Christoph Hellwig
2025-10-08 15:22       ` Andy Lutomirski
2025-10-08 21:27         ` Dave Chinner
2025-10-08 21:51           ` Andy Lutomirski
2025-10-11  1:35             ` Dave Chinner
2025-10-11  4:04               ` Andy Lutomirski
2025-10-10  5:27         ` Christoph Hellwig
2025-10-10 17:35           ` Andy Lutomirski
2025-10-05 22:06   ` Dave Chinner
2025-10-07  5:10     ` Christoph Hellwig
2025-10-05 23:38   ` Dave Chinner
2025-10-06  2:16     ` Theodore Ts'o

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).