linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mateusz Guzik <mjguzik@gmail.com>
To: brauner@kernel.org
Cc: viro@zeniv.linux.org.uk, linux-kernel@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, torvalds@linux-foundation.org,
	Mateusz Guzik <mjguzik@gmail.com>
Subject: [PATCH] vfs: shave work on failed file open
Date: Mon, 25 Sep 2023 22:55:45 +0200	[thread overview]
Message-ID: <20230925205545.4135472-1-mjguzik@gmail.com> (raw)

Failed opens (mostly ENOENT) legitimately happen a lot; for example, here
are stats from stracing a kernel build for a few seconds (strace -fc make):

  % time     seconds  usecs/call     calls    errors syscall
  ------ ----------- ----------- --------- --------- ------------------
    0.76    0.076233           5     15040      3688 openat

(these are tons of header files being tried in different paths)

Apart from a rare corner case where the file object is fully constructed
and we need to abort, there is a lot of overhead which can be avoided.

Most notably the delegation of freeing to task_work, which comes with an
enormous cost (see 021a160abf62 ("fs: use __fput_sync in close(2)") for
an example).

Benched with will-it-scale with a custom testcase based on
tests/open1.c:
[snip]
        while (1) {
                int fd = open("/tmp/nonexistent", O_RDONLY);
                assert(fd == -1);

                (*iterations)++;
        }
[/snip]

Sapphire Rapids, one worker in single-threaded case (ops/s):
before:	1950013
after:	2914973 (+49%)

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
---
 fs/file_table.c      | 39 +++++++++++++++++++++++++++++++++++++++
 fs/namei.c           |  2 +-
 include/linux/file.h |  1 +
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/fs/file_table.c b/fs/file_table.c
index ee21b3da9d08..320dc1f9aa0e 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -82,6 +82,16 @@ static inline void file_free(struct file *f)
 	call_rcu(&f->f_rcuhead, file_free_rcu);
 }
 
+static inline void file_free_badopen(struct file *f)
+{
+	BUG_ON(f->f_mode & (FMODE_BACKING | FMODE_OPENED));
+	security_file_free(f);
+	put_cred(f->f_cred);
+	if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
+		percpu_counter_dec(&nr_files);
+	kmem_cache_free(filp_cachep, f);
+}
+
 /*
  * Return the total number of open files in the system
  */
@@ -468,6 +478,35 @@ void __fput_sync(struct file *file)
 EXPORT_SYMBOL(fput);
 EXPORT_SYMBOL(__fput_sync);
 
+/*
+ * Clean up after failing to open (e.g., open(2) returns with -ENOENT).
+ *
+ * This represents opportunities to shave on work in the common case compared
+ * to the usual fput:
+ * 1. vast majority of the time FMODE_OPENED is not set, meaning there is no
+ *    need to delegate to task_work
+ * 2. if the above holds then we are guaranteed we have the only reference with
+ *    nobody else seeing the file, thus no need to use atomics to release it
+ * 3. then there is no need to delegate freeing to RCU
+ */
+void fput_badopen(struct file *file)
+{
+	if (unlikely(file->f_mode & (FMODE_BACKING | FMODE_OPENED))) {
+		fput(file);
+		return;
+	}
+
+	if (WARN_ON(atomic_long_read(&file->f_count) != 1)) {
+		fput(file);
+		return;
+	}
+
+	/* zero out the ref count to appease possible asserts */
+	atomic_long_set(&file->f_count, 0);
+	file_free_badopen(file);
+}
+EXPORT_SYMBOL(fput_badopen);
+
 void __init files_init(void)
 {
 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
diff --git a/fs/namei.c b/fs/namei.c
index 567ee547492b..67579fe30b28 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3802,7 +3802,7 @@ static struct file *path_openat(struct nameidata *nd,
 		WARN_ON(1);
 		error = -EINVAL;
 	}
-	fput(file);
+	fput_badopen(file);
 	if (error == -EOPENSTALE) {
 		if (flags & LOOKUP_RCU)
 			error = -ECHILD;
diff --git a/include/linux/file.h b/include/linux/file.h
index 6e9099d29343..96300e27d9a8 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -15,6 +15,7 @@
 struct file;
 
 extern void fput(struct file *);
+extern void fput_badopen(struct file *);
 
 struct file_operations;
 struct task_struct;
-- 
2.39.2


             reply	other threads:[~2023-09-25 20:55 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-25 20:55 Mateusz Guzik [this message]
2023-09-26 14:01 ` [PATCH] vfs: shave work on failed file open Christian Brauner
2023-09-26 14:07   ` Mateusz Guzik
2023-09-26 14:24     ` Christian Brauner
2023-09-26 15:40       ` Mateusz Guzik
2023-09-26 20:59 ` John Stoffel
2023-09-26 21:07   ` Mateusz Guzik
2023-09-27 18:43     ` John Stoffel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230925205545.4135472-1-mjguzik@gmail.com \
    --to=mjguzik@gmail.com \
    --cc=brauner@kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).