All of lore.kernel.org
 help / color / mirror / Atom feed
From: Al Viro <viro@zeniv.linux.org.uk>
To: Mateusz Guzik <mjguzik@gmail.com>
Cc: brauner@kernel.org, jack@suse.cz, linux-kernel@vger.kernel.org,
	linux-fsdevel@vger.kernel.org
Subject: [PATCH 3/3] avoid extra path_get/path_put cycle in path_openat()
Date: Thu, 22 Aug 2024 01:41:49 +0100	[thread overview]
Message-ID: <20240822004149.GR504335@ZenIV> (raw)
In-Reply-To: <20240822003359.GO504335@ZenIV>

Once we'd opened the file, nd->path and file->f_path have the
same contents.  Rather than having both pinned and nd->path
dropped by terminate_walk(), let's have them share the
references from the moment when FMODE_OPENED is set and
clear nd->path just before the terminate_walk() in such case.

To do that, we
	* add a variant of vfs_open() that does *not* do conditional
path_get() (vfs_open_borrow()); use it in do_open().
	* don't grab f->f_path.mnt in finish_open() - only
f->f_path.dentry.  Have atomic_open() drop the child dentry
in FMODE_OPENED case and return f->path.dentry without grabbing it.
	* adjust vfs_tmpfile() for finish_open() change (it
is called from ->tmpfile() instances).
	* make do_o_path() use vfs_open_borrow(), collapse path_put()
there with the conditional path_get() we would've get in vfs_open().
	* in FMODE_OPENED case clear nd->path before calling
terminate_walk().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/internal.h |  1 +
 fs/namei.c    | 22 ++++++++++++++--------
 fs/open.c     | 19 ++++++++++++++++++-
 3 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index cdd73209eecb..11834829cc3f 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -194,6 +194,7 @@ int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
 		int flag);
 int chown_common(const struct path *path, uid_t user, gid_t group);
 extern int vfs_open(const struct path *, struct file *);
+extern int vfs_open_borrow(const struct path *, struct file *);
 
 /*
  * inode.c
diff --git a/fs/namei.c b/fs/namei.c
index 5512cb10fa89..e02160460422 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3443,10 +3443,8 @@ static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	d_lookup_done(dentry);
 	if (!error) {
 		if (file->f_mode & FMODE_OPENED) {
-			if (unlikely(dentry != file->f_path.dentry)) {
-				dput(dentry);
-				dentry = dget(file->f_path.dentry);
-			}
+			dput(dentry);
+			dentry = file->f_path.dentry;
 		} else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
 			error = -EIO;
 		} else {
@@ -3724,7 +3722,7 @@ static int do_open(struct nameidata *nd,
 	}
 	error = may_open(idmap, &nd->path, acc_mode, open_flag);
 	if (!error && !(file->f_mode & FMODE_OPENED))
-		error = vfs_open(&nd->path, file);
+		error = vfs_open_borrow(&nd->path, file);
 	if (!error)
 		error = security_file_post_open(file, op->acc_mode);
 	if (!error && do_truncate)
@@ -3777,8 +3775,10 @@ int vfs_tmpfile(struct mnt_idmap *idmap,
 	mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
 	error = dir->i_op->tmpfile(idmap, dir, file, mode);
 	dput(child);
-	if (file->f_mode & FMODE_OPENED)
+	if (file->f_mode & FMODE_OPENED) {
+		mntget(file->f_path.mnt);
 		fsnotify_open(file);
+	}
 	if (error)
 		return error;
 	/* Don't check for other permissions, the inode was just created */
@@ -3857,8 +3857,9 @@ static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
 	int error = path_lookupat(nd, flags, &path);
 	if (!error) {
 		audit_inode(nd->name, path.dentry, 0);
-		error = vfs_open(&path, file);
-		path_put(&path);
+		error = vfs_open_borrow(&path, file);
+		if (!(file->f_mode & FMODE_OPENED))
+			path_put(&path);
 	}
 	return error;
 }
@@ -3884,6 +3885,11 @@ static struct file *path_openat(struct nameidata *nd,
 			;
 		if (!error)
 			error = do_open(nd, file, op);
+		if (file->f_mode & FMODE_OPENED) {
+			// borrowed into file->f_path, transfer it there
+			nd->path.mnt = NULL;
+			nd->path.dentry = NULL;
+		}
 		terminate_walk(nd);
 	}
 	if (likely(!error)) {
diff --git a/fs/open.c b/fs/open.c
index 0ec2e9a33856..f9988427fb97 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1046,7 +1046,7 @@ int finish_open(struct file *file, struct dentry *dentry,
 	file->f_path.dentry = dentry;
 	err = do_dentry_open(file, open);
 	if (file->f_mode & FMODE_OPENED)
-		path_get(&file->f_path);
+		dget(&file->f_path.dentry);
 	return err;
 }
 EXPORT_SYMBOL(finish_open);
@@ -1102,6 +1102,23 @@ int vfs_open(const struct path *path, struct file *file)
 	return ret;
 }
 
+int vfs_open_borrow(const struct path *path, struct file *file)
+{
+	int ret;
+
+	file->f_path = *path;
+	ret = do_dentry_open(file, NULL);
+	if (!ret) {
+		/*
+		 * Once we return a file with FMODE_OPENED, __fput() will call
+		 * fsnotify_close(), so we need fsnotify_open() here for
+		 * symmetry.
+		 */
+		fsnotify_open(file);
+	}
+	return ret;
+}
+
 struct file *dentry_open(const struct path *path, int flags,
 			 const struct cred *cred)
 {
-- 
2.39.2


  parent reply	other threads:[~2024-08-22  0:41 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-06 14:46 [PATCH] vfs: avoid spurious dentry ref/unref cycle on open Mateusz Guzik
2024-08-06 15:53 ` Al Viro
2024-08-06 16:09   ` Mateusz Guzik
2024-08-06 16:14     ` Mateusz Guzik
2024-08-07  3:38     ` Al Viro
2024-08-07  3:57       ` Mateusz Guzik
2024-08-07  5:32         ` Al Viro
2024-08-07  5:46           ` Mateusz Guzik
2024-08-07  6:23         ` Al Viro
2024-08-07  6:33           ` Al Viro
2024-08-07  6:40             ` Mateusz Guzik
2024-08-07  7:05               ` Al Viro
2024-08-07  7:22                 ` Mateusz Guzik
2024-08-07  7:52                   ` Al Viro
2024-08-07  7:59                     ` Mateusz Guzik
2024-08-07  9:50                       ` Mateusz Guzik
2024-08-07 12:43                         ` Al Viro
2024-08-07 20:38                           ` Al Viro
2024-08-20 11:38                             ` Mateusz Guzik
2024-08-22  0:33                               ` Al Viro
2024-08-22  0:34                                 ` [PATCH 1/3] don't duplicate vfs_open() in kernel_file_open() Al Viro
2024-08-22  7:53                                   ` Christian Brauner
2024-08-22  0:41                                 ` [PATCH 2/3] lift grabbing path into caller of do_dentry_open() Al Viro
2024-08-22  7:54                                   ` Christian Brauner
2024-08-22  0:41                                 ` Al Viro [this message]
2024-08-22  9:31                                   ` [PATCH 3/3] avoid extra path_get/path_put cycle in path_openat() Christian Brauner
2024-08-22 10:21                                   ` Mateusz Guzik
2024-08-22 15:16                                   ` kernel test robot
2024-08-22 16:28                                   ` kernel test robot
2025-02-17  8:03                                 ` [PATCH] vfs: avoid spurious dentry ref/unref cycle on open Mateusz Guzik
2024-08-08  6:26                           ` Mateusz Guzik
2024-08-06 22:51 ` Dave Chinner
2024-08-06 22:55   ` Mateusz Guzik
2024-08-07  2:56     ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240822004149.GR504335@ZenIV \
    --to=viro@zeniv.linux.org.uk \
    --cc=brauner@kernel.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mjguzik@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.