linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: NeilBrown <neil@brown.name>
To: Alexander Viro <viro@zeniv.linux.org.uk>,
	Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>
Cc: linux-fsdevel@vger.kernel.org
Subject: [PATCH 8/8] VFS: allow a filesystem to opt out of directory locking.
Date: Mon,  9 Jun 2025 17:34:13 +1000	[thread overview]
Message-ID: <20250609075950.159417-9-neil@brown.name> (raw)
In-Reply-To: <20250609075950.159417-1-neil@brown.name>

The VFS no longer needs the directory to be locked when performing
updates in the directory (create/remove/rename).  We only lock
directories during these ops because the filesystem might expect that.
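Some filesystems may not need it.  Allow the filesystem to opt out by
setting no_dir_lock in inode_operations.  When it is set, the shared
lock taken for lookups (lookup_slow() and open without O_CREAT) is
skipped as well.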

Signed-off-by: NeilBrown <neil@brown.name>
---
 fs/namei.c         | 75 ++++++++++++++++++++++++++++++++--------------
 include/linux/fs.h |  1 +
 2 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 5c9279657b32..55ea67b4f891 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2001,7 +2001,8 @@ struct dentry *lookup_and_lock_hashed(struct qstr *last,
 {
 	struct dentry *dentry;
 
-	inode_lock_nested(base->d_inode, I_MUTEX_PARENT);
+	if (!d_inode(base)->i_op->no_dir_lock)
+		inode_lock_nested(base->d_inode, I_MUTEX_PARENT);
 
 retry:
 	dentry = lookup_one_qstr(last, base, lookup_flags);
@@ -2011,7 +2012,8 @@ struct dentry *lookup_and_lock_hashed(struct qstr *last,
 		goto retry;
 	}
 
-	if (IS_ERR(dentry))
+	if (IS_ERR(dentry) &&
+	    !d_inode(base)->i_op->no_dir_lock)
 		inode_unlock(base->d_inode);
 	return dentry;
 }
@@ -2066,11 +2068,13 @@ struct dentry *lookup_and_lock_noperm(struct qstr *last,
 {
 	struct dentry *dentry;
 
-	inode_lock_nested(base->d_inode, I_MUTEX_PARENT);
+	if (!d_inode(base)->i_op->no_dir_lock)
+		inode_lock_nested(base->d_inode, I_MUTEX_PARENT);
 
 	dentry = lookup_and_lock_noperm_locked(last, base, lookup_flags,
 					       DLOCK_NORMAL);
-	if (IS_ERR(dentry))
+	if (IS_ERR(dentry) &&
+	    !d_inode(base)->i_op->no_dir_lock)
 		inode_unlock(base->d_inode);
 	return dentry;
 }
@@ -2097,9 +2101,11 @@ struct dentry *lookup_and_lock_noperm_nested(struct qstr *last,
 {
 	struct dentry *dentry;
 
-	inode_lock_nested(base->d_inode, I_MUTEX_PARENT);
+	if (!d_inode(base)->i_op->no_dir_lock)
+		inode_lock_nested(base->d_inode, I_MUTEX_PARENT);
 	dentry = lookup_and_lock_noperm_locked(last, base, lookup_flags, class);
-	if (IS_ERR(dentry))
+	if (IS_ERR(dentry) &&
+	    !d_inode(base)->i_op->no_dir_lock)
 		inode_unlock(base->d_inode);
 	return dentry;
 }
@@ -2160,9 +2166,12 @@ struct dentry *lookup_and_lock_killable(struct mnt_idmap *idmap,
 	struct dentry *dentry;
 	int err;
 
-	err = down_write_killable_nested(&base->d_inode->i_rwsem, I_MUTEX_PARENT);
-	if (err)
-		return ERR_PTR(err);
+	if (!d_inode(base)->i_op->no_dir_lock) {
+		err = down_write_killable_nested(&base->d_inode->i_rwsem,
+						 I_MUTEX_PARENT);
+		if (err)
+			return ERR_PTR(err);
+	}
 	err = lookup_one_common(idmap, last, base);
 	if (err < 0)
 		return ERR_PTR(err);
@@ -2176,7 +2185,8 @@ struct dentry *lookup_and_lock_killable(struct mnt_idmap *idmap,
 			return ERR_PTR(-ERESTARTSYS);
 		goto retry;
 	}
-	if (IS_ERR(dentry))
+	if (IS_ERR(dentry) &&
+	    !d_inode(base)->i_op->no_dir_lock)
 		inode_unlock(base->d_inode);
 	return dentry;
 }
@@ -2205,7 +2215,8 @@ bool lock_and_check_dentry(struct dentry *child, struct dentry *parent)
 	}
 	/* get the child to balance with dentry_unlock() which puts it. */
 	dget(child);
-	inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
+	if (!d_inode(parent)->i_op->no_dir_lock)
+		inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
 	return true;
 }
 EXPORT_SYMBOL(lock_and_check_dentry);
@@ -2230,7 +2241,8 @@ void dentry_unlock(struct dentry *dentry)
 {
 	if (!IS_ERR(dentry)) {
 		d_lookup_done(dentry);
-		inode_unlock(dentry->d_parent->d_inode);
+		if (!dentry->d_parent->d_inode->i_op->no_dir_lock)
+			inode_unlock(dentry->d_parent->d_inode);
 		dentry_unlock_dir_locked(dentry);
 	}
 }
@@ -2342,9 +2354,11 @@ static struct dentry *lookup_slow(const struct qstr *name,
 {
 	struct inode *inode = dir->d_inode;
 	struct dentry *res;
-	inode_lock_shared(inode);
+	if (!inode->i_op->no_dir_lock)
+		inode_lock_shared(inode);
 	res = __lookup_slow(name, dir, flags);
-	inode_unlock_shared(inode);
+	if (!inode->i_op->no_dir_lock)
+		inode_unlock_shared(inode);
 	return res;
 }
 
@@ -3721,6 +3735,9 @@ static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2)
  */
 static struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 {
+	if (d_inode(p1)->i_op->no_dir_lock)
+		return NULL;
+
 	if (p1 == p2) {
 		inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
 		return NULL;
@@ -3735,6 +3752,9 @@ static struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
  */
 static struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2)
 {
+	if (d_inode(c1)->i_op->no_dir_lock)
+		return NULL;
+
 	if (READ_ONCE(c1->d_parent) == p2) {
 		/*
 		 * hopefully won't need to touch ->s_vfs_rename_mutex at all.
@@ -3773,6 +3793,8 @@ static struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2)
 
 static void unlock_rename(struct dentry *p1, struct dentry *p2)
 {
+	if (d_inode(p1)->i_op->no_dir_lock)
+		return;
 	inode_unlock(p1->d_inode);
 	if (p1 != p2) {
 		inode_unlock(p2->d_inode);
@@ -3880,6 +3902,10 @@ static struct dentry *lock_ancestors(struct dentry *d1, struct dentry *d2)
 {
 	struct dentry *locked, *ancestor;
 
+	if (!d_inode(d1)->i_op->no_dir_lock)
+		/* s_vfs_rename_mutex is being used, so skip this locking */
+		return NULL;
+
 	if (d1->d_parent == d2->d_parent)
 		/* Nothing to lock */
 		return NULL;
@@ -4194,6 +4220,7 @@ void dentry_unlock_rename(struct renamedata *rd)
 	renaming_unlock(rd->old_dir, rd->new_dir, rd->ancestor,
 			rd->old_dentry, rd->new_dentry);
 
+	if (!d_inode(rd->old_dir)->i_op->no_dir_lock)
 	unlock_rename(rd->old_dir, rd->new_dir);
 
 	dput(rd->old_dir);
@@ -4697,19 +4724,23 @@ static const char *open_last_lookups(struct nameidata *nd,
 		 * dropping this one anyway.
 		 */
 	}
-	if (open_flag & O_CREAT)
-		inode_lock(dir->d_inode);
-	else
-		inode_lock_shared(dir->d_inode);
+	if (!d_inode(dir)->i_op->no_dir_lock) {
+		if (open_flag & O_CREAT)
+			inode_lock(dir->d_inode);
+		else
+			inode_lock_shared(dir->d_inode);
+	}
 	dentry = lookup_open(nd, file, op, got_write);
 	if (!IS_ERR(dentry)) {
 		if (file->f_mode & FMODE_OPENED)
 			fsnotify_open(file);
 	}
-	if (open_flag & O_CREAT)
-		inode_unlock(dir->d_inode);
-	else
-		inode_unlock_shared(dir->d_inode);
+	if (!d_inode(dir)->i_op->no_dir_lock) {
+		if (open_flag & O_CREAT)
+			inode_unlock(dir->d_inode);
+		else
+			inode_unlock_shared(dir->d_inode);
+	}
 
 	if (got_write)
 		mnt_drop_write(nd->path.mnt);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6b4a1a1f4786..b213993c486a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2225,6 +2225,7 @@ int wrap_directory_iterator(struct file *, struct dir_context *,
 	{ return wrap_directory_iterator(file, ctx, x); }
 
 struct inode_operations {
+	bool no_dir_lock:1;
 	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
 	const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
 	int (*permission) (struct mnt_idmap *, struct inode *, int);
-- 
2.49.0


      parent reply	other threads:[~2025-06-09  8:00 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-09  7:34 [PATCH 0/8 preview] demonstrate proposed new locking strategy for directories NeilBrown
2025-06-09  7:34 ` [PATCH 1/8] VFS: use global wait-queue table for d_alloc_parallel() NeilBrown
2025-06-09  7:34 ` [PATCH 2/8] VFS: use d_alloc_parallel() in lookup_one_qstr_excl() NeilBrown
2025-06-09  7:34 ` [PATCH 3/8] fs/proc: take rcu_read_lock() in proc_sys_compare() NeilBrown
2025-06-09  7:34 ` [PATCH 4/8] VFS: Add ability to exclusively lock a dentry and use for open/create NeilBrown
2025-06-09  7:34 ` [PATCH 5/8] Introduce S_DYING which warns that S_DEAD might follow NeilBrown
2025-06-10 20:57   ` Al Viro
2025-06-11  1:00     ` NeilBrown
2025-06-11  1:13       ` Al Viro
2025-06-11  2:49         ` NeilBrown
2025-06-09  7:34 ` [PATCH 6/8] VFS: provide alternative to s_vfs_rename_mutex NeilBrown
2025-06-09  7:34 ` [PATCH 7/8] VFS: use new dentry locking for create/remove/rename NeilBrown
2025-06-10 20:36   ` Al Viro
2025-06-11  0:34     ` NeilBrown
2025-06-09  7:34 ` NeilBrown [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250609075950.159417-9-neil@brown.name \
    --to=neil@brown.name \
    --cc=brauner@kernel.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).