From: NeilBrown <neilb@suse.de>
To: Alexander Viro <viro@zeniv.linux.org.uk>,
Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>,
Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 08/11] VFS: add inode_dir_lock/unlock
Date: Fri, 20 Dec 2024 13:54:26 +1100 [thread overview]
Message-ID: <20241220030830.272429-9-neilb@suse.de> (raw)
In-Reply-To: <20241220030830.272429-1-neilb@suse.de>
During the transition from providing exclusive locking on the directory
for directory-modifying operations to providing exclusive locking only on
the dentry with a shared lock on the directory, we need an alternate
way to provide exclusion on the directory for file systems which haven't
been converted. This is provided by inode_lock_dir() and
inode_unlock_dir().
This uses the I_DIR_LOCKED bit in i_state for locking (a waiter also sets
I_DIR_LOCK_WAITER), and wait_var_event_spinlock() for waiting.
Signed-off-by: NeilBrown <neilb@suse.de>
---
fs/inode.c | 3 ++
fs/namei.c | 81 +++++++++++++++++++++++++++++++++++++---------
include/linux/fs.h | 5 +++
3 files changed, 74 insertions(+), 15 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 6b4c77268fc0..9ba69837aa56 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -492,6 +492,8 @@ EXPORT_SYMBOL(address_space_init_once);
*/
void inode_init_once(struct inode *inode)
{
+ static struct lock_class_key __key;
+
memset(inode, 0, sizeof(*inode));
INIT_HLIST_NODE(&inode->i_hash);
INIT_LIST_HEAD(&inode->i_devices);
@@ -501,6 +503,7 @@ void inode_init_once(struct inode *inode)
INIT_LIST_HEAD(&inode->i_sb_list);
__address_space_init_once(&inode->i_data);
i_size_ordered_init(inode);
+ lockdep_init_map(&inode->i_dirlock_map, "I_DIR_LOCKED", &__key, 0);
}
EXPORT_SYMBOL(inode_init_once);
diff --git a/fs/namei.c b/fs/namei.c
index 371c80902c59..68750b15dbf4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3364,6 +3364,34 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap,
return mode;
}
+static bool check_dir_locked(struct inode *dir)
+{
+ if (dir->i_state & I_DIR_LOCKED) {
+ dir->i_state |= I_DIR_LOCK_WAITER;
+ return true;
+ }
+ return false;
+}
+
+static void inode_lock_dir(struct inode *dir)
+{
+ lock_acquire_exclusive(&dir->i_dirlock_map, 0, 0, NULL, _THIS_IP_);
+ spin_lock(&dir->i_lock);
+ wait_var_event_spinlock(dir, !check_dir_locked(dir),
+ &dir->i_lock);
+ dir->i_state |= I_DIR_LOCKED;
+ spin_unlock(&dir->i_lock);
+}
+
+static void inode_unlock_dir(struct inode *dir)
+{
+ lock_map_release(&dir->i_dirlock_map);
+ spin_lock(&dir->i_lock);
+ dir->i_state &= ~(I_DIR_LOCKED | I_DIR_LOCK_WAITER);
+ wake_up_var_locked(dir, &dir->i_lock);
+ spin_unlock(&dir->i_lock);
+}
+
/**
* vfs_create - create new file
* @idmap: idmap of the mount the inode was found from
@@ -3396,10 +3424,13 @@ int vfs_create(struct mnt_idmap *idmap, struct inode *dir,
error = security_inode_create(dir, dentry, mode);
if (error)
return error;
- if (dir->i_op->create_shared)
+ if (dir->i_op->create_shared) {
error = dir->i_op->create_shared(idmap, dir, dentry, mode, want_excl);
- else
+ } else {
+ inode_lock_dir(dir);
error = dir->i_op->create(idmap, dir, dentry, mode, want_excl);
+ inode_unlock_dir(dir);
+ }
if (!error)
fsnotify_create(dir, dentry);
return error;
@@ -3699,16 +3730,19 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
file->f_mode |= FMODE_CREATED;
audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
- if (dir_inode->i_op->create_shared)
+ if (dir_inode->i_op->create_shared) {
error = dir_inode->i_op->create_shared(idmap, dir_inode,
dentry, mode,
open_flag & O_EXCL);
- else if (dir_inode->i_op->create)
+ } else if (dir_inode->i_op->create) {
+ inode_lock_dir(dir_inode);
error = dir_inode->i_op->create(idmap, dir_inode,
dentry, mode,
open_flag & O_EXCL);
- else
+ inode_unlock_dir(dir_inode);
+ } else {
error = -EACCES;
+ }
if (error)
goto out_dput;
}
@@ -4227,10 +4261,13 @@ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
if (error)
return error;
- if (dir->i_op->mknod_shared)
+ if (dir->i_op->mknod_shared) {
error = dir->i_op->mknod_shared(idmap, dir, dentry, mode, dev);
- else
+ } else {
+ inode_lock_dir(dir);
error = dir->i_op->mknod(idmap, dir, dentry, mode, dev);
+ inode_unlock_dir(dir);
+ }
if (!error)
fsnotify_create(dir, dentry);
return error;
@@ -4360,7 +4397,9 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
else if (de)
dput(de);
} else {
+ inode_lock_dir(dir);
error = dir->i_op->mkdir(idmap, dir, dentry, mode);
+ inode_unlock_dir(dir);
}
if (!error)
fsnotify_mkdir(dir, dentry);
@@ -4521,10 +4560,13 @@ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
if (error)
goto out;
- if (dir->i_op->rmdir_shared)
+ if (dir->i_op->rmdir_shared) {
error = dir->i_op->rmdir_shared(dir, dentry);
- else
+ } else {
+ inode_lock_dir(dir);
error = dir->i_op->rmdir(dir, dentry);
+ inode_unlock_dir(dir);
+ }
if (error)
goto out;
@@ -4648,10 +4690,13 @@ int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir,
error = try_break_deleg(target, delegated_inode);
if (error)
goto out;
- if (dir->i_op->unlink_shared)
+ if (dir->i_op->unlink_shared) {
error = dir->i_op->unlink_shared(dir, dentry);
- else
+ } else {
+ inode_lock_dir(dir);
error = dir->i_op->unlink(dir, dentry);
+ inode_unlock_dir(dir);
+ }
if (!error) {
dont_mount(dentry);
detach_mounts(dentry);
@@ -4792,10 +4837,13 @@ int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
if (error)
return error;
- if (dir->i_op->symlink_shared)
+ if (dir->i_op->symlink_shared) {
error = dir->i_op->symlink_shared(idmap, dir, dentry, oldname);
- else
+ } else {
+ inode_lock_dir(dir);
error = dir->i_op->symlink(idmap, dir, dentry, oldname);
+ inode_unlock_dir(dir);
+ }
if (!error)
fsnotify_create(dir, dentry);
return error;
@@ -4920,10 +4968,13 @@ int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap,
error = try_break_deleg(inode, delegated_inode);
if (error)
;
- else if (dir->i_op->link_shared)
+ else if (dir->i_op->link_shared) {
error = dir->i_op->link_shared(old_dentry, dir, new_dentry);
- else
+ } else {
+ inode_lock_dir(dir);
error = dir->i_op->link(old_dentry, dir, new_dentry);
+ inode_unlock_dir(dir);
+ }
}
if (!error && (inode->i_state & I_LINKABLE)) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 68eba181175b..3ca92a54f28e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -722,6 +722,8 @@ struct inode {
void (*free_inode)(struct inode *);
};
struct file_lock_context *i_flctx;
+
+ struct lockdep_map i_dirlock_map; /* For tracking I_DIR_LOCKED locks */
struct address_space i_data;
struct list_head i_devices;
union {
@@ -2493,6 +2495,9 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
#define I_SYNC_QUEUED (1 << 16)
#define I_PINNING_NETFS_WB (1 << 17)
+#define I_DIR_LOCK_WAITER (1 << 30)
+#define I_DIR_LOCKED (1 << 31)
+
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
--
2.47.0
next prev parent reply other threads:[~2024-12-20 3:09 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-20 2:54 [PATCH 00/11 RFC] Allow concurrent changes in a directory NeilBrown
2024-12-20 2:54 ` [PATCH 01/11] VFS: introduce vfs_mkdir_return() NeilBrown
2024-12-23 5:04 ` Al Viro
2024-12-23 7:26 ` NeilBrown
2024-12-20 2:54 ` [PATCH 02/11] VFS: add _shared versions of the various directory modifying inode_operations NeilBrown
2024-12-23 5:08 ` Al Viro
2024-12-20 2:54 ` [PATCH 03/11] VFS: use global wait-queue table for d_alloc_parallel() NeilBrown
2024-12-20 2:54 ` [PATCH 04/11] VFS: use d_alloc_parallel() in lookup_one_qstr_excl() NeilBrown
2024-12-20 2:54 ` [PATCH 05/11] VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry NeilBrown
2024-12-20 2:54 ` [PATCH 06/11] VFS: introduce done_lookup_and_lock() NeilBrown
2024-12-20 2:54 ` [PATCH 07/11] VFS: introduce lookup_and_lock() NeilBrown
2024-12-20 2:54 ` NeilBrown [this message]
2024-12-21 1:21 ` [PATCH 08/11] VFS: add inode_dir_lock/unlock Hillf Danton
2024-12-23 3:10 ` NeilBrown
2024-12-23 11:12 ` Hillf Danton
2024-12-23 20:36 ` NeilBrown
2024-12-24 10:26 ` Hillf Danton
2024-12-20 2:54 ` [PATCH 09/11] VFS: re-pack DENTRY_ flags NeilBrown
2024-12-20 2:54 ` [PATCH 10/11] VFS: take a shared lock for create/remove directory operations NeilBrown
2024-12-23 5:19 ` Al Viro
2024-12-23 7:11 ` NeilBrown
2024-12-23 7:26 ` Al Viro
2024-12-23 20:40 ` NeilBrown
2024-12-20 2:54 ` [PATCH 11/11] nfsd: use lookup_and_lock_one() NeilBrown
2024-12-20 20:55 ` [PATCH 00/11 RFC] Allow concurrent changes in a directory Andreas Dilger
2024-12-23 5:22 ` Al Viro
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241220030830.272429-9-neilb@suse.de \
--to=neilb@suse.de \
--cc=brauner@kernel.org \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox