* [PATCH v4 01/14] debugfs: rename end_creating() to debugfs_end_creating()
2025-10-29 23:31 [PATCH v4 00/14] Create and use APIs to centralise locking for directory ops NeilBrown
@ 2025-10-29 23:31 ` NeilBrown
2025-10-30 5:32 ` Greg Kroah-Hartman
2025-10-29 23:31 ` [PATCH v4 02/14] VFS: introduce start_dirop() and end_dirop() NeilBrown
` (12 subsequent siblings)
13 siblings, 1 reply; 24+ messages in thread
From: NeilBrown @ 2025-10-29 23:31 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Amir Goldstein
Cc: Jan Kara, linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Chuck Lever,
Olga Kornievskaia, Dai Ngo, Namjae Jeon, Steve French,
Sergey Senozhatsky, Carlos Maiolino, John Johansen, Paul Moore,
James Morris, Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek,
Mateusz Guzik, Lorenzo Stoakes, Stefan Berger, Darrick J. Wong,
linux-kernel, netfs, ecryptfs, linux-nfs, linux-unionfs,
linux-cifs, linux-xfs, apparmor, linux-security-module, selinux
From: NeilBrown <neil@brown.name>
By not using the generic end_creating() name here we are free to use it
more globally for a more generic function.
This should have been done when start_creating() was renamed.
For consistency, also rename failed_creating().
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neil@brown.name>
---
fs/debugfs/inode.c | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 661a99a7dfbe..f241b9df642a 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -403,7 +403,7 @@ static struct dentry *debugfs_start_creating(const char *name,
return dentry;
}
-static struct dentry *failed_creating(struct dentry *dentry)
+static struct dentry *debugfs_failed_creating(struct dentry *dentry)
{
inode_unlock(d_inode(dentry->d_parent));
dput(dentry);
@@ -411,7 +411,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
return ERR_PTR(-ENOMEM);
}
-static struct dentry *end_creating(struct dentry *dentry)
+static struct dentry *debugfs_end_creating(struct dentry *dentry)
{
inode_unlock(d_inode(dentry->d_parent));
return dentry;
@@ -435,7 +435,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
return dentry;
if (!(debugfs_allow & DEBUGFS_ALLOW_API)) {
- failed_creating(dentry);
+ debugfs_failed_creating(dentry);
return ERR_PTR(-EPERM);
}
@@ -443,7 +443,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
if (unlikely(!inode)) {
pr_err("out of free dentries, can not create file '%s'\n",
name);
- return failed_creating(dentry);
+ return debugfs_failed_creating(dentry);
}
inode->i_mode = mode;
@@ -458,7 +458,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
d_instantiate(dentry, inode);
fsnotify_create(d_inode(dentry->d_parent), dentry);
- return end_creating(dentry);
+ return debugfs_end_creating(dentry);
}
struct dentry *debugfs_create_file_full(const char *name, umode_t mode,
@@ -585,7 +585,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
return dentry;
if (!(debugfs_allow & DEBUGFS_ALLOW_API)) {
- failed_creating(dentry);
+ debugfs_failed_creating(dentry);
return ERR_PTR(-EPERM);
}
@@ -593,7 +593,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
if (unlikely(!inode)) {
pr_err("out of free dentries, can not create directory '%s'\n",
name);
- return failed_creating(dentry);
+ return debugfs_failed_creating(dentry);
}
inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
@@ -605,7 +605,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
d_instantiate(dentry, inode);
inc_nlink(d_inode(dentry->d_parent));
fsnotify_mkdir(d_inode(dentry->d_parent), dentry);
- return end_creating(dentry);
+ return debugfs_end_creating(dentry);
}
EXPORT_SYMBOL_GPL(debugfs_create_dir);
@@ -632,7 +632,7 @@ struct dentry *debugfs_create_automount(const char *name,
return dentry;
if (!(debugfs_allow & DEBUGFS_ALLOW_API)) {
- failed_creating(dentry);
+ debugfs_failed_creating(dentry);
return ERR_PTR(-EPERM);
}
@@ -640,7 +640,7 @@ struct dentry *debugfs_create_automount(const char *name,
if (unlikely(!inode)) {
pr_err("out of free dentries, can not create automount '%s'\n",
name);
- return failed_creating(dentry);
+ return debugfs_failed_creating(dentry);
}
make_empty_dir_inode(inode);
@@ -652,7 +652,7 @@ struct dentry *debugfs_create_automount(const char *name,
d_instantiate(dentry, inode);
inc_nlink(d_inode(dentry->d_parent));
fsnotify_mkdir(d_inode(dentry->d_parent), dentry);
- return end_creating(dentry);
+ return debugfs_end_creating(dentry);
}
EXPORT_SYMBOL(debugfs_create_automount);
@@ -699,13 +699,13 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
pr_err("out of free dentries, can not create symlink '%s'\n",
name);
kfree(link);
- return failed_creating(dentry);
+ return debugfs_failed_creating(dentry);
}
inode->i_mode = S_IFLNK | S_IRWXUGO;
inode->i_op = &debugfs_symlink_inode_operations;
inode->i_link = link;
d_instantiate(dentry, inode);
- return end_creating(dentry);
+ return debugfs_end_creating(dentry);
}
EXPORT_SYMBOL_GPL(debugfs_create_symlink);
--
2.50.0.107.gf914562f5916.dirty
^ permalink raw reply related [flat|nested] 24+ messages in thread* Re: [PATCH v4 01/14] debugfs: rename end_creating() to debugfs_end_creating()
2025-10-29 23:31 ` [PATCH v4 01/14] debugfs: rename end_creating() to debugfs_end_creating() NeilBrown
@ 2025-10-30 5:32 ` Greg Kroah-Hartman
0 siblings, 0 replies; 24+ messages in thread
From: Greg Kroah-Hartman @ 2025-10-30 5:32 UTC (permalink / raw)
To: NeilBrown
Cc: Alexander Viro, Christian Brauner, Amir Goldstein, Jan Kara,
linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Rafael J. Wysocki, Danilo Krummrich, Tyler Hicks,
Miklos Szeredi, Chuck Lever, Olga Kornievskaia, Dai Ngo,
Namjae Jeon, Steve French, Sergey Senozhatsky, Carlos Maiolino,
John Johansen, Paul Moore, James Morris, Serge E. Hallyn,
Stephen Smalley, Ondrej Mosnacek, Mateusz Guzik, Lorenzo Stoakes,
Stefan Berger, Darrick J. Wong, linux-kernel, netfs, ecryptfs,
linux-nfs, linux-unionfs, linux-cifs, linux-xfs, apparmor,
linux-security-module, selinux
On Thu, Oct 30, 2025 at 10:31:01AM +1100, NeilBrown wrote:
> From: NeilBrown <neil@brown.name>
>
> By not using the generic end_creating() name here we are free to use it
> more globally for a more generic function.
> This should have been done when start_creating() was renamed.
>
> For consistency, also rename failed_creating().
>
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> Reviewed-by: Jeff Layton <jlayton@kernel.org>
> Signed-off-by: NeilBrown <neil@brown.name>
> ---
> fs/debugfs/inode.c | 26 +++++++++++++-------------
> 1 file changed, 13 insertions(+), 13 deletions(-)
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH v4 02/14] VFS: introduce start_dirop() and end_dirop()
2025-10-29 23:31 [PATCH v4 00/14] Create and use APIs to centralise locking for directory ops NeilBrown
2025-10-29 23:31 ` [PATCH v4 01/14] debugfs: rename end_creating() to debugfs_end_creating() NeilBrown
@ 2025-10-29 23:31 ` NeilBrown
2025-10-29 23:31 ` [PATCH v4 03/14] VFS: tidy up do_unlinkat() NeilBrown
` (11 subsequent siblings)
13 siblings, 0 replies; 24+ messages in thread
From: NeilBrown @ 2025-10-29 23:31 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Amir Goldstein
Cc: Jan Kara, linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Chuck Lever,
Olga Kornievskaia, Dai Ngo, Namjae Jeon, Steve French,
Sergey Senozhatsky, Carlos Maiolino, John Johansen, Paul Moore,
James Morris, Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek,
Mateusz Guzik, Lorenzo Stoakes, Stefan Berger, Darrick J. Wong,
linux-kernel, netfs, ecryptfs, linux-nfs, linux-unionfs,
linux-cifs, linux-xfs, apparmor, linux-security-module, selinux
From: NeilBrown <neil@brown.name>
The fact that directory operations (create,remove,rename) are protected
by a lock on the parent is known widely throughout the kernel.
In order to change this - to instead lock the target dentry - it is
best to centralise this knowledge so it can be changed in one place.
This patch introduces start_dirop() which is local to VFS code.
It performs the required locking for create and remove. Rename
will be handled separately.
Various functions with names like start_creating() or start_removing_path(),
some of which already exist, will export this functionality beyond the VFS.
end_dirop() is the partner of start_dirop(). It drops the lock and
releases the reference on the dentry.
It *is* exported so that various end_creating etc functions can be inline.
As vfs_mkdir() drops the dentry on error we cannot use end_dirop() as
that won't unlock when the dentry IS_ERR(). For now we need an explicit
unlock when dentry IS_ERR(). I hope to change vfs_mkdir() to unlock
when it drops a dentry so that explicit unlock can go away.
end_dirop() can always be called on the result of start_dirop(), but not
after vfs_mkdir(). After a vfs_mkdir() we still may need the explicit
unlock as seen in end_creating_path().
As well as adding start_dirop() and end_dirop()
this patch uses them in:
- simple_start_creating (which requires sharing lookup_noperm_common()
with libfs.c)
- start_removing_path / start_removing_user_path_at
- filename_create / end_creating_path()
- do_rmdir(), do_unlinkat()
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
---
fs/internal.h | 3 ++
fs/libfs.c | 36 ++++++++---------
fs/namei.c | 98 ++++++++++++++++++++++++++++++++++------------
include/linux/fs.h | 2 +
4 files changed, 95 insertions(+), 44 deletions(-)
diff --git a/fs/internal.h b/fs/internal.h
index 9b2b4d116880..d08d5e2235e9 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -67,6 +67,9 @@ int vfs_tmpfile(struct mnt_idmap *idmap,
const struct path *parentpath,
struct file *file, umode_t mode);
struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
+struct dentry *start_dirop(struct dentry *parent, struct qstr *name,
+ unsigned int lookup_flags);
+int lookup_noperm_common(struct qstr *qname, struct dentry *base);
/*
* namespace.c
diff --git a/fs/libfs.c b/fs/libfs.c
index ce8c496a6940..02371f45ef7d 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -2289,27 +2289,25 @@ void stashed_dentry_prune(struct dentry *dentry)
cmpxchg(stashed, dentry, NULL);
}
-/* parent must be held exclusive */
+/**
+ * simple_start_creating - prepare to create a given name
+ * @parent: directory in which to prepare to create the name
+ * @name: the name to be created
+ *
+ * Required lock is taken and a lookup in performed prior to creating an
+ * object in a directory. No permission checking is performed.
+ *
+ * Returns: a negative dentry on which vfs_create() or similar may
+ * be attempted, or an error.
+ */
struct dentry *simple_start_creating(struct dentry *parent, const char *name)
{
- struct dentry *dentry;
- struct inode *dir = d_inode(parent);
+ struct qstr qname = QSTR(name);
+ int err;
- inode_lock(dir);
- if (unlikely(IS_DEADDIR(dir))) {
- inode_unlock(dir);
- return ERR_PTR(-ENOENT);
- }
- dentry = lookup_noperm(&QSTR(name), parent);
- if (IS_ERR(dentry)) {
- inode_unlock(dir);
- return dentry;
- }
- if (dentry->d_inode) {
- dput(dentry);
- inode_unlock(dir);
- return ERR_PTR(-EEXIST);
- }
- return dentry;
+ err = lookup_noperm_common(&qname, parent);
+ if (err)
+ return ERR_PTR(err);
+ return start_dirop(parent, &qname, LOOKUP_CREATE | LOOKUP_EXCL);
}
EXPORT_SYMBOL(simple_start_creating);
diff --git a/fs/namei.c b/fs/namei.c
index 7377020a2cba..3618efd4bcaa 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2765,6 +2765,48 @@ static int filename_parentat(int dfd, struct filename *name,
return __filename_parentat(dfd, name, flags, parent, last, type, NULL);
}
+/**
+ * start_dirop - begin a create or remove dirop, performing locking and lookup
+ * @parent: the dentry of the parent in which the operation will occur
+ * @name: a qstr holding the name within that parent
+ * @lookup_flags: intent and other lookup flags.
+ *
+ * The lookup is performed and necessary locks are taken so that, on success,
+ * the returned dentry can be operated on safely.
+ * The qstr must already have the hash value calculated.
+ *
+ * Returns: a locked dentry, or an error.
+ *
+ */
+struct dentry *start_dirop(struct dentry *parent, struct qstr *name,
+ unsigned int lookup_flags)
+{
+ struct dentry *dentry;
+ struct inode *dir = d_inode(parent);
+
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ dentry = lookup_one_qstr_excl(name, parent, lookup_flags);
+ if (IS_ERR(dentry))
+ inode_unlock(dir);
+ return dentry;
+}
+
+/**
+ * end_dirop - signal completion of a dirop
+ * @de: the dentry which was returned by start_dirop or similar.
+ *
+ * If the de is an error, nothing happens. Otherwise any lock taken to
+ * protect the dentry is dropped and the dentry itself is release (dput()).
+ */
+void end_dirop(struct dentry *de)
+{
+ if (!IS_ERR(de)) {
+ inode_unlock(de->d_parent->d_inode);
+ dput(de);
+ }
+}
+EXPORT_SYMBOL(end_dirop);
+
/* does lookup, returns the object with parent locked */
static struct dentry *__start_removing_path(int dfd, struct filename *name,
struct path *path)
@@ -2781,10 +2823,9 @@ static struct dentry *__start_removing_path(int dfd, struct filename *name,
return ERR_PTR(-EINVAL);
/* don't fail immediately if it's r/o, at least try to report other errors */
error = mnt_want_write(parent_path.mnt);
- inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT);
- d = lookup_one_qstr_excl(&last, parent_path.dentry, 0);
+ d = start_dirop(parent_path.dentry, &last, 0);
if (IS_ERR(d))
- goto unlock;
+ goto drop;
if (error)
goto fail;
path->dentry = no_free_ptr(parent_path.dentry);
@@ -2792,10 +2833,9 @@ static struct dentry *__start_removing_path(int dfd, struct filename *name,
return d;
fail:
- dput(d);
+ end_dirop(d);
d = ERR_PTR(error);
-unlock:
- inode_unlock(parent_path.dentry->d_inode);
+drop:
if (!error)
mnt_drop_write(parent_path.mnt);
return d;
@@ -2910,7 +2950,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
}
EXPORT_SYMBOL(vfs_path_lookup);
-static int lookup_noperm_common(struct qstr *qname, struct dentry *base)
+int lookup_noperm_common(struct qstr *qname, struct dentry *base)
{
const char *name = qname->name;
u32 len = qname->len;
@@ -4223,21 +4263,18 @@ static struct dentry *filename_create(int dfd, struct filename *name,
*/
if (last.name[last.len] && !want_dir)
create_flags &= ~LOOKUP_CREATE;
- inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
- dentry = lookup_one_qstr_excl(&last, path->dentry,
- reval_flag | create_flags);
+ dentry = start_dirop(path->dentry, &last, reval_flag | create_flags);
if (IS_ERR(dentry))
- goto unlock;
+ goto out_drop_write;
if (unlikely(error))
goto fail;
return dentry;
fail:
- dput(dentry);
+ end_dirop(dentry);
dentry = ERR_PTR(error);
-unlock:
- inode_unlock(path->dentry->d_inode);
+out_drop_write:
if (!error)
mnt_drop_write(path->mnt);
out:
@@ -4256,11 +4293,26 @@ struct dentry *start_creating_path(int dfd, const char *pathname,
}
EXPORT_SYMBOL(start_creating_path);
+/**
+ * end_creating_path - finish a code section started by start_creating_path()
+ * @path: the path instantiated by start_creating_path()
+ * @dentry: the dentry returned by start_creating_path()
+ *
+ * end_creating_path() will unlock and locks taken by start_creating_path()
+ * and drop an references that were taken. It should only be called
+ * if start_creating_path() returned a non-error.
+ * If vfs_mkdir() was called and it returned an error, that error *should*
+ * be passed to end_creating_path() together with the path.
+ */
void end_creating_path(const struct path *path, struct dentry *dentry)
{
- if (!IS_ERR(dentry))
- dput(dentry);
- inode_unlock(path->dentry->d_inode);
+ if (IS_ERR(dentry))
+ /* The parent is still locked despite the error from
+ * vfs_mkdir() - must unlock it.
+ */
+ inode_unlock(path->dentry->d_inode);
+ else
+ end_dirop(dentry);
mnt_drop_write(path->mnt);
path_put(path);
}
@@ -4592,8 +4644,7 @@ int do_rmdir(int dfd, struct filename *name)
if (error)
goto exit2;
- inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
- dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags);
+ dentry = start_dirop(path.dentry, &last, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto exit3;
@@ -4602,9 +4653,8 @@ int do_rmdir(int dfd, struct filename *name)
goto exit4;
error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry);
exit4:
- dput(dentry);
+ end_dirop(dentry);
exit3:
- inode_unlock(path.dentry->d_inode);
mnt_drop_write(path.mnt);
exit2:
path_put(&path);
@@ -4721,8 +4771,7 @@ int do_unlinkat(int dfd, struct filename *name)
if (error)
goto exit2;
retry_deleg:
- inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
- dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags);
+ dentry = start_dirop(path.dentry, &last, lookup_flags);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
@@ -4737,9 +4786,8 @@ int do_unlinkat(int dfd, struct filename *name)
error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode,
dentry, &delegated_inode);
exit3:
- dput(dentry);
+ end_dirop(dentry);
}
- inode_unlock(path.dentry->d_inode);
if (inode)
iput(inode); /* truncate the inode here */
inode = NULL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c895146c1444..f4543612ef1e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3609,6 +3609,8 @@ extern void iterate_supers_type(struct file_system_type *,
void filesystems_freeze(void);
void filesystems_thaw(void);
+void end_dirop(struct dentry *de);
+
extern int dcache_dir_open(struct inode *, struct file *);
extern int dcache_dir_close(struct inode *, struct file *);
extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
--
2.50.0.107.gf914562f5916.dirty
^ permalink raw reply related [flat|nested] 24+ messages in thread* [PATCH v4 03/14] VFS: tidy up do_unlinkat()
2025-10-29 23:31 [PATCH v4 00/14] Create and use APIs to centralise locking for directory ops NeilBrown
2025-10-29 23:31 ` [PATCH v4 01/14] debugfs: rename end_creating() to debugfs_end_creating() NeilBrown
2025-10-29 23:31 ` [PATCH v4 02/14] VFS: introduce start_dirop() and end_dirop() NeilBrown
@ 2025-10-29 23:31 ` NeilBrown
2025-10-29 23:31 ` [PATCH v4 04/14] VFS/nfsd/cachefiles/ovl: add start_creating() and end_creating() NeilBrown
` (10 subsequent siblings)
13 siblings, 0 replies; 24+ messages in thread
From: NeilBrown @ 2025-10-29 23:31 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Amir Goldstein
Cc: Jan Kara, linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Chuck Lever,
Olga Kornievskaia, Dai Ngo, Namjae Jeon, Steve French,
Sergey Senozhatsky, Carlos Maiolino, John Johansen, Paul Moore,
James Morris, Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek,
Mateusz Guzik, Lorenzo Stoakes, Stefan Berger, Darrick J. Wong,
linux-kernel, netfs, ecryptfs, linux-nfs, linux-unionfs,
linux-cifs, linux-xfs, apparmor, linux-security-module, selinux
From: NeilBrown <neil@brown.name>
The simplification of locking in the previous patch opens up some room
for tidying up do_unlinkat()
- change all "exit" labels to describe what will happen at the label.
- always goto an exit label on an error - unwrap the "if (!IS_ERR())" branch.
- Move the "slashes" handing inline, but mark it as unlikely()
- simplify use of the "inode" variable - we no longer need to test for NULL.
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
---
fs/namei.c | 55 ++++++++++++++++++++++++++----------------------------
1 file changed, 26 insertions(+), 29 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index 3618efd4bcaa..9effaad115d9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4755,65 +4755,62 @@ int do_unlinkat(int dfd, struct filename *name)
struct path path;
struct qstr last;
int type;
- struct inode *inode = NULL;
+ struct inode *inode;
struct inode *delegated_inode = NULL;
unsigned int lookup_flags = 0;
retry:
error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
if (error)
- goto exit1;
+ goto exit_putname;
error = -EISDIR;
if (type != LAST_NORM)
- goto exit2;
+ goto exit_path_put;
error = mnt_want_write(path.mnt);
if (error)
- goto exit2;
+ goto exit_path_put;
retry_deleg:
dentry = start_dirop(path.dentry, &last, lookup_flags);
error = PTR_ERR(dentry);
- if (!IS_ERR(dentry)) {
+ if (IS_ERR(dentry))
+ goto exit_drop_write;
- /* Why not before? Because we want correct error value */
- if (last.name[last.len])
- goto slashes;
- inode = dentry->d_inode;
- ihold(inode);
- error = security_path_unlink(&path, dentry);
- if (error)
- goto exit3;
- error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode,
- dentry, &delegated_inode);
-exit3:
+ /* Why not before? Because we want correct error value */
+ if (unlikely(last.name[last.len])) {
+ if (d_is_dir(dentry))
+ error = -EISDIR;
+ else
+ error = -ENOTDIR;
end_dirop(dentry);
+ goto exit_drop_write;
}
- if (inode)
- iput(inode); /* truncate the inode here */
- inode = NULL;
+ inode = dentry->d_inode;
+ ihold(inode);
+ error = security_path_unlink(&path, dentry);
+ if (error)
+ goto exit_end_dirop;
+ error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode,
+ dentry, &delegated_inode);
+exit_end_dirop:
+ end_dirop(dentry);
+ iput(inode); /* truncate the inode here */
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)
goto retry_deleg;
}
+exit_drop_write:
mnt_drop_write(path.mnt);
-exit2:
+exit_path_put:
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
- inode = NULL;
goto retry;
}
-exit1:
+exit_putname:
putname(name);
return error;
-
-slashes:
- if (d_is_dir(dentry))
- error = -EISDIR;
- else
- error = -ENOTDIR;
- goto exit3;
}
SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
--
2.50.0.107.gf914562f5916.dirty
^ permalink raw reply related [flat|nested] 24+ messages in thread* [PATCH v4 04/14] VFS/nfsd/cachefiles/ovl: add start_creating() and end_creating()
2025-10-29 23:31 [PATCH v4 00/14] Create and use APIs to centralise locking for directory ops NeilBrown
` (2 preceding siblings ...)
2025-10-29 23:31 ` [PATCH v4 03/14] VFS: tidy up do_unlinkat() NeilBrown
@ 2025-10-29 23:31 ` NeilBrown
2025-10-29 23:31 ` [PATCH v4 05/14] VFS/nfsd/cachefiles/ovl: introduce start_removing() and end_removing() NeilBrown
` (9 subsequent siblings)
13 siblings, 0 replies; 24+ messages in thread
From: NeilBrown @ 2025-10-29 23:31 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Amir Goldstein
Cc: Jan Kara, linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Chuck Lever,
Olga Kornievskaia, Dai Ngo, Namjae Jeon, Steve French,
Sergey Senozhatsky, Carlos Maiolino, John Johansen, Paul Moore,
James Morris, Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek,
Mateusz Guzik, Lorenzo Stoakes, Stefan Berger, Darrick J. Wong,
linux-kernel, netfs, ecryptfs, linux-nfs, linux-unionfs,
linux-cifs, linux-xfs, apparmor, linux-security-module, selinux
From: NeilBrown <neil@brown.name>
start_creating() is similar to simple_start_creating() but is not so
simple.
It takes a qstr for the name, includes permission checking, and does NOT
report an error if the name already exists, returning a positive dentry
instead.
This is currently used by nfsd, cachefiles, and overlayfs.
end_creating() is called after the dentry has been used.
end_creating() drops the reference to the dentry as it is generally no
longer needed. This is exactly the first section of end_creating_path()
so that function is changed to call the new end_creating()
These calls help encapsulate locking rules so that directory locking can
be changed.
Occasionally this change means that the parent lock is held for a
shorter period of time, for example in cachefiles_commit_tmpfile().
As this function now unlocks after an unlink and before the following
lookup, it is possible that the lookup could again find a positive
dentry, so a while loop is introduced there.
In overlayfs the ovl_lookup_temp() function has ovl_tempname()
split out to be used in ovl_start_creating_temp(). The other use
of ovl_lookup_temp() is preparing for a rename. When rename handling
is updated, ovl_lookup_temp() will be removed.
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
---
fs/cachefiles/namei.c | 41 ++++++++---------
fs/namei.c | 35 ++++++++++++---
fs/nfsd/nfs3proc.c | 14 +++---
fs/nfsd/nfs4proc.c | 14 +++---
fs/nfsd/nfs4recover.c | 16 +++----
fs/nfsd/nfsproc.c | 11 +++--
fs/nfsd/vfs.c | 52 +++++++++-------------
fs/overlayfs/copy_up.c | 19 ++++----
fs/overlayfs/dir.c | 96 +++++++++++++++++++++++-----------------
fs/overlayfs/overlayfs.h | 8 ++++
fs/overlayfs/super.c | 32 +++++++-------
include/linux/namei.h | 33 ++++++++++++++
12 files changed, 213 insertions(+), 158 deletions(-)
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index d1edb2ac3837..0a136eb434da 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -93,12 +93,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
_enter(",,%s", dirname);
/* search the current directory for the element name */
- inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
retry:
ret = cachefiles_inject_read_error();
if (ret == 0)
- subdir = lookup_one(&nop_mnt_idmap, &QSTR(dirname), dir);
+ subdir = start_creating(&nop_mnt_idmap, dir, &QSTR(dirname));
else
subdir = ERR_PTR(ret);
trace_cachefiles_lookup(NULL, dir, subdir);
@@ -141,7 +140,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
trace_cachefiles_mkdir(dir, subdir);
if (unlikely(d_unhashed(subdir) || d_is_negative(subdir))) {
- dput(subdir);
+ end_creating(subdir, dir);
goto retry;
}
ASSERT(d_backing_inode(subdir));
@@ -154,7 +153,8 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
/* Tell rmdir() it's not allowed to delete the subdir */
inode_lock(d_inode(subdir));
- inode_unlock(d_inode(dir));
+ dget(subdir);
+ end_creating(subdir, dir);
if (!__cachefiles_mark_inode_in_use(NULL, d_inode(subdir))) {
pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
@@ -196,14 +196,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
return ERR_PTR(-EBUSY);
mkdir_error:
- inode_unlock(d_inode(dir));
- if (!IS_ERR(subdir))
- dput(subdir);
+ end_creating(subdir, dir);
pr_err("mkdir %s failed with error %d\n", dirname, ret);
return ERR_PTR(ret);
lookup_error:
- inode_unlock(d_inode(dir));
ret = PTR_ERR(subdir);
pr_err("Lookup %s failed with error %d\n", dirname, ret);
return ERR_PTR(ret);
@@ -679,36 +676,41 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache,
_enter(",%pD", object->file);
- inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
ret = cachefiles_inject_read_error();
if (ret == 0)
- dentry = lookup_one(&nop_mnt_idmap, &QSTR(object->d_name), fan);
+ dentry = start_creating(&nop_mnt_idmap, fan, &QSTR(object->d_name));
else
dentry = ERR_PTR(ret);
if (IS_ERR(dentry)) {
trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry),
cachefiles_trace_lookup_error);
_debug("lookup fail %ld", PTR_ERR(dentry));
- goto out_unlock;
+ goto out;
}
- if (!d_is_negative(dentry)) {
+ /*
+ * This loop will only execute more than once if some other thread
+ * races to create the object we are trying to create.
+ */
+ while (!d_is_negative(dentry)) {
ret = cachefiles_unlink(volume->cache, object, fan, dentry,
FSCACHE_OBJECT_IS_STALE);
if (ret < 0)
- goto out_dput;
+ goto out_end;
+
+ end_creating(dentry, fan);
- dput(dentry);
ret = cachefiles_inject_read_error();
if (ret == 0)
- dentry = lookup_one(&nop_mnt_idmap, &QSTR(object->d_name), fan);
+ dentry = start_creating(&nop_mnt_idmap, fan,
+ &QSTR(object->d_name));
else
dentry = ERR_PTR(ret);
if (IS_ERR(dentry)) {
trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry),
cachefiles_trace_lookup_error);
_debug("lookup fail %ld", PTR_ERR(dentry));
- goto out_unlock;
+ goto out;
}
}
@@ -729,10 +731,9 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache,
success = true;
}
-out_dput:
- dput(dentry);
-out_unlock:
- inode_unlock(d_inode(fan));
+out_end:
+ end_creating(dentry, fan);
+out:
_leave(" = %u", success);
return success;
}
diff --git a/fs/namei.c b/fs/namei.c
index 9effaad115d9..9972b0257a4c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3221,6 +3221,33 @@ struct dentry *lookup_noperm_positive_unlocked(struct qstr *name,
}
EXPORT_SYMBOL(lookup_noperm_positive_unlocked);
+/**
+ * start_creating - prepare to create a given name with permission checking
+ * @idmap: idmap of the mount
+ * @parent: directory in which to prepare to create the name
+ * @name: the name to be created
+ *
+ * Locks are taken and a lookup is performed prior to creating
+ * an object in a directory. Permission checking (MAY_EXEC) is performed
+ * against @idmap.
+ *
+ * If the name already exists, a positive dentry is returned, so
+ * behaviour is similar to O_CREAT without O_EXCL, which doesn't fail
+ * with -EEXIST.
+ *
+ * Returns: a negative or positive dentry, or an error.
+ */
+struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
+ struct qstr *name)
+{
+ int err = lookup_one_common(idmap, name, parent);
+
+ if (err)
+ return ERR_PTR(err);
+ return start_dirop(parent, name, LOOKUP_CREATE);
+}
+EXPORT_SYMBOL(start_creating);
+
#ifdef CONFIG_UNIX98_PTYS
int path_pts(struct path *path)
{
@@ -4306,13 +4333,7 @@ EXPORT_SYMBOL(start_creating_path);
*/
void end_creating_path(const struct path *path, struct dentry *dentry)
{
- if (IS_ERR(dentry))
- /* The parent is still locked despite the error from
- * vfs_mkdir() - must unlock it.
- */
- inode_unlock(path->dentry->d_inode);
- else
- end_dirop(dentry);
+ end_creating(dentry, path->dentry);
mnt_drop_write(path->mnt);
path_put(path);
}
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index b6d03e1ef5f7..e2aac0def2cb 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -281,14 +281,11 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (host_err)
return nfserrno(host_err);
- inode_lock_nested(inode, I_MUTEX_PARENT);
-
- child = lookup_one(&nop_mnt_idmap,
- &QSTR_LEN(argp->name, argp->len),
- parent);
+ child = start_creating(&nop_mnt_idmap, parent,
+ &QSTR_LEN(argp->name, argp->len));
if (IS_ERR(child)) {
status = nfserrno(PTR_ERR(child));
- goto out;
+ goto out_write;
}
if (d_really_is_negative(child)) {
@@ -367,9 +364,8 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs);
out:
- inode_unlock(inode);
- if (child && !IS_ERR(child))
- dput(child);
+ end_creating(child, parent);
+out_write:
fh_drop_write(fhp);
return status;
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e466cf52d7d7..b2c95e8e7c68 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -264,14 +264,11 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (is_create_with_attrs(open))
nfsd4_acl_to_attr(NF4REG, open->op_acl, &attrs);
- inode_lock_nested(inode, I_MUTEX_PARENT);
-
- child = lookup_one(&nop_mnt_idmap,
- &QSTR_LEN(open->op_fname, open->op_fnamelen),
- parent);
+ child = start_creating(&nop_mnt_idmap, parent,
+ &QSTR_LEN(open->op_fname, open->op_fnamelen));
if (IS_ERR(child)) {
status = nfserrno(PTR_ERR(child));
- goto out;
+ goto out_write;
}
if (d_really_is_negative(child)) {
@@ -379,10 +376,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (attrs.na_aclerr)
open->op_bmval[0] &= ~FATTR4_WORD0_ACL;
out:
- inode_unlock(inode);
+ end_creating(child, parent);
nfsd_attrs_free(&attrs);
- if (child && !IS_ERR(child))
- dput(child);
+out_write:
fh_drop_write(fhp);
return status;
}
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e2b9472e5c78..c247a7c3291c 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -195,13 +195,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
goto out_creds;
dir = nn->rec_file->f_path.dentry;
- /* lock the parent */
- inode_lock(d_inode(dir));
- dentry = lookup_one(&nop_mnt_idmap, &QSTR(dname), dir);
+ dentry = start_creating(&nop_mnt_idmap, dir, &QSTR(dname));
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
- goto out_unlock;
+ goto out;
}
if (d_really_is_positive(dentry))
/*
@@ -212,15 +210,13 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
* In the 4.0 case, we should never get here; but we may
* as well be forgiving and just succeed silently.
*/
- goto out_put;
+ goto out_end;
dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU);
if (IS_ERR(dentry))
status = PTR_ERR(dentry);
-out_put:
- if (!status)
- dput(dentry);
-out_unlock:
- inode_unlock(d_inode(dir));
+out_end:
+ end_creating(dentry, dir);
+out:
if (status == 0) {
if (nn->in_grace)
__nfsd4_create_reclaim_record_grace(clp, dname,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 8f71f5748c75..ee1b16e921fd 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -306,18 +306,16 @@ nfsd_proc_create(struct svc_rqst *rqstp)
goto done;
}
- inode_lock_nested(dirfhp->fh_dentry->d_inode, I_MUTEX_PARENT);
- dchild = lookup_one(&nop_mnt_idmap, &QSTR_LEN(argp->name, argp->len),
- dirfhp->fh_dentry);
+ dchild = start_creating(&nop_mnt_idmap, dirfhp->fh_dentry,
+ &QSTR_LEN(argp->name, argp->len));
if (IS_ERR(dchild)) {
resp->status = nfserrno(PTR_ERR(dchild));
- goto out_unlock;
+ goto out_write;
}
fh_init(newfhp, NFS_FHSIZE);
resp->status = fh_compose(newfhp, dirfhp->fh_export, dchild, dirfhp);
if (!resp->status && d_really_is_negative(dchild))
resp->status = nfserr_noent;
- dput(dchild);
if (resp->status) {
if (resp->status != nfserr_noent)
goto out_unlock;
@@ -423,7 +421,8 @@ nfsd_proc_create(struct svc_rqst *rqstp)
}
out_unlock:
- inode_unlock(dirfhp->fh_dentry->d_inode);
+ end_creating(dchild, dirfhp->fh_dentry);
+out_write:
fh_drop_write(dirfhp);
done:
fh_put(dirfhp);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9cb20d4aeab1..4efd3688e081 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1521,7 +1521,7 @@ nfsd_check_ignore_resizing(struct iattr *iap)
iap->ia_valid &= ~ATTR_SIZE;
}
-/* The parent directory should already be locked: */
+/* The parent directory should already be locked - we will unlock */
__be32
nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct nfsd_attrs *attrs,
@@ -1587,8 +1587,9 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
out:
- if (!IS_ERR(dchild))
- dput(dchild);
+ if (!err)
+ fh_fill_post_attrs(fhp);
+ end_creating(dchild, dentry);
return err;
out_nfserr:
@@ -1626,28 +1627,26 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (host_err)
return nfserrno(host_err);
- inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT);
- dchild = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
+ dchild = start_creating(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen));
host_err = PTR_ERR(dchild);
- if (IS_ERR(dchild)) {
- err = nfserrno(host_err);
- goto out_unlock;
- }
+ if (IS_ERR(dchild))
+ return nfserrno(host_err);
+
err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
/*
* We unconditionally drop our ref to dchild as fh_compose will have
* already grabbed its own ref for it.
*/
- dput(dchild);
if (err)
goto out_unlock;
err = fh_fill_pre_attrs(fhp);
if (err != nfs_ok)
goto out_unlock;
err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp);
- fh_fill_post_attrs(fhp);
+ return err;
+
out_unlock:
- inode_unlock(dentry->d_inode);
+ end_creating(dchild, dentry);
return err;
}
@@ -1733,11 +1732,9 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
dentry = fhp->fh_dentry;
- inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT);
- dnew = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
+ dnew = start_creating(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen));
if (IS_ERR(dnew)) {
err = nfserrno(PTR_ERR(dnew));
- inode_unlock(dentry->d_inode);
goto out_drop_write;
}
err = fh_fill_pre_attrs(fhp);
@@ -1750,11 +1747,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
fh_fill_post_attrs(fhp);
out_unlock:
- inode_unlock(dentry->d_inode);
+ end_creating(dnew, dentry);
if (!err)
err = nfserrno(commit_metadata(fhp));
- dput(dnew);
- if (err==0) err = cerr;
+ if (!err)
+ err = cerr;
out_drop_write:
fh_drop_write(fhp);
out:
@@ -1809,32 +1806,31 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
ddir = ffhp->fh_dentry;
dirp = d_inode(ddir);
- inode_lock_nested(dirp, I_MUTEX_PARENT);
+ dnew = start_creating(&nop_mnt_idmap, ddir, &QSTR_LEN(name, len));
- dnew = lookup_one(&nop_mnt_idmap, &QSTR_LEN(name, len), ddir);
if (IS_ERR(dnew)) {
host_err = PTR_ERR(dnew);
- goto out_unlock;
+ goto out_drop_write;
}
dold = tfhp->fh_dentry;
err = nfserr_noent;
if (d_really_is_negative(dold))
- goto out_dput;
+ goto out_unlock;
err = fh_fill_pre_attrs(ffhp);
if (err != nfs_ok)
- goto out_dput;
+ goto out_unlock;
host_err = vfs_link(dold, &nop_mnt_idmap, dirp, dnew, NULL);
fh_fill_post_attrs(ffhp);
- inode_unlock(dirp);
+out_unlock:
+ end_creating(dnew, ddir);
if (!host_err) {
host_err = commit_metadata(ffhp);
if (!host_err)
host_err = commit_metadata(tfhp);
}
- dput(dnew);
out_drop_write:
fh_drop_write(tfhp);
if (host_err == -EBUSY) {
@@ -1849,12 +1845,6 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
}
out:
return err != nfs_ok ? err : nfserrno(host_err);
-
-out_dput:
- dput(dnew);
-out_unlock:
- inode_unlock(dirp);
- goto out_drop_write;
}
static void
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index aac7e34f56c1..7a31ca9bdea2 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -613,9 +613,9 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
if (err)
goto out;
- inode_lock_nested(udir, I_MUTEX_PARENT);
- upper = ovl_lookup_upper(ofs, c->dentry->d_name.name, upperdir,
- c->dentry->d_name.len);
+ upper = ovl_start_creating_upper(ofs, upperdir,
+ &QSTR_LEN(c->dentry->d_name.name,
+ c->dentry->d_name.len));
err = PTR_ERR(upper);
if (!IS_ERR(upper)) {
err = ovl_do_link(ofs, ovl_dentry_upper(c->dentry), udir, upper);
@@ -626,9 +626,8 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
ovl_dentry_set_upper_alias(c->dentry);
ovl_dentry_update_reval(c->dentry, upper);
}
- dput(upper);
+ end_creating(upper, upperdir);
}
- inode_unlock(udir);
if (err)
goto out;
@@ -894,16 +893,14 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
if (err)
goto out;
- inode_lock_nested(udir, I_MUTEX_PARENT);
-
- upper = ovl_lookup_upper(ofs, c->destname.name, c->destdir,
- c->destname.len);
+ upper = ovl_start_creating_upper(ofs, c->destdir,
+ &QSTR_LEN(c->destname.name,
+ c->destname.len));
err = PTR_ERR(upper);
if (!IS_ERR(upper)) {
err = ovl_do_link(ofs, temp, udir, upper);
- dput(upper);
+ end_creating(upper, c->destdir);
}
- inode_unlock(udir);
if (err)
goto out;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index a5e9ddf3023b..a8a24abee6b3 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -59,15 +59,21 @@ int ovl_cleanup(struct ovl_fs *ofs, struct dentry *workdir,
return 0;
}
-struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir)
+#define OVL_TEMPNAME_SIZE 20
+static void ovl_tempname(char name[OVL_TEMPNAME_SIZE])
{
- struct dentry *temp;
- char name[20];
static atomic_t temp_id = ATOMIC_INIT(0);
/* counter is allowed to wrap, since temp dentries are ephemeral */
- snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
+ snprintf(name, OVL_TEMPNAME_SIZE, "#%x", atomic_inc_return(&temp_id));
+}
+
+struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir)
+{
+ struct dentry *temp;
+ char name[OVL_TEMPNAME_SIZE];
+ ovl_tempname(name);
temp = ovl_lookup_upper(ofs, name, workdir, strlen(name));
if (!IS_ERR(temp) && temp->d_inode) {
pr_err("workdir/%s already exists\n", name);
@@ -78,45 +84,49 @@ struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir)
return temp;
}
+static struct dentry *ovl_start_creating_temp(struct ovl_fs *ofs,
+ struct dentry *workdir)
+{
+ char name[OVL_TEMPNAME_SIZE];
+
+ ovl_tempname(name);
+ return start_creating(ovl_upper_mnt_idmap(ofs), workdir,
+ &QSTR(name));
+}
+
static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
{
int err;
- struct dentry *whiteout;
+ struct dentry *whiteout, *link;
struct dentry *workdir = ofs->workdir;
struct inode *wdir = workdir->d_inode;
guard(mutex)(&ofs->whiteout_lock);
if (!ofs->whiteout) {
- inode_lock_nested(wdir, I_MUTEX_PARENT);
- whiteout = ovl_lookup_temp(ofs, workdir);
- if (!IS_ERR(whiteout)) {
- err = ovl_do_whiteout(ofs, wdir, whiteout);
- if (err) {
- dput(whiteout);
- whiteout = ERR_PTR(err);
- }
- }
- inode_unlock(wdir);
+ whiteout = ovl_start_creating_temp(ofs, workdir);
if (IS_ERR(whiteout))
return whiteout;
- ofs->whiteout = whiteout;
+ err = ovl_do_whiteout(ofs, wdir, whiteout);
+ if (!err)
+ ofs->whiteout = dget(whiteout);
+ end_creating(whiteout, workdir);
+ if (err)
+ return ERR_PTR(err);
}
if (!ofs->no_shared_whiteout) {
- inode_lock_nested(wdir, I_MUTEX_PARENT);
- whiteout = ovl_lookup_temp(ofs, workdir);
- if (!IS_ERR(whiteout)) {
- err = ovl_do_link(ofs, ofs->whiteout, wdir, whiteout);
- if (err) {
- dput(whiteout);
- whiteout = ERR_PTR(err);
- }
- }
- inode_unlock(wdir);
- if (!IS_ERR(whiteout))
- return whiteout;
- if (PTR_ERR(whiteout) != -EMLINK) {
+ link = ovl_start_creating_temp(ofs, workdir);
+ if (IS_ERR(link))
+ return link;
+ err = ovl_do_link(ofs, ofs->whiteout, wdir, link);
+ if (!err)
+ whiteout = dget(link);
+ end_creating(link, workdir);
+ if (!err)
+ return whiteout;;
+
+ if (err != -EMLINK) {
pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%lu)\n",
ofs->whiteout->d_inode->i_nlink,
PTR_ERR(whiteout));
@@ -252,10 +262,13 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
struct ovl_cattr *attr)
{
struct dentry *ret;
- inode_lock_nested(workdir->d_inode, I_MUTEX_PARENT);
- ret = ovl_create_real(ofs, workdir,
- ovl_lookup_temp(ofs, workdir), attr);
- inode_unlock(workdir->d_inode);
+ ret = ovl_start_creating_temp(ofs, workdir);
+ if (IS_ERR(ret))
+ return ret;
+ ret = ovl_create_real(ofs, workdir, ret, attr);
+ if (!IS_ERR(ret))
+ dget(ret);
+ end_creating(ret, workdir);
return ret;
}
@@ -354,18 +367,21 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
- struct inode *udir = upperdir->d_inode;
struct dentry *newdentry;
int err;
- inode_lock_nested(udir, I_MUTEX_PARENT);
- newdentry = ovl_create_real(ofs, upperdir,
- ovl_lookup_upper(ofs, dentry->d_name.name,
- upperdir, dentry->d_name.len),
- attr);
- inode_unlock(udir);
+ newdentry = ovl_start_creating_upper(ofs, upperdir,
+ &QSTR_LEN(dentry->d_name.name,
+ dentry->d_name.len));
if (IS_ERR(newdentry))
return PTR_ERR(newdentry);
+ newdentry = ovl_create_real(ofs, upperdir, newdentry, attr);
+ if (IS_ERR(newdentry)) {
+ end_creating(newdentry, upperdir);
+ return PTR_ERR(newdentry);
+ }
+ dget(newdentry);
+ end_creating(newdentry, upperdir);
if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) &&
!ovl_allow_offline_changes(ofs)) {
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index c8fd5951fc5e..beeba96cfcb2 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -415,6 +415,14 @@ static inline struct dentry *ovl_lookup_upper_unlocked(struct ovl_fs *ofs,
&QSTR_LEN(name, len), base);
}
+static inline struct dentry *ovl_start_creating_upper(struct ovl_fs *ofs,
+ struct dentry *parent,
+ struct qstr *name)
+{
+ return start_creating(ovl_upper_mnt_idmap(ofs),
+ parent, name);
+}
+
static inline bool ovl_open_flags_need_copy_up(int flags)
{
if (!flags)
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 43ee4c7296a7..6e0816c1147a 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -310,8 +310,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
bool retried = false;
retry:
- inode_lock_nested(dir, I_MUTEX_PARENT);
- work = ovl_lookup_upper(ofs, name, ofs->workbasedir, strlen(name));
+ work = ovl_start_creating_upper(ofs, ofs->workbasedir, &QSTR(name));
if (!IS_ERR(work)) {
struct iattr attr = {
@@ -320,14 +319,13 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
};
if (work->d_inode) {
+ dget(work);
+ end_creating(work, ofs->workbasedir);
+ if (persist)
+ return work;
err = -EEXIST;
- inode_unlock(dir);
if (retried)
goto out_dput;
-
- if (persist)
- return work;
-
retried = true;
err = ovl_workdir_cleanup(ofs, ofs->workbasedir, mnt, work, 0);
dput(work);
@@ -338,7 +336,9 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
}
work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode);
- inode_unlock(dir);
+ if (!IS_ERR(work))
+ dget(work);
+ end_creating(work, ofs->workbasedir);
err = PTR_ERR(work);
if (IS_ERR(work))
goto out_err;
@@ -376,7 +376,6 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
if (err)
goto out_dput;
} else {
- inode_unlock(dir);
err = PTR_ERR(work);
goto out_err;
}
@@ -626,14 +625,17 @@ static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs,
struct dentry *parent,
const char *name, umode_t mode)
{
- size_t len = strlen(name);
struct dentry *child;
- inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
- child = ovl_lookup_upper(ofs, name, parent, len);
- if (!IS_ERR(child) && !child->d_inode)
- child = ovl_create_real(ofs, parent, child, OVL_CATTR(mode));
- inode_unlock(parent->d_inode);
+ child = ovl_start_creating_upper(ofs, parent, &QSTR(name));
+ if (!IS_ERR(child)) {
+ if (!child->d_inode)
+ child = ovl_create_real(ofs, parent, child,
+ OVL_CATTR(mode));
+ if (!IS_ERR(child))
+ dget(child);
+ end_creating(child, parent);
+ }
dput(parent);
return child;
diff --git a/include/linux/namei.h b/include/linux/namei.h
index fed86221c69c..3f92c1a16878 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -88,6 +88,39 @@ struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap,
struct qstr *name,
struct dentry *base);
+struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
+ struct qstr *name);
+
+/**
+ * end_creating - finish action started with start_creating
+ * @child: dentry returned by start_creating() or vfs_mkdir()
+ * @parent: dentry given to start_creating(),
+ *
+ * Unlock and release the child.
+ *
+ * Unlike end_dirop() this can only be called if start_creating() succeeded.
+ * It handles @child being and error as vfs_mkdir() might have converted the
+ * dentry to an error - in that case the parent still needs to be unlocked.
+ *
+ * If vfs_mkdir() was called then the value returned from that function
+ * should be given for @child rather than the original dentry, as vfs_mkdir()
+ * may have provided a new dentry. Even if vfs_mkdir() returns an error
+ * it must be given to end_creating().
+ *
+ * If vfs_mkdir() was not called, then @child will be a valid dentry and
+ * @parent will be ignored.
+ */
+static inline void end_creating(struct dentry *child, struct dentry *parent)
+{
+ if (IS_ERR(child))
+ /* The parent is still locked despite the error from
+ * vfs_mkdir() - must unlock it.
+ */
+ inode_unlock(parent->d_inode);
+ else
+ end_dirop(child);
+}
+
extern int follow_down_one(struct path *);
extern int follow_down(struct path *path, unsigned int flags);
extern int follow_up(struct path *);
--
2.50.0.107.gf914562f5916.dirty
^ permalink raw reply related [flat|nested] 24+ messages in thread* [PATCH v4 05/14] VFS/nfsd/cachefiles/ovl: introduce start_removing() and end_removing()
2025-10-29 23:31 [PATCH v4 00/14] Create and use APIs to centralise locking for directory ops NeilBrown
` (3 preceding siblings ...)
2025-10-29 23:31 ` [PATCH v4 04/14] VFS/nfsd/cachefiles/ovl: add start_creating() and end_creating() NeilBrown
@ 2025-10-29 23:31 ` NeilBrown
2025-10-29 23:31 ` [PATCH v4 06/14] VFS: introduce start_creating_noperm() and start_removing_noperm() NeilBrown
` (8 subsequent siblings)
13 siblings, 0 replies; 24+ messages in thread
From: NeilBrown @ 2025-10-29 23:31 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Amir Goldstein
Cc: Jan Kara, linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Chuck Lever,
Olga Kornievskaia, Dai Ngo, Namjae Jeon, Steve French,
Sergey Senozhatsky, Carlos Maiolino, John Johansen, Paul Moore,
James Morris, Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek,
Mateusz Guzik, Lorenzo Stoakes, Stefan Berger, Darrick J. Wong,
linux-kernel, netfs, ecryptfs, linux-nfs, linux-unionfs,
linux-cifs, linux-xfs, apparmor, linux-security-module, selinux
From: NeilBrown <neil@brown.name>
start_removing() is similar to start_creating() but will only return a
positive dentry with the expectation that it will be removed. This is
used by nfsd, cachefiles, and overlayfs. They are changed to also use
end_removing() to terminate the action begun by start_removing(). This
is a simple alias for end_dirop().
Apart from changes to the error paths, as we no longer need to unlock on
a lookup error, an effect on callers is that they don't need to test if
the found dentry is positive or negative - they can be sure it is
positive.
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
---
fs/cachefiles/namei.c | 32 ++++++++++++++------------------
fs/namei.c | 27 +++++++++++++++++++++++++++
fs/nfsd/nfs4recover.c | 18 +++++-------------
fs/nfsd/vfs.c | 26 ++++++++++----------------
fs/overlayfs/dir.c | 15 +++++++--------
fs/overlayfs/overlayfs.h | 8 ++++++++
include/linux/namei.h | 18 ++++++++++++++++++
7 files changed, 89 insertions(+), 55 deletions(-)
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 0a136eb434da..c7f0c6ab9b88 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -260,6 +260,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
* - File backed objects are unlinked
* - Directory backed objects are stuffed into the graveyard for userspace to
* delete
+ * On entry dir must be locked. It will be unlocked on exit.
*/
int cachefiles_bury_object(struct cachefiles_cache *cache,
struct cachefiles_object *object,
@@ -274,28 +275,30 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
_enter(",'%pd','%pd'", dir, rep);
+ /* end_removing() will dput() @rep but we need to keep
+ * a ref, so take one now. This also stops the dentry
+ * being negated when unlinked which we need.
+ */
+ dget(rep);
+
if (rep->d_parent != dir) {
- inode_unlock(d_inode(dir));
+ end_removing(rep);
_leave(" = -ESTALE");
return -ESTALE;
}
/* non-directories can just be unlinked */
if (!d_is_dir(rep)) {
- dget(rep); /* Stop the dentry being negated if it's only pinned
- * by a file struct.
- */
ret = cachefiles_unlink(cache, object, dir, rep, why);
- dput(rep);
+ end_removing(rep);
- inode_unlock(d_inode(dir));
_leave(" = %d", ret);
return ret;
}
/* directories have to be moved to the graveyard */
_debug("move stale object to graveyard");
- inode_unlock(d_inode(dir));
+ end_removing(rep);
try_again:
/* first step is to make up a grave dentry in the graveyard */
@@ -749,26 +752,20 @@ static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache,
struct dentry *victim;
int ret = -ENOENT;
- inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
+ victim = start_removing(&nop_mnt_idmap, dir, &QSTR(filename));
- victim = lookup_one(&nop_mnt_idmap, &QSTR(filename), dir);
if (IS_ERR(victim))
goto lookup_error;
- if (d_is_negative(victim))
- goto lookup_put;
if (d_inode(victim)->i_flags & S_KERNEL_FILE)
goto lookup_busy;
return victim;
lookup_busy:
ret = -EBUSY;
-lookup_put:
- inode_unlock(d_inode(dir));
- dput(victim);
+ end_removing(victim);
return ERR_PTR(ret);
lookup_error:
- inode_unlock(d_inode(dir));
ret = PTR_ERR(victim);
if (ret == -ENOENT)
return ERR_PTR(-ESTALE); /* Probably got retired by the netfs */
@@ -816,18 +813,17 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
ret = cachefiles_bury_object(cache, NULL, dir, victim,
FSCACHE_OBJECT_WAS_CULLED);
+ dput(victim);
if (ret < 0)
goto error;
fscache_count_culled();
- dput(victim);
_leave(" = 0");
return 0;
error_unlock:
- inode_unlock(d_inode(dir));
+ end_removing(victim);
error:
- dput(victim);
if (ret == -ENOENT)
return -ESTALE; /* Probably got retired by the netfs */
diff --git a/fs/namei.c b/fs/namei.c
index 9972b0257a4c..ae833dfa277c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3248,6 +3248,33 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
}
EXPORT_SYMBOL(start_creating);
+/**
+ * start_removing - prepare to remove a given name with permission checking
+ * @idmap: idmap of the mount
+ * @parent: directory in which to find the name
+ * @name: the name to be removed
+ *
+ * Locks are taken and a lookup in performed prior to removing
+ * an object from a directory. Permission checking (MAY_EXEC) is performed
+ * against @idmap.
+ *
+ * If the name doesn't exist, an error is returned.
+ *
+ * end_removing() should be called when removal is complete, or aborted.
+ *
+ * Returns: a positive dentry, or an error.
+ */
+struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
+ struct qstr *name)
+{
+ int err = lookup_one_common(idmap, name, parent);
+
+ if (err)
+ return ERR_PTR(err);
+ return start_dirop(parent, name, 0);
+}
+EXPORT_SYMBOL(start_removing);
+
#ifdef CONFIG_UNIX98_PTYS
int path_pts(struct path *path)
{
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index c247a7c3291c..3eefaa2202e3 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -324,20 +324,12 @@ nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn)
dprintk("NFSD: nfsd4_unlink_clid_dir. name %s\n", name);
dir = nn->rec_file->f_path.dentry;
- inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
- dentry = lookup_one(&nop_mnt_idmap, &QSTR(name), dir);
- if (IS_ERR(dentry)) {
- status = PTR_ERR(dentry);
- goto out_unlock;
- }
- status = -ENOENT;
- if (d_really_is_negative(dentry))
- goto out;
+ dentry = start_removing(&nop_mnt_idmap, dir, &QSTR(name));
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry);
-out:
- dput(dentry);
-out_unlock:
- inode_unlock(d_inode(dir));
+ end_removing(dentry);
return status;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 4efd3688e081..cd64ffe12e0b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -2044,7 +2044,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
{
struct dentry *dentry, *rdentry;
struct inode *dirp;
- struct inode *rinode;
+ struct inode *rinode = NULL;
__be32 err;
int host_err;
@@ -2063,24 +2063,21 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
dentry = fhp->fh_dentry;
dirp = d_inode(dentry);
- inode_lock_nested(dirp, I_MUTEX_PARENT);
- rdentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
+ rdentry = start_removing(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen));
+
host_err = PTR_ERR(rdentry);
if (IS_ERR(rdentry))
- goto out_unlock;
+ goto out_drop_write;
- if (d_really_is_negative(rdentry)) {
- dput(rdentry);
- host_err = -ENOENT;
- goto out_unlock;
- }
- rinode = d_inode(rdentry);
err = fh_fill_pre_attrs(fhp);
if (err != nfs_ok)
goto out_unlock;
+ rinode = d_inode(rdentry);
+ /* Prevent truncation until after locks dropped */
ihold(rinode);
+
if (!type)
type = d_inode(rdentry)->i_mode & S_IFMT;
@@ -2102,10 +2099,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
}
fh_fill_post_attrs(fhp);
- inode_unlock(dirp);
- if (!host_err)
+out_unlock:
+ end_removing(rdentry);
+ if (!err && !host_err)
host_err = commit_metadata(fhp);
- dput(rdentry);
iput(rinode); /* truncate the inode here */
out_drop_write:
@@ -2123,9 +2120,6 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
}
out:
return err != nfs_ok ? err : nfserrno(host_err);
-out_unlock:
- inode_unlock(dirp);
- goto out_drop_write;
}
/*
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index a8a24abee6b3..b5247c9e1903 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -866,17 +866,17 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
goto out;
}
- inode_lock_nested(dir, I_MUTEX_PARENT);
- upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
- dentry->d_name.len);
+ upper = ovl_start_removing_upper(ofs, upperdir,
+ &QSTR_LEN(dentry->d_name.name,
+ dentry->d_name.len));
err = PTR_ERR(upper);
if (IS_ERR(upper))
- goto out_unlock;
+ goto out_dput;
err = -ESTALE;
if ((opaquedir && upper != opaquedir) ||
(!opaquedir && !ovl_matches_upper(dentry, upper)))
- goto out_dput_upper;
+ goto out_unlock;
if (is_dir)
err = ovl_do_rmdir(ofs, dir, upper);
@@ -892,10 +892,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
*/
if (!err)
d_drop(dentry);
-out_dput_upper:
- dput(upper);
out_unlock:
- inode_unlock(dir);
+ end_removing(upper);
+out_dput:
dput(opaquedir);
out:
return err;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index beeba96cfcb2..49ad65f829dc 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -423,6 +423,14 @@ static inline struct dentry *ovl_start_creating_upper(struct ovl_fs *ofs,
parent, name);
}
+static inline struct dentry *ovl_start_removing_upper(struct ovl_fs *ofs,
+ struct dentry *parent,
+ struct qstr *name)
+{
+ return start_removing(ovl_upper_mnt_idmap(ofs),
+ parent, name);
+}
+
static inline bool ovl_open_flags_need_copy_up(int flags)
{
if (!flags)
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 3f92c1a16878..9ee76e88f3dd 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -90,6 +90,8 @@ struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap,
struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
struct qstr *name);
+struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
+ struct qstr *name);
/**
* end_creating - finish action started with start_creating
@@ -121,6 +123,22 @@ static inline void end_creating(struct dentry *child, struct dentry *parent)
end_dirop(child);
}
+/**
+ * end_removing - finish action started with start_removing
+ * @child: dentry returned by start_removing()
+ * @parent: dentry given to start_removing()
+ *
+ * Unlock and release the child.
+ *
+ * This is identical to end_dirop(). It can be passed the result of
+ * start_removing() whether that was successful or not, but it not needed
+ * if start_removing() failed.
+ */
+static inline void end_removing(struct dentry *child)
+{
+ end_dirop(child);
+}
+
extern int follow_down_one(struct path *);
extern int follow_down(struct path *path, unsigned int flags);
extern int follow_up(struct path *);
--
2.50.0.107.gf914562f5916.dirty
^ permalink raw reply related [flat|nested] 24+ messages in thread* [PATCH v4 06/14] VFS: introduce start_creating_noperm() and start_removing_noperm()
2025-10-29 23:31 [PATCH v4 00/14] Create and use APIs to centralise locking for directory ops NeilBrown
` (4 preceding siblings ...)
2025-10-29 23:31 ` [PATCH v4 05/14] VFS/nfsd/cachefiles/ovl: introduce start_removing() and end_removing() NeilBrown
@ 2025-10-29 23:31 ` NeilBrown
2025-10-29 23:31 ` [PATCH v4 07/14] VFS: introduce start_removing_dentry() NeilBrown
` (7 subsequent siblings)
13 siblings, 0 replies; 24+ messages in thread
From: NeilBrown @ 2025-10-29 23:31 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Amir Goldstein
Cc: Jan Kara, linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Chuck Lever,
Olga Kornievskaia, Dai Ngo, Namjae Jeon, Steve French,
Sergey Senozhatsky, Carlos Maiolino, John Johansen, Paul Moore,
James Morris, Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek,
Mateusz Guzik, Lorenzo Stoakes, Stefan Berger, Darrick J. Wong,
linux-kernel, netfs, ecryptfs, linux-nfs, linux-unionfs,
linux-cifs, linux-xfs, apparmor, linux-security-module, selinux
From: NeilBrown <neil@brown.name>
xfs, fuse, ipc/mqueue need variants of start_creating or start_removing
which do not check permissions.
This patch adds _noperm versions of these functions.
Note that do_mq_open() was only calling mntget() so it could call
path_put() - it didn't really need an extra reference on the mnt.
Now it doesn't call mntget() and uses end_creating() which does
the dput() half of path_put().
Also mq_unlink() previously passed
d_inode(dentry->d_parent)
as the dir inode to vfs_unlink(). This is after locking
d_inode(mnt->mnt_root)
These two inodes are the same, but normally calls use the textual
parent.
So I've changes the vfs_unlink() call to be given d_inode(mnt->mnt_root).
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neil@brown.name>
--
changes since v2:
- dir arg passed to vfs_unlink() in mq_unlink() changed to match
the dir passed to lookup_noperm()
- restore assignment to path->mnt even though the mntget() is removed.
---
fs/fuse/dir.c | 19 +++++++---------
fs/namei.c | 48 ++++++++++++++++++++++++++++++++++++++++
fs/xfs/scrub/orphanage.c | 11 ++++-----
include/linux/namei.h | 2 ++
ipc/mqueue.c | 32 ++++++++++-----------------
5 files changed, 74 insertions(+), 38 deletions(-)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index ecaec0fea3a1..40ca94922349 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1397,27 +1397,25 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
if (!parent)
return -ENOENT;
- inode_lock_nested(parent, I_MUTEX_PARENT);
if (!S_ISDIR(parent->i_mode))
- goto unlock;
+ goto put_parent;
err = -ENOENT;
dir = d_find_alias(parent);
if (!dir)
- goto unlock;
+ goto put_parent;
- name->hash = full_name_hash(dir, name->name, name->len);
- entry = d_lookup(dir, name);
+ entry = start_removing_noperm(dir, name);
dput(dir);
- if (!entry)
- goto unlock;
+ if (IS_ERR(entry))
+ goto put_parent;
fuse_dir_changed(parent);
if (!(flags & FUSE_EXPIRE_ONLY))
d_invalidate(entry);
fuse_invalidate_entry_cache(entry);
- if (child_nodeid != 0 && d_really_is_positive(entry)) {
+ if (child_nodeid != 0) {
inode_lock(d_inode(entry));
if (get_node_id(d_inode(entry)) != child_nodeid) {
err = -ENOENT;
@@ -1445,10 +1443,9 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
} else {
err = 0;
}
- dput(entry);
- unlock:
- inode_unlock(parent);
+ end_removing(entry);
+ put_parent:
iput(parent);
return err;
}
diff --git a/fs/namei.c b/fs/namei.c
index ae833dfa277c..696e4b794416 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3275,6 +3275,54 @@ struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
}
EXPORT_SYMBOL(start_removing);
+/**
+ * start_creating_noperm - prepare to create a given name without permission checking
+ * @parent: directory in which to prepare to create the name
+ * @name: the name to be created
+ *
+ * Locks are taken and a lookup in performed prior to creating
+ * an object in a directory.
+ *
+ * If the name already exists, a positive dentry is returned.
+ *
+ * Returns: a negative or positive dentry, or an error.
+ */
+struct dentry *start_creating_noperm(struct dentry *parent,
+ struct qstr *name)
+{
+ int err = lookup_noperm_common(name, parent);
+
+ if (err)
+ return ERR_PTR(err);
+ return start_dirop(parent, name, LOOKUP_CREATE);
+}
+EXPORT_SYMBOL(start_creating_noperm);
+
+/**
+ * start_removing_noperm - prepare to remove a given name without permission checking
+ * @parent: directory in which to find the name
+ * @name: the name to be removed
+ *
+ * Locks are taken and a lookup in performed prior to removing
+ * an object from a directory.
+ *
+ * If the name doesn't exist, an error is returned.
+ *
+ * end_removing() should be called when removal is complete, or aborted.
+ *
+ * Returns: a positive dentry, or an error.
+ */
+struct dentry *start_removing_noperm(struct dentry *parent,
+ struct qstr *name)
+{
+ int err = lookup_noperm_common(name, parent);
+
+ if (err)
+ return ERR_PTR(err);
+ return start_dirop(parent, name, 0);
+}
+EXPORT_SYMBOL(start_removing_noperm);
+
#ifdef CONFIG_UNIX98_PTYS
int path_pts(struct path *path)
{
diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c
index 9c12cb844231..e732605924a1 100644
--- a/fs/xfs/scrub/orphanage.c
+++ b/fs/xfs/scrub/orphanage.c
@@ -152,11 +152,10 @@ xrep_orphanage_create(
}
/* Try to find the orphanage directory. */
- inode_lock_nested(root_inode, I_MUTEX_PARENT);
- orphanage_dentry = lookup_noperm(&QSTR(ORPHANAGE), root_dentry);
+ orphanage_dentry = start_creating_noperm(root_dentry, &QSTR(ORPHANAGE));
if (IS_ERR(orphanage_dentry)) {
error = PTR_ERR(orphanage_dentry);
- goto out_unlock_root;
+ goto out_dput_root;
}
/*
@@ -170,7 +169,7 @@ xrep_orphanage_create(
orphanage_dentry, 0750);
error = PTR_ERR(orphanage_dentry);
if (IS_ERR(orphanage_dentry))
- goto out_unlock_root;
+ goto out_dput_orphanage;
}
/* Not a directory? Bail out. */
@@ -200,9 +199,7 @@ xrep_orphanage_create(
sc->orphanage_ilock_flags = 0;
out_dput_orphanage:
- dput(orphanage_dentry);
-out_unlock_root:
- inode_unlock(VFS_I(sc->mp->m_rootip));
+ end_creating(orphanage_dentry, root_dentry);
out_dput_root:
dput(root_dentry);
out:
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 9ee76e88f3dd..688e157d6afc 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -92,6 +92,8 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
struct qstr *name);
struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
struct qstr *name);
+struct dentry *start_creating_noperm(struct dentry *parent, struct qstr *name);
+struct dentry *start_removing_noperm(struct dentry *parent, struct qstr *name);
/**
* end_creating - finish action started with start_creating
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 093551fe66a7..6d7610310003 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -913,13 +913,12 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
goto out_putname;
ro = mnt_want_write(mnt); /* we'll drop it in any case */
- inode_lock(d_inode(root));
- path.dentry = lookup_noperm(&QSTR(name->name), root);
+ path.dentry = start_creating_noperm(root, &QSTR(name->name));
if (IS_ERR(path.dentry)) {
error = PTR_ERR(path.dentry);
goto out_putfd;
}
- path.mnt = mntget(mnt);
+ path.mnt = mnt;
error = prepare_open(path.dentry, oflag, ro, mode, name, attr);
if (!error) {
struct file *file = dentry_open(&path, oflag, current_cred());
@@ -928,13 +927,12 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
else
error = PTR_ERR(file);
}
- path_put(&path);
out_putfd:
if (error) {
put_unused_fd(fd);
fd = error;
}
- inode_unlock(d_inode(root));
+ end_creating(path.dentry, root);
if (!ro)
mnt_drop_write(mnt);
out_putname:
@@ -957,7 +955,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
int err;
struct filename *name;
struct dentry *dentry;
- struct inode *inode = NULL;
+ struct inode *inode;
struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
struct vfsmount *mnt = ipc_ns->mq_mnt;
@@ -969,26 +967,20 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
err = mnt_want_write(mnt);
if (err)
goto out_name;
- inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
- dentry = lookup_noperm(&QSTR(name->name), mnt->mnt_root);
+ dentry = start_removing_noperm(mnt->mnt_root, &QSTR(name->name));
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
- goto out_unlock;
+ goto out_drop_write;
}
inode = d_inode(dentry);
- if (!inode) {
- err = -ENOENT;
- } else {
- ihold(inode);
- err = vfs_unlink(&nop_mnt_idmap, d_inode(dentry->d_parent),
- dentry, NULL);
- }
- dput(dentry);
-
-out_unlock:
- inode_unlock(d_inode(mnt->mnt_root));
+ ihold(inode);
+ err = vfs_unlink(&nop_mnt_idmap, d_inode(mnt->mnt_root),
+ dentry, NULL);
+ end_removing(dentry);
iput(inode);
+
+out_drop_write:
mnt_drop_write(mnt);
out_name:
putname(name);
--
2.50.0.107.gf914562f5916.dirty
^ permalink raw reply related [flat|nested] 24+ messages in thread* [PATCH v4 07/14] VFS: introduce start_removing_dentry()
2025-10-29 23:31 [PATCH v4 00/14] Create and use APIs to centralise locking for directory ops NeilBrown
` (5 preceding siblings ...)
2025-10-29 23:31 ` [PATCH v4 06/14] VFS: introduce start_creating_noperm() and start_removing_noperm() NeilBrown
@ 2025-10-29 23:31 ` NeilBrown
2025-10-30 6:11 ` Al Viro
2025-10-29 23:31 ` [PATCH v4 08/14] VFS: add start_creating_killable() and start_removing_killable() NeilBrown
` (6 subsequent siblings)
13 siblings, 1 reply; 24+ messages in thread
From: NeilBrown @ 2025-10-29 23:31 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Amir Goldstein
Cc: Jan Kara, linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Chuck Lever,
Olga Kornievskaia, Dai Ngo, Namjae Jeon, Steve French,
Sergey Senozhatsky, Carlos Maiolino, John Johansen, Paul Moore,
James Morris, Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek,
Mateusz Guzik, Lorenzo Stoakes, Stefan Berger, Darrick J. Wong,
linux-kernel, netfs, ecryptfs, linux-nfs, linux-unionfs,
linux-cifs, linux-xfs, apparmor, linux-security-module, selinux
From: NeilBrown <neil@brown.name>
start_removing_dentry() is similar to start_removing() but instead of
providing a name for lookup, the target dentry is given.
start_removing_dentry() checks that the dentry is still hashed and in
the parent, and if so it locks and increases the refcount so that
end_removing() can be used to finish the operation.
This is used in cachefiles, overlayfs, smb/server, and apparmor.
There will be other users including ecryptfs.
As start_removing_dentry() takes an extra reference to the dentry (to be
put by end_removing()), there is no need to explicitly take an extra
reference to stop d_delete() from using dentry_unlink_inode() to negate
the dentry - as in cachefiles_delete_object(), and ksmbd_vfs_unlink().
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
---
fs/cachefiles/interface.c | 14 +++++++++-----
fs/cachefiles/namei.c | 24 ++++++++++++++----------
fs/cachefiles/volume.c | 10 +++++++---
fs/namei.c | 33 +++++++++++++++++++++++++++++++++
fs/overlayfs/dir.c | 10 ++++------
fs/overlayfs/readdir.c | 8 ++++----
fs/smb/server/vfs.c | 27 ++++-----------------------
include/linux/namei.h | 2 ++
security/apparmor/apparmorfs.c | 8 ++++----
9 files changed, 81 insertions(+), 55 deletions(-)
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 3e63cfe15874..3f8a6f1a8fc3 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -9,6 +9,7 @@
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/file.h>
+#include <linux/namei.h>
#include <linux/falloc.h>
#include <trace/events/fscache.h>
#include "internal.h"
@@ -428,11 +429,14 @@ static bool cachefiles_invalidate_cookie(struct fscache_cookie *cookie)
if (!old_tmpfile) {
struct cachefiles_volume *volume = object->volume;
struct dentry *fan = volume->fanout[(u8)cookie->key_hash];
-
- inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
- cachefiles_bury_object(volume->cache, object, fan,
- old_file->f_path.dentry,
- FSCACHE_OBJECT_INVALIDATED);
+ struct dentry *obj;
+
+ obj = start_removing_dentry(fan, old_file->f_path.dentry);
+ if (!IS_ERR(obj))
+ cachefiles_bury_object(volume->cache, object,
+ fan, obj,
+ FSCACHE_OBJECT_INVALIDATED);
+ end_removing(obj);
}
fput(old_file);
}
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index c7f0c6ab9b88..b97a40917a32 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -425,13 +425,12 @@ int cachefiles_delete_object(struct cachefiles_object *object,
_enter(",OBJ%x{%pD}", object->debug_id, object->file);
- /* Stop the dentry being negated if it's only pinned by a file struct. */
- dget(dentry);
-
- inode_lock_nested(d_backing_inode(fan), I_MUTEX_PARENT);
- ret = cachefiles_unlink(volume->cache, object, fan, dentry, why);
- inode_unlock(d_backing_inode(fan));
- dput(dentry);
+ dentry = start_removing_dentry(fan, dentry);
+ if (IS_ERR(dentry))
+ ret = PTR_ERR(dentry);
+ else
+ ret = cachefiles_unlink(volume->cache, object, fan, dentry, why);
+ end_removing(dentry);
return ret;
}
@@ -644,9 +643,14 @@ bool cachefiles_look_up_object(struct cachefiles_object *object)
if (!d_is_reg(dentry)) {
pr_err("%pd is not a file\n", dentry);
- inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
- ret = cachefiles_bury_object(volume->cache, object, fan, dentry,
- FSCACHE_OBJECT_IS_WEIRD);
+ struct dentry *de = start_removing_dentry(fan, dentry);
+ if (IS_ERR(de))
+ ret = PTR_ERR(de);
+ else
+ ret = cachefiles_bury_object(volume->cache, object,
+ fan, de,
+ FSCACHE_OBJECT_IS_WEIRD);
+ end_removing(de);
dput(dentry);
if (ret < 0)
return false;
diff --git a/fs/cachefiles/volume.c b/fs/cachefiles/volume.c
index 781aac4ef274..ddf95ff5daf0 100644
--- a/fs/cachefiles/volume.c
+++ b/fs/cachefiles/volume.c
@@ -7,6 +7,7 @@
#include <linux/fs.h>
#include <linux/slab.h>
+#include <linux/namei.h>
#include "internal.h"
#include <trace/events/fscache.h>
@@ -58,9 +59,12 @@ void cachefiles_acquire_volume(struct fscache_volume *vcookie)
if (ret < 0) {
if (ret != -ESTALE)
goto error_dir;
- inode_lock_nested(d_inode(cache->store), I_MUTEX_PARENT);
- cachefiles_bury_object(cache, NULL, cache->store, vdentry,
- FSCACHE_VOLUME_IS_WEIRD);
+ vdentry = start_removing_dentry(cache->store, vdentry);
+ if (!IS_ERR(vdentry))
+ cachefiles_bury_object(cache, NULL, cache->store,
+ vdentry,
+ FSCACHE_VOLUME_IS_WEIRD);
+ end_removing(vdentry);
cachefiles_put_directory(volume->dentry);
cond_resched();
goto retry;
diff --git a/fs/namei.c b/fs/namei.c
index 696e4b794416..bfc443bec8a9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3323,6 +3323,39 @@ struct dentry *start_removing_noperm(struct dentry *parent,
}
EXPORT_SYMBOL(start_removing_noperm);
+/**
+ * start_removing_dentry - prepare to remove a given dentry
+ * @parent: directory from which dentry should be removed
+ * @child: the dentry to be removed
+ *
+ * A lock is taken to protect the dentry again other dirops and
+ * the validity of the dentry is checked: correct parent and still hashed.
+ *
+ * If the dentry is valid and positive, a reference is taken and
+ * returned. If not an error is returned.
+ *
+ * end_removing() should be called when removal is complete, or aborted.
+ *
+ * Returns: the valid dentry, or an error.
+ */
+struct dentry *start_removing_dentry(struct dentry *parent,
+ struct dentry *child)
+{
+ inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
+ if (unlikely(IS_DEADDIR(parent->d_inode) ||
+ child->d_parent != parent ||
+ d_unhashed(child))) {
+ inode_unlock(parent->d_inode);
+ return ERR_PTR(-EINVAL);
+ }
+ if (d_is_negative(child)) {
+ inode_unlock(parent->d_inode);
+ return ERR_PTR(-ENOENT);
+ }
+ return dget(child);
+}
+EXPORT_SYMBOL(start_removing_dentry);
+
#ifdef CONFIG_UNIX98_PTYS
int path_pts(struct path *path)
{
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index b5247c9e1903..c8d0885ee5e0 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -47,14 +47,12 @@ static int ovl_cleanup_locked(struct ovl_fs *ofs, struct inode *wdir,
int ovl_cleanup(struct ovl_fs *ofs, struct dentry *workdir,
struct dentry *wdentry)
{
- int err;
-
- err = ovl_parent_lock(workdir, wdentry);
- if (err)
- return err;
+ wdentry = start_removing_dentry(workdir, wdentry);
+ if (IS_ERR(wdentry))
+ return PTR_ERR(wdentry);
ovl_cleanup_locked(ofs, workdir->d_inode, wdentry);
- ovl_parent_unlock(workdir);
+ end_removing(wdentry);
return 0;
}
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 1e9792cc557b..77ecc39fc33a 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -1242,11 +1242,11 @@ int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent,
if (!d_is_dir(dentry) || level > 1)
return ovl_cleanup(ofs, parent, dentry);
- err = ovl_parent_lock(parent, dentry);
- if (err)
- return err;
+ dentry = start_removing_dentry(parent, dentry);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
err = ovl_do_rmdir(ofs, parent->d_inode, dentry);
- ovl_parent_unlock(parent);
+ end_removing(dentry);
if (err) {
struct path path = { .mnt = mnt, .dentry = dentry };
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index 891ed2dc2b73..7c4ddc43ab39 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -49,24 +49,6 @@ static void ksmbd_vfs_inherit_owner(struct ksmbd_work *work,
i_uid_write(inode, i_uid_read(parent_inode));
}
-/**
- * ksmbd_vfs_lock_parent() - lock parent dentry if it is stable
- * @parent: parent dentry
- * @child: child dentry
- *
- * Returns: %0 on success, %-ENOENT if the parent dentry is not stable
- */
-int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child)
-{
- inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
- if (child->d_parent != parent) {
- inode_unlock(d_inode(parent));
- return -ENOENT;
- }
-
- return 0;
-}
-
static int ksmbd_vfs_path_lookup(struct ksmbd_share_config *share_conf,
char *pathname, unsigned int flags,
struct path *path, bool do_lock)
@@ -1084,18 +1066,17 @@ int ksmbd_vfs_unlink(struct file *filp)
return err;
dir = dget_parent(dentry);
- err = ksmbd_vfs_lock_parent(dir, dentry);
- if (err)
+ dentry = start_removing_dentry(dir, dentry);
+ err = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
goto out;
- dget(dentry);
if (S_ISDIR(d_inode(dentry)->i_mode))
err = vfs_rmdir(idmap, d_inode(dir), dentry);
else
err = vfs_unlink(idmap, d_inode(dir), dentry, NULL);
- dput(dentry);
- inode_unlock(d_inode(dir));
+ end_removing(dentry);
if (err)
ksmbd_debug(VFS, "failed to delete, err %d\n", err);
out:
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 688e157d6afc..7e916e9d7726 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -94,6 +94,8 @@ struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
struct qstr *name);
struct dentry *start_creating_noperm(struct dentry *parent, struct qstr *name);
struct dentry *start_removing_noperm(struct dentry *parent, struct qstr *name);
+struct dentry *start_removing_dentry(struct dentry *parent,
+ struct dentry *child);
/**
* end_creating - finish action started with start_creating
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 391a586d0557..9d08d103f142 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -355,17 +355,17 @@ static void aafs_remove(struct dentry *dentry)
if (!dentry || IS_ERR(dentry))
return;
+ /* ->d_parent is stable as rename is not supported */
dir = d_inode(dentry->d_parent);
- inode_lock(dir);
- if (simple_positive(dentry)) {
+ dentry = start_removing_dentry(dentry->d_parent, dentry);
+ if (!IS_ERR(dentry) && simple_positive(dentry)) {
if (d_is_dir(dentry))
simple_rmdir(dir, dentry);
else
simple_unlink(dir, dentry);
d_delete(dentry);
- dput(dentry);
}
- inode_unlock(dir);
+ end_removing(dentry);
simple_release_fs(&aafs_mnt, &aafs_count);
}
--
2.50.0.107.gf914562f5916.dirty
^ permalink raw reply related [flat|nested] 24+ messages in thread* Re: [PATCH v4 07/14] VFS: introduce start_removing_dentry()
2025-10-29 23:31 ` [PATCH v4 07/14] VFS: introduce start_removing_dentry() NeilBrown
@ 2025-10-30 6:11 ` Al Viro
2025-10-30 23:22 ` NeilBrown
0 siblings, 1 reply; 24+ messages in thread
From: Al Viro @ 2025-10-30 6:11 UTC (permalink / raw)
To: NeilBrown
Cc: Christian Brauner, Amir Goldstein, Jan Kara, linux-fsdevel,
Jeff Layton, Chris Mason, David Sterba, David Howells,
Greg Kroah-Hartman, Rafael J. Wysocki, Danilo Krummrich,
Tyler Hicks, Miklos Szeredi, Chuck Lever, Olga Kornievskaia,
Dai Ngo, Namjae Jeon, Steve French, Sergey Senozhatsky,
Carlos Maiolino, John Johansen, Paul Moore, James Morris,
Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek, Mateusz Guzik,
Lorenzo Stoakes, Stefan Berger, Darrick J. Wong, linux-kernel,
netfs, ecryptfs, linux-nfs, linux-unionfs, linux-cifs, linux-xfs,
apparmor, linux-security-module, selinux
On Thu, Oct 30, 2025 at 10:31:07AM +1100, NeilBrown wrote:
> @@ -428,11 +429,14 @@ static bool cachefiles_invalidate_cookie(struct fscache_cookie *cookie)
> if (!old_tmpfile) {
> struct cachefiles_volume *volume = object->volume;
> struct dentry *fan = volume->fanout[(u8)cookie->key_hash];
> -
> - inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
> - cachefiles_bury_object(volume->cache, object, fan,
> - old_file->f_path.dentry,
> - FSCACHE_OBJECT_INVALIDATED);
> + struct dentry *obj;
> +
> + obj = start_removing_dentry(fan, old_file->f_path.dentry);
> + if (!IS_ERR(obj))
> + cachefiles_bury_object(volume->cache, object,
> + fan, obj,
> + FSCACHE_OBJECT_INVALIDATED);
> + end_removing(obj);
Huh? Where did you change cachefiles_bury_object to *not* unlock the parent?
Not in this commit, AFAICS, and that means at least a bisection hazard around
here...
Confused...
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH v4 07/14] VFS: introduce start_removing_dentry()
2025-10-30 6:11 ` Al Viro
@ 2025-10-30 23:22 ` NeilBrown
0 siblings, 0 replies; 24+ messages in thread
From: NeilBrown @ 2025-10-30 23:22 UTC (permalink / raw)
To: Al Viro
Cc: Christian Brauner, Amir Goldstein, Jan Kara, linux-fsdevel,
Jeff Layton, Chris Mason, David Sterba, David Howells,
Greg Kroah-Hartman, Rafael J. Wysocki, Danilo Krummrich,
Tyler Hicks, Miklos Szeredi, Chuck Lever, Olga Kornievskaia,
Dai Ngo, Namjae Jeon, Steve French, Sergey Senozhatsky,
Carlos Maiolino, John Johansen, Paul Moore, James Morris,
Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek, Mateusz Guzik,
Lorenzo Stoakes, Stefan Berger, Darrick J. Wong, linux-kernel,
netfs, ecryptfs, linux-nfs, linux-unionfs, linux-cifs, linux-xfs,
apparmor, linux-security-module, selinux
On Thu, 30 Oct 2025, Al Viro wrote:
> On Thu, Oct 30, 2025 at 10:31:07AM +1100, NeilBrown wrote:
>
> > @@ -428,11 +429,14 @@ static bool cachefiles_invalidate_cookie(struct fscache_cookie *cookie)
> > if (!old_tmpfile) {
> > struct cachefiles_volume *volume = object->volume;
> > struct dentry *fan = volume->fanout[(u8)cookie->key_hash];
> > -
> > - inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
> > - cachefiles_bury_object(volume->cache, object, fan,
> > - old_file->f_path.dentry,
> > - FSCACHE_OBJECT_INVALIDATED);
> > + struct dentry *obj;
> > +
> > + obj = start_removing_dentry(fan, old_file->f_path.dentry);
> > + if (!IS_ERR(obj))
> > + cachefiles_bury_object(volume->cache, object,
> > + fan, obj,
> > + FSCACHE_OBJECT_INVALIDATED);
> > + end_removing(obj);
>
> Huh? Where did you change cachefiles_bury_object to *not* unlock the parent?
> Not in this commit, AFAICS, and that means at least a bisection hazard around
> here...
>
> Confused...
>
Thanks for the review and for catching that error.
This incremental patch should fix it.
Thanks,
NeilBrown
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 3f8a6f1a8fc3..a08250d244ea 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -436,7 +436,6 @@ static bool cachefiles_invalidate_cookie(struct fscache_cookie *cookie)
cachefiles_bury_object(volume->cache, object,
fan, obj,
FSCACHE_OBJECT_INVALIDATED);
- end_removing(obj);
}
fput(old_file);
}
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index b97a40917a32..0104ac00485d 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -261,6 +261,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
* - Directory backed objects are stuffed into the graveyard for userspace to
* delete
* On entry dir must be locked. It will be unlocked on exit.
+ * On entry there must be at least 2 refs on rep, one will be dropped on exit.
*/
int cachefiles_bury_object(struct cachefiles_cache *cache,
struct cachefiles_object *object,
@@ -275,12 +276,6 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
_enter(",'%pd','%pd'", dir, rep);
- /* end_removing() will dput() @rep but we need to keep
- * a ref, so take one now. This also stops the dentry
- * being negated when unlinked which we need.
- */
- dget(rep);
-
if (rep->d_parent != dir) {
end_removing(rep);
_leave(" = -ESTALE");
@@ -650,7 +645,6 @@ bool cachefiles_look_up_object(struct cachefiles_object *object)
ret = cachefiles_bury_object(volume->cache, object,
fan, de,
FSCACHE_OBJECT_IS_WEIRD);
- end_removing(de);
dput(dentry);
if (ret < 0)
return false;
diff --git a/fs/cachefiles/volume.c b/fs/cachefiles/volume.c
index ddf95ff5daf0..90ba926f488e 100644
--- a/fs/cachefiles/volume.c
+++ b/fs/cachefiles/volume.c
@@ -64,7 +64,6 @@ void cachefiles_acquire_volume(struct fscache_volume *vcookie)
cachefiles_bury_object(cache, NULL, cache->store,
vdentry,
FSCACHE_VOLUME_IS_WEIRD);
- end_removing(vdentry);
cachefiles_put_directory(volume->dentry);
cond_resched();
goto retry;
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH v4 08/14] VFS: add start_creating_killable() and start_removing_killable()
2025-10-29 23:31 [PATCH v4 00/14] Create and use APIs to centralise locking for directory ops NeilBrown
` (6 preceding siblings ...)
2025-10-29 23:31 ` [PATCH v4 07/14] VFS: introduce start_removing_dentry() NeilBrown
@ 2025-10-29 23:31 ` NeilBrown
2025-10-29 23:31 ` [PATCH v4 09/14] VFS/nfsd/ovl: introduce start_renaming() and end_renaming() NeilBrown
` (5 subsequent siblings)
13 siblings, 0 replies; 24+ messages in thread
From: NeilBrown @ 2025-10-29 23:31 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Amir Goldstein
Cc: Jan Kara, linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Chuck Lever,
Olga Kornievskaia, Dai Ngo, Namjae Jeon, Steve French,
Sergey Senozhatsky, Carlos Maiolino, John Johansen, Paul Moore,
James Morris, Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek,
Mateusz Guzik, Lorenzo Stoakes, Stefan Berger, Darrick J. Wong,
linux-kernel, netfs, ecryptfs, linux-nfs, linux-unionfs,
linux-cifs, linux-xfs, apparmor, linux-security-module, selinux
From: NeilBrown <neil@brown.name>
These are similar to start_creating() and start_removing(), but allow a
fatal signal to abort waiting for the lock.
They are used in btrfs for subvol creation and removal.
btrfs_may_create() no longer needs IS_DEADDIR() and
start_creating_killable() includes that check.
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
---
fs/btrfs/ioctl.c | 41 +++++++---------------
fs/namei.c | 80 +++++++++++++++++++++++++++++++++++++++++--
include/linux/namei.h | 6 ++++
3 files changed, 95 insertions(+), 32 deletions(-)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 185bef0df1c2..4fbfdd8faf6a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -904,14 +904,9 @@ static noinline int btrfs_mksubvol(struct dentry *parent,
struct fscrypt_str name_str = FSTR_INIT((char *)qname->name, qname->len);
int ret;
- ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
- if (ret == -EINTR)
- return ret;
-
- dentry = lookup_one(idmap, qname, parent);
- ret = PTR_ERR(dentry);
+ dentry = start_creating_killable(idmap, parent, qname);
if (IS_ERR(dentry))
- goto out_unlock;
+ return PTR_ERR(dentry);
ret = btrfs_may_create(idmap, dir, dentry);
if (ret)
@@ -940,9 +935,7 @@ static noinline int btrfs_mksubvol(struct dentry *parent,
out_up_read:
up_read(&fs_info->subvol_sem);
out_dput:
- dput(dentry);
-out_unlock:
- btrfs_inode_unlock(BTRFS_I(dir), 0);
+ end_creating(dentry, parent);
return ret;
}
@@ -2417,18 +2410,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
goto free_subvol_name;
}
- ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
- if (ret == -EINTR)
- goto free_subvol_name;
- dentry = lookup_one(idmap, &QSTR(subvol_name), parent);
+ dentry = start_removing_killable(idmap, parent, &QSTR(subvol_name));
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
- goto out_unlock_dir;
- }
-
- if (d_really_is_negative(dentry)) {
- ret = -ENOENT;
- goto out_dput;
+ goto out_end_removing;
}
inode = d_inode(dentry);
@@ -2449,7 +2434,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
*/
ret = -EPERM;
if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED))
- goto out_dput;
+ goto out_end_removing;
/*
* Do not allow deletion if the parent dir is the same
@@ -2460,21 +2445,21 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
*/
ret = -EINVAL;
if (root == dest)
- goto out_dput;
+ goto out_end_removing;
ret = inode_permission(idmap, inode, MAY_WRITE | MAY_EXEC);
if (ret)
- goto out_dput;
+ goto out_end_removing;
}
/* check if subvolume may be deleted by a user */
ret = btrfs_may_delete(idmap, dir, dentry, 1);
if (ret)
- goto out_dput;
+ goto out_end_removing;
if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
ret = -EINVAL;
- goto out_dput;
+ goto out_end_removing;
}
btrfs_inode_lock(BTRFS_I(inode), 0);
@@ -2483,10 +2468,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (!ret)
d_delete_notify(dir, dentry);
-out_dput:
- dput(dentry);
-out_unlock_dir:
- btrfs_inode_unlock(BTRFS_I(dir), 0);
+out_end_removing:
+ end_removing(dentry);
free_subvol_name:
kfree(subvol_name_ptr);
free_parent:
diff --git a/fs/namei.c b/fs/namei.c
index bfc443bec8a9..04d2819bd351 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2778,19 +2778,33 @@ static int filename_parentat(int dfd, struct filename *name,
* Returns: a locked dentry, or an error.
*
*/
-struct dentry *start_dirop(struct dentry *parent, struct qstr *name,
- unsigned int lookup_flags)
+static struct dentry *__start_dirop(struct dentry *parent, struct qstr *name,
+ unsigned int lookup_flags,
+ unsigned int state)
{
struct dentry *dentry;
struct inode *dir = d_inode(parent);
- inode_lock_nested(dir, I_MUTEX_PARENT);
+ if (state == TASK_KILLABLE) {
+ int ret = down_write_killable_nested(&dir->i_rwsem,
+ I_MUTEX_PARENT);
+ if (ret)
+ return ERR_PTR(ret);
+ } else {
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ }
dentry = lookup_one_qstr_excl(name, parent, lookup_flags);
if (IS_ERR(dentry))
inode_unlock(dir);
return dentry;
}
+struct dentry *start_dirop(struct dentry *parent, struct qstr *name,
+ unsigned int lookup_flags)
+{
+ return __start_dirop(parent, name, lookup_flags, TASK_NORMAL);
+}
+
/**
* end_dirop - signal completion of a dirop
* @de: the dentry which was returned by start_dirop or similar.
@@ -3275,6 +3289,66 @@ struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
}
EXPORT_SYMBOL(start_removing);
+/**
+ * start_creating_killable - prepare to create a given name with permission checking
+ * @idmap: idmap of the mount
+ * @parent: directory in which to prepare to create the name
+ * @name: the name to be created
+ *
+ * Locks are taken and a lookup in performed prior to creating
+ * an object in a directory. Permission checking (MAY_EXEC) is performed
+ * against @idmap.
+ *
+ * If the name already exists, a positive dentry is returned.
+ *
+ * If a signal is received or was already pending, the function aborts
+ * with -EINTR;
+ *
+ * Returns: a negative or positive dentry, or an error.
+ */
+struct dentry *start_creating_killable(struct mnt_idmap *idmap,
+ struct dentry *parent,
+ struct qstr *name)
+{
+ int err = lookup_one_common(idmap, name, parent);
+
+ if (err)
+ return ERR_PTR(err);
+ return __start_dirop(parent, name, LOOKUP_CREATE, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(start_creating_killable);
+
+/**
+ * start_removing_killable - prepare to remove a given name with permission checking
+ * @idmap: idmap of the mount
+ * @parent: directory in which to find the name
+ * @name: the name to be removed
+ *
+ * Locks are taken and a lookup in performed prior to removing
+ * an object from a directory. Permission checking (MAY_EXEC) is performed
+ * against @idmap.
+ *
+ * If the name doesn't exist, an error is returned.
+ *
+ * end_removing() should be called when removal is complete, or aborted.
+ *
+ * If a signal is received or was already pending, the function aborts
+ * with -EINTR;
+ *
+ * Returns: a positive dentry, or an error.
+ */
+struct dentry *start_removing_killable(struct mnt_idmap *idmap,
+ struct dentry *parent,
+ struct qstr *name)
+{
+ int err = lookup_one_common(idmap, name, parent);
+
+ if (err)
+ return ERR_PTR(err);
+ return __start_dirop(parent, name, 0, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(start_removing_killable);
+
/**
* start_creating_noperm - prepare to create a given name without permission checking
* @parent: directory in which to prepare to create the name
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 7e916e9d7726..e5cff89679df 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -92,6 +92,12 @@ struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
struct qstr *name);
struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
struct qstr *name);
+struct dentry *start_creating_killable(struct mnt_idmap *idmap,
+ struct dentry *parent,
+ struct qstr *name);
+struct dentry *start_removing_killable(struct mnt_idmap *idmap,
+ struct dentry *parent,
+ struct qstr *name);
struct dentry *start_creating_noperm(struct dentry *parent, struct qstr *name);
struct dentry *start_removing_noperm(struct dentry *parent, struct qstr *name);
struct dentry *start_removing_dentry(struct dentry *parent,
--
2.50.0.107.gf914562f5916.dirty
^ permalink raw reply related [flat|nested] 24+ messages in thread* [PATCH v4 09/14] VFS/nfsd/ovl: introduce start_renaming() and end_renaming()
2025-10-29 23:31 [PATCH v4 00/14] Create and use APIs to centralise locking for directory ops NeilBrown
` (7 preceding siblings ...)
2025-10-29 23:31 ` [PATCH v4 08/14] VFS: add start_creating_killable() and start_removing_killable() NeilBrown
@ 2025-10-29 23:31 ` NeilBrown
2025-10-30 13:22 ` Chuck Lever
2025-10-29 23:31 ` [PATCH v4 10/14] VFS/ovl/smb: introduce start_renaming_dentry() NeilBrown
` (4 subsequent siblings)
13 siblings, 1 reply; 24+ messages in thread
From: NeilBrown @ 2025-10-29 23:31 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Amir Goldstein
Cc: Jan Kara, linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Chuck Lever,
Olga Kornievskaia, Dai Ngo, Namjae Jeon, Steve French,
Sergey Senozhatsky, Carlos Maiolino, John Johansen, Paul Moore,
James Morris, Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek,
Mateusz Guzik, Lorenzo Stoakes, Stefan Berger, Darrick J. Wong,
linux-kernel, netfs, ecryptfs, linux-nfs, linux-unionfs,
linux-cifs, linux-xfs, apparmor, linux-security-module, selinux
From: NeilBrown <neil@brown.name>
start_renaming() combines name lookup and locking to prepare for rename.
It is used when two names need to be looked up as in nfsd and overlayfs -
cases where one or both dentries are already available will be handled
separately.
__start_renaming() avoids the inode_permission check and hash
calculation and is suitable after filename_parentat() in do_renameat2().
It subsumes quite a bit of code from that function.
start_renaming() does calculate the hash and check X permission and is
suitable elsewhere:
- nfsd_rename()
- ovl_rename()
In ovl, ovl_do_rename_rd() is factored out of ovl_do_rename(), which
itself will be gone by the end of the series.
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
--
Changes since v3:
- added missig dput() in ovl_rename when "whiteout" is not-NULL.
Changes since v2:
- in __start_renaming() some label have been renamed, and err
is always set before a "goto out_foo" rather than passing the
error in a dentry*.
- ovl_do_rename() changed to call the new ovl_do_rename_rd() rather
than keeping duplicate code
- code around ovl_cleanup() call in ovl_rename() restructured.
---
fs/namei.c | 197 ++++++++++++++++++++++++++++-----------
fs/nfsd/vfs.c | 73 +++++----------
fs/overlayfs/dir.c | 74 +++++++--------
fs/overlayfs/overlayfs.h | 23 +++--
include/linux/namei.h | 3 +
5 files changed, 218 insertions(+), 152 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index 04d2819bd351..0ee0a110b088 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3667,6 +3667,129 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
}
EXPORT_SYMBOL(unlock_rename);
+/**
+ * __start_renaming - lookup and lock names for rename
+ * @rd: rename data containing parent and flags, and
+ * for receiving found dentries
+ * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL,
+ * LOOKUP_NO_SYMLINKS etc).
+ * @old_last: name of object in @rd.old_parent
+ * @new_last: name of object in @rd.new_parent
+ *
+ * Look up two names and ensure locks are in place for
+ * rename.
+ *
+ * On success the found dentries are stored in @rd.old_dentry,
+ * @rd.new_dentry. These references and the lock are dropped by
+ * end_renaming().
+ *
+ * The passed in qstrs must have the hash calculated, and no permission
+ * checking is performed.
+ *
+ * Returns: zero or an error.
+ */
+static int
+__start_renaming(struct renamedata *rd, int lookup_flags,
+ struct qstr *old_last, struct qstr *new_last)
+{
+ struct dentry *trap;
+ struct dentry *d1, *d2;
+ int target_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE;
+ int err;
+
+ if (rd->flags & RENAME_EXCHANGE)
+ target_flags = 0;
+ if (rd->flags & RENAME_NOREPLACE)
+ target_flags |= LOOKUP_EXCL;
+
+ trap = lock_rename(rd->old_parent, rd->new_parent);
+ if (IS_ERR(trap))
+ return PTR_ERR(trap);
+
+ d1 = lookup_one_qstr_excl(old_last, rd->old_parent,
+ lookup_flags);
+ err = PTR_ERR(d1);
+ if (IS_ERR(d1))
+ goto out_unlock;
+
+ d2 = lookup_one_qstr_excl(new_last, rd->new_parent,
+ lookup_flags | target_flags);
+ err = PTR_ERR(d2);
+ if (IS_ERR(d2))
+ goto out_dput_d1;
+
+ if (d1 == trap) {
+ /* source is an ancestor of target */
+ err = -EINVAL;
+ goto out_dput_d2;
+ }
+
+ if (d2 == trap) {
+ /* target is an ancestor of source */
+ if (rd->flags & RENAME_EXCHANGE)
+ err = -EINVAL;
+ else
+ err = -ENOTEMPTY;
+ goto out_dput_d2;
+ }
+
+ rd->old_dentry = d1;
+ rd->new_dentry = d2;
+ return 0;
+
+out_dput_d2:
+ dput(d2);
+out_dput_d1:
+ dput(d1);
+out_unlock:
+ unlock_rename(rd->old_parent, rd->new_parent);
+ return err;
+}
+
+/**
+ * start_renaming - lookup and lock names for rename with permission checking
+ * @rd: rename data containing parent and flags, and
+ * for receiving found dentries
+ * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL,
+ * LOOKUP_NO_SYMLINKS etc).
+ * @old_last: name of object in @rd.old_parent
+ * @new_last: name of object in @rd.new_parent
+ *
+ * Look up two names and ensure locks are in place for
+ * rename.
+ *
+ * On success the found dentries are stored in @rd.old_dentry,
+ * @rd.new_dentry. These references and the lock are dropped by
+ * end_renaming().
+ *
+ * The passed in qstrs need not have the hash calculated, and basic
+ * eXecute permission checking is performed against @rd.mnt_idmap.
+ *
+ * Returns: zero or an error.
+ */
+int start_renaming(struct renamedata *rd, int lookup_flags,
+ struct qstr *old_last, struct qstr *new_last)
+{
+ int err;
+
+ err = lookup_one_common(rd->mnt_idmap, old_last, rd->old_parent);
+ if (err)
+ return err;
+ err = lookup_one_common(rd->mnt_idmap, new_last, rd->new_parent);
+ if (err)
+ return err;
+ return __start_renaming(rd, lookup_flags, old_last, new_last);
+}
+EXPORT_SYMBOL(start_renaming);
+
+void end_renaming(struct renamedata *rd)
+{
+ unlock_rename(rd->old_parent, rd->new_parent);
+ dput(rd->old_dentry);
+ dput(rd->new_dentry);
+}
+EXPORT_SYMBOL(end_renaming);
+
/**
* vfs_prepare_mode - prepare the mode to be used for a new inode
* @idmap: idmap of the mount the inode was found from
@@ -5504,14 +5627,11 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
struct filename *to, unsigned int flags)
{
struct renamedata rd;
- struct dentry *old_dentry, *new_dentry;
- struct dentry *trap;
struct path old_path, new_path;
struct qstr old_last, new_last;
int old_type, new_type;
struct inode *delegated_inode = NULL;
- unsigned int lookup_flags = 0, target_flags =
- LOOKUP_RENAME_TARGET | LOOKUP_CREATE;
+ unsigned int lookup_flags = 0;
bool should_retry = false;
int error = -EINVAL;
@@ -5522,11 +5642,6 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
(flags & RENAME_EXCHANGE))
goto put_names;
- if (flags & RENAME_EXCHANGE)
- target_flags = 0;
- if (flags & RENAME_NOREPLACE)
- target_flags |= LOOKUP_EXCL;
-
retry:
error = filename_parentat(olddfd, from, lookup_flags, &old_path,
&old_last, &old_type);
@@ -5556,66 +5671,40 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
goto exit2;
retry_deleg:
- trap = lock_rename(new_path.dentry, old_path.dentry);
- if (IS_ERR(trap)) {
- error = PTR_ERR(trap);
+ rd.old_parent = old_path.dentry;
+ rd.mnt_idmap = mnt_idmap(old_path.mnt);
+ rd.new_parent = new_path.dentry;
+ rd.delegated_inode = &delegated_inode;
+ rd.flags = flags;
+
+ error = __start_renaming(&rd, lookup_flags, &old_last, &new_last);
+ if (error)
goto exit_lock_rename;
- }
- old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry,
- lookup_flags);
- error = PTR_ERR(old_dentry);
- if (IS_ERR(old_dentry))
- goto exit3;
- new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
- lookup_flags | target_flags);
- error = PTR_ERR(new_dentry);
- if (IS_ERR(new_dentry))
- goto exit4;
if (flags & RENAME_EXCHANGE) {
- if (!d_is_dir(new_dentry)) {
+ if (!d_is_dir(rd.new_dentry)) {
error = -ENOTDIR;
if (new_last.name[new_last.len])
- goto exit5;
+ goto exit_unlock;
}
}
/* unless the source is a directory trailing slashes give -ENOTDIR */
- if (!d_is_dir(old_dentry)) {
+ if (!d_is_dir(rd.old_dentry)) {
error = -ENOTDIR;
if (old_last.name[old_last.len])
- goto exit5;
+ goto exit_unlock;
if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len])
- goto exit5;
- }
- /* source should not be ancestor of target */
- error = -EINVAL;
- if (old_dentry == trap)
- goto exit5;
- /* target should not be an ancestor of source */
- if (!(flags & RENAME_EXCHANGE))
- error = -ENOTEMPTY;
- if (new_dentry == trap)
- goto exit5;
+ goto exit_unlock;
+ }
- error = security_path_rename(&old_path, old_dentry,
- &new_path, new_dentry, flags);
+ error = security_path_rename(&old_path, rd.old_dentry,
+ &new_path, rd.new_dentry, flags);
if (error)
- goto exit5;
+ goto exit_unlock;
- rd.old_parent = old_path.dentry;
- rd.old_dentry = old_dentry;
- rd.mnt_idmap = mnt_idmap(old_path.mnt);
- rd.new_parent = new_path.dentry;
- rd.new_dentry = new_dentry;
- rd.delegated_inode = &delegated_inode;
- rd.flags = flags;
error = vfs_rename(&rd);
-exit5:
- dput(new_dentry);
-exit4:
- dput(old_dentry);
-exit3:
- unlock_rename(new_path.dentry, old_path.dentry);
+exit_unlock:
+ end_renaming(&rd);
exit_lock_rename:
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index cd64ffe12e0b..62109885d4db 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1885,11 +1885,12 @@ __be32
nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
struct svc_fh *tfhp, char *tname, int tlen)
{
- struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap;
+ struct dentry *fdentry, *tdentry;
int type = S_IFDIR;
+ struct renamedata rd = {};
__be32 err;
int host_err;
- bool close_cached = false;
+ struct dentry *close_cached;
trace_nfsd_vfs_rename(rqstp, ffhp, tfhp, fname, flen, tname, tlen);
@@ -1915,15 +1916,22 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
goto out;
retry:
+ close_cached = NULL;
host_err = fh_want_write(ffhp);
if (host_err) {
err = nfserrno(host_err);
goto out;
}
- trap = lock_rename(tdentry, fdentry);
- if (IS_ERR(trap)) {
- err = nfserr_xdev;
+ rd.mnt_idmap = &nop_mnt_idmap;
+ rd.old_parent = fdentry;
+ rd.new_parent = tdentry;
+
+ host_err = start_renaming(&rd, 0, &QSTR_LEN(fname, flen),
+ &QSTR_LEN(tname, tlen));
+
+ if (host_err) {
+ err = nfserrno(host_err);
goto out_want_write;
}
err = fh_fill_pre_attrs(ffhp);
@@ -1933,48 +1941,23 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
if (err != nfs_ok)
goto out_unlock;
- odentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), fdentry);
- host_err = PTR_ERR(odentry);
- if (IS_ERR(odentry))
- goto out_nfserr;
+ type = d_inode(rd.old_dentry)->i_mode & S_IFMT;
+
+ if (d_inode(rd.new_dentry))
+ type = d_inode(rd.new_dentry)->i_mode & S_IFMT;
- host_err = -ENOENT;
- if (d_really_is_negative(odentry))
- goto out_dput_old;
- host_err = -EINVAL;
- if (odentry == trap)
- goto out_dput_old;
- type = d_inode(odentry)->i_mode & S_IFMT;
-
- ndentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(tname, tlen), tdentry);
- host_err = PTR_ERR(ndentry);
- if (IS_ERR(ndentry))
- goto out_dput_old;
- if (d_inode(ndentry))
- type = d_inode(ndentry)->i_mode & S_IFMT;
- host_err = -ENOTEMPTY;
- if (ndentry == trap)
- goto out_dput_new;
-
- if ((ndentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) &&
- nfsd_has_cached_files(ndentry)) {
- close_cached = true;
- goto out_dput_old;
+ if ((rd.new_dentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) &&
+ nfsd_has_cached_files(rd.new_dentry)) {
+ close_cached = dget(rd.new_dentry);
+ goto out_unlock;
} else {
- struct renamedata rd = {
- .mnt_idmap = &nop_mnt_idmap,
- .old_parent = fdentry,
- .old_dentry = odentry,
- .new_parent = tdentry,
- .new_dentry = ndentry,
- };
int retries;
for (retries = 1;;) {
host_err = vfs_rename(&rd);
if (host_err != -EAGAIN || !retries--)
break;
- if (!nfsd_wait_for_delegreturn(rqstp, d_inode(odentry)))
+ if (!nfsd_wait_for_delegreturn(rqstp, d_inode(rd.old_dentry)))
break;
}
if (!host_err) {
@@ -1983,11 +1966,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
host_err = commit_metadata(ffhp);
}
}
- out_dput_new:
- dput(ndentry);
- out_dput_old:
- dput(odentry);
- out_nfserr:
if (host_err == -EBUSY) {
/*
* See RFC 8881 Section 18.26.4 para 1-3: NFSv4 RENAME
@@ -2006,7 +1984,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
fh_fill_post_attrs(tfhp);
}
out_unlock:
- unlock_rename(tdentry, fdentry);
+ end_renaming(&rd);
out_want_write:
fh_drop_write(ffhp);
@@ -2017,9 +1995,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
* until this point and then reattempt the whole shebang.
*/
if (close_cached) {
- close_cached = false;
- nfsd_close_cached_files(ndentry);
- dput(ndentry);
+ nfsd_close_cached_files(close_cached);
+ dput(close_cached);
goto retry;
}
out:
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index c8d0885ee5e0..0f2c2da68433 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -1124,9 +1124,7 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
int err;
struct dentry *old_upperdir;
struct dentry *new_upperdir;
- struct dentry *olddentry = NULL;
- struct dentry *newdentry = NULL;
- struct dentry *trap, *de;
+ struct renamedata rd = {};
bool old_opaque;
bool new_opaque;
bool cleanup_whiteout = false;
@@ -1136,6 +1134,7 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
bool new_is_dir = d_is_dir(new);
bool samedir = olddir == newdir;
struct dentry *opaquedir = NULL;
+ struct dentry *whiteout = NULL;
const struct cred *old_cred = NULL;
struct ovl_fs *ofs = OVL_FS(old->d_sb);
LIST_HEAD(list);
@@ -1233,29 +1232,21 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
}
}
- trap = lock_rename(new_upperdir, old_upperdir);
- if (IS_ERR(trap)) {
- err = PTR_ERR(trap);
- goto out_revert_creds;
- }
+ rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_parent = old_upperdir;
+ rd.new_parent = new_upperdir;
+ rd.flags = flags;
- de = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir,
- old->d_name.len);
- err = PTR_ERR(de);
- if (IS_ERR(de))
- goto out_unlock;
- olddentry = de;
+ err = start_renaming(&rd, 0,
+ &QSTR_LEN(old->d_name.name, old->d_name.len),
+ &QSTR_LEN(new->d_name.name, new->d_name.len));
- err = -ESTALE;
- if (!ovl_matches_upper(old, olddentry))
- goto out_unlock;
+ if (err)
+ goto out_revert_creds;
- de = ovl_lookup_upper(ofs, new->d_name.name, new_upperdir,
- new->d_name.len);
- err = PTR_ERR(de);
- if (IS_ERR(de))
+ err = -ESTALE;
+ if (!ovl_matches_upper(old, rd.old_dentry))
goto out_unlock;
- newdentry = de;
old_opaque = ovl_dentry_is_opaque(old);
new_opaque = ovl_dentry_is_opaque(new);
@@ -1263,15 +1254,15 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
err = -ESTALE;
if (d_inode(new) && ovl_dentry_upper(new)) {
if (opaquedir) {
- if (newdentry != opaquedir)
+ if (rd.new_dentry != opaquedir)
goto out_unlock;
} else {
- if (!ovl_matches_upper(new, newdentry))
+ if (!ovl_matches_upper(new, rd.new_dentry))
goto out_unlock;
}
} else {
- if (!d_is_negative(newdentry)) {
- if (!new_opaque || !ovl_upper_is_whiteout(ofs, newdentry))
+ if (!d_is_negative(rd.new_dentry)) {
+ if (!new_opaque || !ovl_upper_is_whiteout(ofs, rd.new_dentry))
goto out_unlock;
} else {
if (flags & RENAME_EXCHANGE)
@@ -1279,19 +1270,14 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
}
}
- if (olddentry == trap)
- goto out_unlock;
- if (newdentry == trap)
- goto out_unlock;
-
- if (olddentry->d_inode == newdentry->d_inode)
+ if (rd.old_dentry->d_inode == rd.new_dentry->d_inode)
goto out_unlock;
err = 0;
if (ovl_type_merge_or_lower(old))
err = ovl_set_redirect(old, samedir);
else if (is_dir && !old_opaque && ovl_type_merge(new->d_parent))
- err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
+ err = ovl_set_opaque_xerr(old, rd.old_dentry, -EXDEV);
if (err)
goto out_unlock;
@@ -1299,18 +1285,24 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
err = ovl_set_redirect(new, samedir);
else if (!overwrite && new_is_dir && !new_opaque &&
ovl_type_merge(old->d_parent))
- err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
+ err = ovl_set_opaque_xerr(new, rd.new_dentry, -EXDEV);
if (err)
goto out_unlock;
- err = ovl_do_rename(ofs, old_upperdir, olddentry,
- new_upperdir, newdentry, flags);
- unlock_rename(new_upperdir, old_upperdir);
+ err = ovl_do_rename_rd(&rd);
+
+ if (!err && cleanup_whiteout)
+ whiteout = dget(rd.new_dentry);
+
+ end_renaming(&rd);
+
if (err)
goto out_revert_creds;
- if (cleanup_whiteout)
- ovl_cleanup(ofs, old_upperdir, newdentry);
+ if (whiteout) {
+ ovl_cleanup(ofs, old_upperdir, whiteout);
+ dput(whiteout);
+ }
if (overwrite && d_inode(new)) {
if (new_is_dir)
@@ -1336,14 +1328,12 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
else
ovl_drop_write(old);
out:
- dput(newdentry);
- dput(olddentry);
dput(opaquedir);
ovl_cache_free(&list);
return err;
out_unlock:
- unlock_rename(new_upperdir, old_upperdir);
+ end_renaming(&rd);
goto out_revert_creds;
}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 49ad65f829dc..3cc85a893b5c 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -355,11 +355,24 @@ static inline int ovl_do_remove_acl(struct ovl_fs *ofs, struct dentry *dentry,
return vfs_remove_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name);
}
+static inline int ovl_do_rename_rd(struct renamedata *rd)
+{
+ int err;
+
+ pr_debug("rename(%pd2, %pd2, 0x%x)\n", rd->old_dentry, rd->new_dentry,
+ rd->flags);
+ err = vfs_rename(rd);
+ if (err) {
+ pr_debug("...rename(%pd2, %pd2, ...) = %i\n",
+ rd->old_dentry, rd->new_dentry, err);
+ }
+ return err;
+}
+
static inline int ovl_do_rename(struct ovl_fs *ofs, struct dentry *olddir,
struct dentry *olddentry, struct dentry *newdir,
struct dentry *newdentry, unsigned int flags)
{
- int err;
struct renamedata rd = {
.mnt_idmap = ovl_upper_mnt_idmap(ofs),
.old_parent = olddir,
@@ -369,13 +382,7 @@ static inline int ovl_do_rename(struct ovl_fs *ofs, struct dentry *olddir,
.flags = flags,
};
- pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags);
- err = vfs_rename(&rd);
- if (err) {
- pr_debug("...rename(%pd2, %pd2, ...) = %i\n",
- olddentry, newdentry, err);
- }
- return err;
+ return ovl_do_rename_rd(&rd);
}
static inline int ovl_do_whiteout(struct ovl_fs *ofs,
diff --git a/include/linux/namei.h b/include/linux/namei.h
index e5cff89679df..19c3d8e336d5 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -156,6 +156,9 @@ extern int follow_up(struct path *);
extern struct dentry *lock_rename(struct dentry *, struct dentry *);
extern struct dentry *lock_rename_child(struct dentry *, struct dentry *);
extern void unlock_rename(struct dentry *, struct dentry *);
+int start_renaming(struct renamedata *rd, int lookup_flags,
+ struct qstr *old_last, struct qstr *new_last);
+void end_renaming(struct renamedata *rd);
/**
* mode_strip_umask - handle vfs umask stripping
--
2.50.0.107.gf914562f5916.dirty
^ permalink raw reply related [flat|nested] 24+ messages in thread* Re: [PATCH v4 09/14] VFS/nfsd/ovl: introduce start_renaming() and end_renaming()
2025-10-29 23:31 ` [PATCH v4 09/14] VFS/nfsd/ovl: introduce start_renaming() and end_renaming() NeilBrown
@ 2025-10-30 13:22 ` Chuck Lever
0 siblings, 0 replies; 24+ messages in thread
From: Chuck Lever @ 2025-10-30 13:22 UTC (permalink / raw)
To: NeilBrown, Alexander Viro, Christian Brauner, Amir Goldstein
Cc: Jan Kara, linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Olga Kornievskaia,
Dai Ngo, Namjae Jeon, Steve French, Sergey Senozhatsky,
Carlos Maiolino, John Johansen, Paul Moore, James Morris,
Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek, Mateusz Guzik,
Lorenzo Stoakes, Stefan Berger, Darrick J. Wong, linux-kernel,
netfs, ecryptfs, linux-nfs, linux-unionfs, linux-cifs, linux-xfs,
apparmor, linux-security-module, selinux
On 10/29/25 7:31 PM, NeilBrown wrote:
> From: NeilBrown <neil@brown.name>
>
> start_renaming() combines name lookup and locking to prepare for rename.
> It is used when two names need to be looked up as in nfsd and overlayfs -
> cases where one or both dentries are already available will be handled
> separately.
>
> __start_renaming() avoids the inode_permission check and hash
> calculation and is suitable after filename_parentat() in do_renameat2().
> It subsumes quite a bit of code from that function.
>
> start_renaming() does calculate the hash and check X permission and is
> suitable elsewhere:
> - nfsd_rename()
> - ovl_rename()
>
> In ovl, ovl_do_rename_rd() is factored out of ovl_do_rename(), which
> itself will be gone by the end of the series.
>
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> Signed-off-by: NeilBrown <neil@brown.name>
>
> --
> Changes since v3:
> - added missig dput() in ovl_rename when "whiteout" is not-NULL.
>
> Changes since v2:
> - in __start_renaming() some label have been renamed, and err
> is always set before a "goto out_foo" rather than passing the
> error in a dentry*.
> - ovl_do_rename() changed to call the new ovl_do_rename_rd() rather
> than keeping duplicate code
> - code around ovl_cleanup() call in ovl_rename() restructured.
> ---
> fs/namei.c | 197 ++++++++++++++++++++++++++++-----------
> fs/nfsd/vfs.c | 73 +++++----------
> fs/overlayfs/dir.c | 74 +++++++--------
> fs/overlayfs/overlayfs.h | 23 +++--
> include/linux/namei.h | 3 +
> 5 files changed, 218 insertions(+), 152 deletions(-)
>
> diff --git a/fs/namei.c b/fs/namei.c
> index 04d2819bd351..0ee0a110b088 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -3667,6 +3667,129 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
> }
> EXPORT_SYMBOL(unlock_rename);
>
> +/**
> + * __start_renaming - lookup and lock names for rename
> + * @rd: rename data containing parent and flags, and
> + * for receiving found dentries
> + * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL,
> + * LOOKUP_NO_SYMLINKS etc).
> + * @old_last: name of object in @rd.old_parent
> + * @new_last: name of object in @rd.new_parent
> + *
> + * Look up two names and ensure locks are in place for
> + * rename.
> + *
> + * On success the found dentries are stored in @rd.old_dentry,
> + * @rd.new_dentry. These references and the lock are dropped by
> + * end_renaming().
> + *
> + * The passed in qstrs must have the hash calculated, and no permission
> + * checking is performed.
> + *
> + * Returns: zero or an error.
> + */
> +static int
> +__start_renaming(struct renamedata *rd, int lookup_flags,
> + struct qstr *old_last, struct qstr *new_last)
> +{
> + struct dentry *trap;
> + struct dentry *d1, *d2;
> + int target_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE;
> + int err;
> +
> + if (rd->flags & RENAME_EXCHANGE)
> + target_flags = 0;
> + if (rd->flags & RENAME_NOREPLACE)
> + target_flags |= LOOKUP_EXCL;
> +
> + trap = lock_rename(rd->old_parent, rd->new_parent);
> + if (IS_ERR(trap))
> + return PTR_ERR(trap);
> +
> + d1 = lookup_one_qstr_excl(old_last, rd->old_parent,
> + lookup_flags);
> + err = PTR_ERR(d1);
> + if (IS_ERR(d1))
> + goto out_unlock;
> +
> + d2 = lookup_one_qstr_excl(new_last, rd->new_parent,
> + lookup_flags | target_flags);
> + err = PTR_ERR(d2);
> + if (IS_ERR(d2))
> + goto out_dput_d1;
> +
> + if (d1 == trap) {
> + /* source is an ancestor of target */
> + err = -EINVAL;
> + goto out_dput_d2;
> + }
> +
> + if (d2 == trap) {
> + /* target is an ancestor of source */
> + if (rd->flags & RENAME_EXCHANGE)
> + err = -EINVAL;
> + else
> + err = -ENOTEMPTY;
> + goto out_dput_d2;
> + }
> +
> + rd->old_dentry = d1;
> + rd->new_dentry = d2;
> + return 0;
> +
> +out_dput_d2:
> + dput(d2);
> +out_dput_d1:
> + dput(d1);
> +out_unlock:
> + unlock_rename(rd->old_parent, rd->new_parent);
> + return err;
> +}
> +
> +/**
> + * start_renaming - lookup and lock names for rename with permission checking
> + * @rd: rename data containing parent and flags, and
> + * for receiving found dentries
> + * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL,
> + * LOOKUP_NO_SYMLINKS etc).
> + * @old_last: name of object in @rd.old_parent
> + * @new_last: name of object in @rd.new_parent
> + *
> + * Look up two names and ensure locks are in place for
> + * rename.
> + *
> + * On success the found dentries are stored in @rd.old_dentry,
> + * @rd.new_dentry. These references and the lock are dropped by
> + * end_renaming().
> + *
> + * The passed in qstrs need not have the hash calculated, and basic
> + * eXecute permission checking is performed against @rd.mnt_idmap.
> + *
> + * Returns: zero or an error.
> + */
> +int start_renaming(struct renamedata *rd, int lookup_flags,
> + struct qstr *old_last, struct qstr *new_last)
> +{
> + int err;
> +
> + err = lookup_one_common(rd->mnt_idmap, old_last, rd->old_parent);
> + if (err)
> + return err;
> + err = lookup_one_common(rd->mnt_idmap, new_last, rd->new_parent);
> + if (err)
> + return err;
> + return __start_renaming(rd, lookup_flags, old_last, new_last);
> +}
> +EXPORT_SYMBOL(start_renaming);
> +
> +void end_renaming(struct renamedata *rd)
> +{
> + unlock_rename(rd->old_parent, rd->new_parent);
> + dput(rd->old_dentry);
> + dput(rd->new_dentry);
> +}
> +EXPORT_SYMBOL(end_renaming);
> +
> /**
> * vfs_prepare_mode - prepare the mode to be used for a new inode
> * @idmap: idmap of the mount the inode was found from
> @@ -5504,14 +5627,11 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> struct filename *to, unsigned int flags)
> {
> struct renamedata rd;
> - struct dentry *old_dentry, *new_dentry;
> - struct dentry *trap;
> struct path old_path, new_path;
> struct qstr old_last, new_last;
> int old_type, new_type;
> struct inode *delegated_inode = NULL;
> - unsigned int lookup_flags = 0, target_flags =
> - LOOKUP_RENAME_TARGET | LOOKUP_CREATE;
> + unsigned int lookup_flags = 0;
> bool should_retry = false;
> int error = -EINVAL;
>
> @@ -5522,11 +5642,6 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> (flags & RENAME_EXCHANGE))
> goto put_names;
>
> - if (flags & RENAME_EXCHANGE)
> - target_flags = 0;
> - if (flags & RENAME_NOREPLACE)
> - target_flags |= LOOKUP_EXCL;
> -
> retry:
> error = filename_parentat(olddfd, from, lookup_flags, &old_path,
> &old_last, &old_type);
> @@ -5556,66 +5671,40 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> goto exit2;
>
> retry_deleg:
> - trap = lock_rename(new_path.dentry, old_path.dentry);
> - if (IS_ERR(trap)) {
> - error = PTR_ERR(trap);
> + rd.old_parent = old_path.dentry;
> + rd.mnt_idmap = mnt_idmap(old_path.mnt);
> + rd.new_parent = new_path.dentry;
> + rd.delegated_inode = &delegated_inode;
> + rd.flags = flags;
> +
> + error = __start_renaming(&rd, lookup_flags, &old_last, &new_last);
> + if (error)
> goto exit_lock_rename;
> - }
>
> - old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry,
> - lookup_flags);
> - error = PTR_ERR(old_dentry);
> - if (IS_ERR(old_dentry))
> - goto exit3;
> - new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
> - lookup_flags | target_flags);
> - error = PTR_ERR(new_dentry);
> - if (IS_ERR(new_dentry))
> - goto exit4;
> if (flags & RENAME_EXCHANGE) {
> - if (!d_is_dir(new_dentry)) {
> + if (!d_is_dir(rd.new_dentry)) {
> error = -ENOTDIR;
> if (new_last.name[new_last.len])
> - goto exit5;
> + goto exit_unlock;
> }
> }
> /* unless the source is a directory trailing slashes give -ENOTDIR */
> - if (!d_is_dir(old_dentry)) {
> + if (!d_is_dir(rd.old_dentry)) {
> error = -ENOTDIR;
> if (old_last.name[old_last.len])
> - goto exit5;
> + goto exit_unlock;
> if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len])
> - goto exit5;
> - }
> - /* source should not be ancestor of target */
> - error = -EINVAL;
> - if (old_dentry == trap)
> - goto exit5;
> - /* target should not be an ancestor of source */
> - if (!(flags & RENAME_EXCHANGE))
> - error = -ENOTEMPTY;
> - if (new_dentry == trap)
> - goto exit5;
> + goto exit_unlock;
> + }
>
> - error = security_path_rename(&old_path, old_dentry,
> - &new_path, new_dentry, flags);
> + error = security_path_rename(&old_path, rd.old_dentry,
> + &new_path, rd.new_dentry, flags);
> if (error)
> - goto exit5;
> + goto exit_unlock;
>
> - rd.old_parent = old_path.dentry;
> - rd.old_dentry = old_dentry;
> - rd.mnt_idmap = mnt_idmap(old_path.mnt);
> - rd.new_parent = new_path.dentry;
> - rd.new_dentry = new_dentry;
> - rd.delegated_inode = &delegated_inode;
> - rd.flags = flags;
> error = vfs_rename(&rd);
> -exit5:
> - dput(new_dentry);
> -exit4:
> - dput(old_dentry);
> -exit3:
> - unlock_rename(new_path.dentry, old_path.dentry);
> +exit_unlock:
> + end_renaming(&rd);
> exit_lock_rename:
> if (delegated_inode) {
> error = break_deleg_wait(&delegated_inode);
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index cd64ffe12e0b..62109885d4db 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -1885,11 +1885,12 @@ __be32
> nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
> struct svc_fh *tfhp, char *tname, int tlen)
> {
> - struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap;
> + struct dentry *fdentry, *tdentry;
> int type = S_IFDIR;
> + struct renamedata rd = {};
> __be32 err;
> int host_err;
> - bool close_cached = false;
> + struct dentry *close_cached;
>
> trace_nfsd_vfs_rename(rqstp, ffhp, tfhp, fname, flen, tname, tlen);
>
> @@ -1915,15 +1916,22 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
> goto out;
>
> retry:
> + close_cached = NULL;
> host_err = fh_want_write(ffhp);
> if (host_err) {
> err = nfserrno(host_err);
> goto out;
> }
>
> - trap = lock_rename(tdentry, fdentry);
> - if (IS_ERR(trap)) {
> - err = nfserr_xdev;
> + rd.mnt_idmap = &nop_mnt_idmap;
> + rd.old_parent = fdentry;
> + rd.new_parent = tdentry;
> +
> + host_err = start_renaming(&rd, 0, &QSTR_LEN(fname, flen),
> + &QSTR_LEN(tname, tlen));
> +
> + if (host_err) {
> + err = nfserrno(host_err);
> goto out_want_write;
> }
> err = fh_fill_pre_attrs(ffhp);
> @@ -1933,48 +1941,23 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
> if (err != nfs_ok)
> goto out_unlock;
>
> - odentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), fdentry);
> - host_err = PTR_ERR(odentry);
> - if (IS_ERR(odentry))
> - goto out_nfserr;
> + type = d_inode(rd.old_dentry)->i_mode & S_IFMT;
> +
> + if (d_inode(rd.new_dentry))
> + type = d_inode(rd.new_dentry)->i_mode & S_IFMT;
>
> - host_err = -ENOENT;
> - if (d_really_is_negative(odentry))
> - goto out_dput_old;
> - host_err = -EINVAL;
> - if (odentry == trap)
> - goto out_dput_old;
> - type = d_inode(odentry)->i_mode & S_IFMT;
> -
> - ndentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(tname, tlen), tdentry);
> - host_err = PTR_ERR(ndentry);
> - if (IS_ERR(ndentry))
> - goto out_dput_old;
> - if (d_inode(ndentry))
> - type = d_inode(ndentry)->i_mode & S_IFMT;
> - host_err = -ENOTEMPTY;
> - if (ndentry == trap)
> - goto out_dput_new;
> -
> - if ((ndentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) &&
> - nfsd_has_cached_files(ndentry)) {
> - close_cached = true;
> - goto out_dput_old;
> + if ((rd.new_dentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) &&
> + nfsd_has_cached_files(rd.new_dentry)) {
> + close_cached = dget(rd.new_dentry);
> + goto out_unlock;
> } else {
> - struct renamedata rd = {
> - .mnt_idmap = &nop_mnt_idmap,
> - .old_parent = fdentry,
> - .old_dentry = odentry,
> - .new_parent = tdentry,
> - .new_dentry = ndentry,
> - };
> int retries;
>
> for (retries = 1;;) {
> host_err = vfs_rename(&rd);
> if (host_err != -EAGAIN || !retries--)
> break;
> - if (!nfsd_wait_for_delegreturn(rqstp, d_inode(odentry)))
> + if (!nfsd_wait_for_delegreturn(rqstp, d_inode(rd.old_dentry)))
> break;
> }
> if (!host_err) {
> @@ -1983,11 +1966,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
> host_err = commit_metadata(ffhp);
> }
> }
> - out_dput_new:
> - dput(ndentry);
> - out_dput_old:
> - dput(odentry);
> - out_nfserr:
> if (host_err == -EBUSY) {
> /*
> * See RFC 8881 Section 18.26.4 para 1-3: NFSv4 RENAME
> @@ -2006,7 +1984,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
> fh_fill_post_attrs(tfhp);
> }
> out_unlock:
> - unlock_rename(tdentry, fdentry);
> + end_renaming(&rd);
> out_want_write:
> fh_drop_write(ffhp);
>
> @@ -2017,9 +1995,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
> * until this point and then reattempt the whole shebang.
> */
> if (close_cached) {
> - close_cached = false;
> - nfsd_close_cached_files(ndentry);
> - dput(ndentry);
> + nfsd_close_cached_files(close_cached);
> + dput(close_cached);
> goto retry;
> }
> out:
> diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> index c8d0885ee5e0..0f2c2da68433 100644
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -1124,9 +1124,7 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
> int err;
> struct dentry *old_upperdir;
> struct dentry *new_upperdir;
> - struct dentry *olddentry = NULL;
> - struct dentry *newdentry = NULL;
> - struct dentry *trap, *de;
> + struct renamedata rd = {};
> bool old_opaque;
> bool new_opaque;
> bool cleanup_whiteout = false;
> @@ -1136,6 +1134,7 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
> bool new_is_dir = d_is_dir(new);
> bool samedir = olddir == newdir;
> struct dentry *opaquedir = NULL;
> + struct dentry *whiteout = NULL;
> const struct cred *old_cred = NULL;
> struct ovl_fs *ofs = OVL_FS(old->d_sb);
> LIST_HEAD(list);
> @@ -1233,29 +1232,21 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
> }
> }
>
> - trap = lock_rename(new_upperdir, old_upperdir);
> - if (IS_ERR(trap)) {
> - err = PTR_ERR(trap);
> - goto out_revert_creds;
> - }
> + rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
> + rd.old_parent = old_upperdir;
> + rd.new_parent = new_upperdir;
> + rd.flags = flags;
>
> - de = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir,
> - old->d_name.len);
> - err = PTR_ERR(de);
> - if (IS_ERR(de))
> - goto out_unlock;
> - olddentry = de;
> + err = start_renaming(&rd, 0,
> + &QSTR_LEN(old->d_name.name, old->d_name.len),
> + &QSTR_LEN(new->d_name.name, new->d_name.len));
>
> - err = -ESTALE;
> - if (!ovl_matches_upper(old, olddentry))
> - goto out_unlock;
> + if (err)
> + goto out_revert_creds;
>
> - de = ovl_lookup_upper(ofs, new->d_name.name, new_upperdir,
> - new->d_name.len);
> - err = PTR_ERR(de);
> - if (IS_ERR(de))
> + err = -ESTALE;
> + if (!ovl_matches_upper(old, rd.old_dentry))
> goto out_unlock;
> - newdentry = de;
>
> old_opaque = ovl_dentry_is_opaque(old);
> new_opaque = ovl_dentry_is_opaque(new);
> @@ -1263,15 +1254,15 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
> err = -ESTALE;
> if (d_inode(new) && ovl_dentry_upper(new)) {
> if (opaquedir) {
> - if (newdentry != opaquedir)
> + if (rd.new_dentry != opaquedir)
> goto out_unlock;
> } else {
> - if (!ovl_matches_upper(new, newdentry))
> + if (!ovl_matches_upper(new, rd.new_dentry))
> goto out_unlock;
> }
> } else {
> - if (!d_is_negative(newdentry)) {
> - if (!new_opaque || !ovl_upper_is_whiteout(ofs, newdentry))
> + if (!d_is_negative(rd.new_dentry)) {
> + if (!new_opaque || !ovl_upper_is_whiteout(ofs, rd.new_dentry))
> goto out_unlock;
> } else {
> if (flags & RENAME_EXCHANGE)
> @@ -1279,19 +1270,14 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
> }
> }
>
> - if (olddentry == trap)
> - goto out_unlock;
> - if (newdentry == trap)
> - goto out_unlock;
> -
> - if (olddentry->d_inode == newdentry->d_inode)
> + if (rd.old_dentry->d_inode == rd.new_dentry->d_inode)
> goto out_unlock;
>
> err = 0;
> if (ovl_type_merge_or_lower(old))
> err = ovl_set_redirect(old, samedir);
> else if (is_dir && !old_opaque && ovl_type_merge(new->d_parent))
> - err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
> + err = ovl_set_opaque_xerr(old, rd.old_dentry, -EXDEV);
> if (err)
> goto out_unlock;
>
> @@ -1299,18 +1285,24 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
> err = ovl_set_redirect(new, samedir);
> else if (!overwrite && new_is_dir && !new_opaque &&
> ovl_type_merge(old->d_parent))
> - err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
> + err = ovl_set_opaque_xerr(new, rd.new_dentry, -EXDEV);
> if (err)
> goto out_unlock;
>
> - err = ovl_do_rename(ofs, old_upperdir, olddentry,
> - new_upperdir, newdentry, flags);
> - unlock_rename(new_upperdir, old_upperdir);
> + err = ovl_do_rename_rd(&rd);
> +
> + if (!err && cleanup_whiteout)
> + whiteout = dget(rd.new_dentry);
> +
> + end_renaming(&rd);
> +
> if (err)
> goto out_revert_creds;
>
> - if (cleanup_whiteout)
> - ovl_cleanup(ofs, old_upperdir, newdentry);
> + if (whiteout) {
> + ovl_cleanup(ofs, old_upperdir, whiteout);
> + dput(whiteout);
> + }
>
> if (overwrite && d_inode(new)) {
> if (new_is_dir)
> @@ -1336,14 +1328,12 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
> else
> ovl_drop_write(old);
> out:
> - dput(newdentry);
> - dput(olddentry);
> dput(opaquedir);
> ovl_cache_free(&list);
> return err;
>
> out_unlock:
> - unlock_rename(new_upperdir, old_upperdir);
> + end_renaming(&rd);
> goto out_revert_creds;
> }
>
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index 49ad65f829dc..3cc85a893b5c 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -355,11 +355,24 @@ static inline int ovl_do_remove_acl(struct ovl_fs *ofs, struct dentry *dentry,
> return vfs_remove_acl(ovl_upper_mnt_idmap(ofs), dentry, acl_name);
> }
>
> +static inline int ovl_do_rename_rd(struct renamedata *rd)
> +{
> + int err;
> +
> + pr_debug("rename(%pd2, %pd2, 0x%x)\n", rd->old_dentry, rd->new_dentry,
> + rd->flags);
> + err = vfs_rename(rd);
> + if (err) {
> + pr_debug("...rename(%pd2, %pd2, ...) = %i\n",
> + rd->old_dentry, rd->new_dentry, err);
> + }
> + return err;
> +}
> +
> static inline int ovl_do_rename(struct ovl_fs *ofs, struct dentry *olddir,
> struct dentry *olddentry, struct dentry *newdir,
> struct dentry *newdentry, unsigned int flags)
> {
> - int err;
> struct renamedata rd = {
> .mnt_idmap = ovl_upper_mnt_idmap(ofs),
> .old_parent = olddir,
> @@ -369,13 +382,7 @@ static inline int ovl_do_rename(struct ovl_fs *ofs, struct dentry *olddir,
> .flags = flags,
> };
>
> - pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags);
> - err = vfs_rename(&rd);
> - if (err) {
> - pr_debug("...rename(%pd2, %pd2, ...) = %i\n",
> - olddentry, newdentry, err);
> - }
> - return err;
> + return ovl_do_rename_rd(&rd);
> }
>
> static inline int ovl_do_whiteout(struct ovl_fs *ofs,
> diff --git a/include/linux/namei.h b/include/linux/namei.h
> index e5cff89679df..19c3d8e336d5 100644
> --- a/include/linux/namei.h
> +++ b/include/linux/namei.h
> @@ -156,6 +156,9 @@ extern int follow_up(struct path *);
> extern struct dentry *lock_rename(struct dentry *, struct dentry *);
> extern struct dentry *lock_rename_child(struct dentry *, struct dentry *);
> extern void unlock_rename(struct dentry *, struct dentry *);
> +int start_renaming(struct renamedata *rd, int lookup_flags,
> + struct qstr *old_last, struct qstr *new_last);
> +void end_renaming(struct renamedata *rd);
>
> /**
> * mode_strip_umask - handle vfs umask stripping
For the fs/nfsd/vfs.c hunks:
Acked-by: Chuck Lever <chuck.lever@oracle.com>
--
Chuck Lever
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH v4 10/14] VFS/ovl/smb: introduce start_renaming_dentry()
2025-10-29 23:31 [PATCH v4 00/14] Create and use APIs to centralise locking for directory ops NeilBrown
` (8 preceding siblings ...)
2025-10-29 23:31 ` [PATCH v4 09/14] VFS/nfsd/ovl: introduce start_renaming() and end_renaming() NeilBrown
@ 2025-10-29 23:31 ` NeilBrown
2025-10-30 0:02 ` Namjae Jeon
2025-10-29 23:31 ` [PATCH v4 11/14] Add start_renaming_two_dentries() NeilBrown
` (3 subsequent siblings)
13 siblings, 1 reply; 24+ messages in thread
From: NeilBrown @ 2025-10-29 23:31 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Amir Goldstein
Cc: Jan Kara, linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Chuck Lever,
Olga Kornievskaia, Dai Ngo, Namjae Jeon, Steve French,
Sergey Senozhatsky, Carlos Maiolino, John Johansen, Paul Moore,
James Morris, Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek,
Mateusz Guzik, Lorenzo Stoakes, Stefan Berger, Darrick J. Wong,
linux-kernel, netfs, ecryptfs, linux-nfs, linux-unionfs,
linux-cifs, linux-xfs, apparmor, linux-security-module, selinux
From: NeilBrown <neil@brown.name>
Several callers perform a rename on a dentry they already have, and only
require lookup for the target name. This includes smb/server and a few
different places in overlayfs.
start_renaming_dentry() performs the required lookup and takes the
required lock using lock_rename_child()
It is used in three places in overlayfs and in ksmbd_vfs_rename().
In the ksmbd case, the parent of the source is not important - the
source must be renamed from wherever it is. So start_renaming_dentry()
allows rd->old_parent to be NULL and only checks it if it is non-NULL.
On success rd->old_parent will be the parent of old_dentry with an extra
reference taken. Other start_renaming function also now take the extra
reference and end_renaming() now drops this reference as well.
ovl_lookup_temp(), ovl_parent_lock(), and ovl_parent_unlock() are
all removed as they are no longer needed.
OVL_TEMPNAME_SIZE and ovl_tempname() are now declared in overlayfs.h so
that ovl_check_rename_whiteout() can access them.
ovl_copy_up_workdir() now always cleans up on error.
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
---
fs/namei.c | 108 ++++++++++++++++++++++++++++++++++++---
fs/overlayfs/copy_up.c | 54 +++++++++-----------
fs/overlayfs/dir.c | 19 +------
fs/overlayfs/overlayfs.h | 8 +--
fs/overlayfs/super.c | 22 ++++----
fs/overlayfs/util.c | 11 ----
fs/smb/server/vfs.c | 60 ++++------------------
include/linux/namei.h | 2 +
8 files changed, 150 insertions(+), 134 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index 0ee0a110b088..5153ceddd37a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3669,7 +3669,7 @@ EXPORT_SYMBOL(unlock_rename);
/**
* __start_renaming - lookup and lock names for rename
- * @rd: rename data containing parent and flags, and
+ * @rd: rename data containing parents and flags, and
* for receiving found dentries
* @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL,
* LOOKUP_NO_SYMLINKS etc).
@@ -3680,8 +3680,8 @@ EXPORT_SYMBOL(unlock_rename);
* rename.
*
* On success the found dentries are stored in @rd.old_dentry,
- * @rd.new_dentry. These references and the lock are dropped by
- * end_renaming().
+ * @rd.new_dentry and an extra ref is taken on @rd.old_parent.
+ * These references and the lock are dropped by end_renaming().
*
* The passed in qstrs must have the hash calculated, and no permission
* checking is performed.
@@ -3735,6 +3735,7 @@ __start_renaming(struct renamedata *rd, int lookup_flags,
rd->old_dentry = d1;
rd->new_dentry = d2;
+ dget(rd->old_parent);
return 0;
out_dput_d2:
@@ -3748,7 +3749,7 @@ __start_renaming(struct renamedata *rd, int lookup_flags,
/**
* start_renaming - lookup and lock names for rename with permission checking
- * @rd: rename data containing parent and flags, and
+ * @rd: rename data containing parents and flags, and
* for receiving found dentries
* @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL,
* LOOKUP_NO_SYMLINKS etc).
@@ -3759,8 +3760,8 @@ __start_renaming(struct renamedata *rd, int lookup_flags,
* rename.
*
* On success the found dentries are stored in @rd.old_dentry,
- * @rd.new_dentry. These references and the lock are dropped by
- * end_renaming().
+ * @rd.new_dentry. Also the refcount on @rd->old_parent is increased.
+ * These references and the lock are dropped by end_renaming().
*
* The passed in qstrs need not have the hash calculated, and basic
* eXecute permission checking is performed against @rd.mnt_idmap.
@@ -3782,11 +3783,106 @@ int start_renaming(struct renamedata *rd, int lookup_flags,
}
EXPORT_SYMBOL(start_renaming);
+static int
+__start_renaming_dentry(struct renamedata *rd, int lookup_flags,
+ struct dentry *old_dentry, struct qstr *new_last)
+{
+ struct dentry *trap;
+ struct dentry *d2;
+ int target_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE;
+ int err;
+
+ if (rd->flags & RENAME_EXCHANGE)
+ target_flags = 0;
+ if (rd->flags & RENAME_NOREPLACE)
+ target_flags |= LOOKUP_EXCL;
+
+ /* Already have the dentry - need to be sure to lock the correct parent */
+ trap = lock_rename_child(old_dentry, rd->new_parent);
+ if (IS_ERR(trap))
+ return PTR_ERR(trap);
+ if (d_unhashed(old_dentry) ||
+ (rd->old_parent && rd->old_parent != old_dentry->d_parent)) {
+ /* dentry was removed, or moved and explicit parent requested */
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ d2 = lookup_one_qstr_excl(new_last, rd->new_parent,
+ lookup_flags | target_flags);
+ err = PTR_ERR(d2);
+ if (IS_ERR(d2))
+ goto out_unlock;
+
+ if (old_dentry == trap) {
+ /* source is an ancestor of target */
+ err = -EINVAL;
+ goto out_dput_d2;
+ }
+
+ if (d2 == trap) {
+ /* target is an ancestor of source */
+ if (rd->flags & RENAME_EXCHANGE)
+ err = -EINVAL;
+ else
+ err = -ENOTEMPTY;
+ goto out_dput_d2;
+ }
+
+ rd->old_dentry = dget(old_dentry);
+ rd->new_dentry = d2;
+ rd->old_parent = dget(old_dentry->d_parent);
+ return 0;
+
+out_dput_d2:
+ dput(d2);
+out_unlock:
+ unlock_rename(old_dentry->d_parent, rd->new_parent);
+ return err;
+}
+
+/**
+ * start_renaming_dentry - lookup and lock name for rename with permission checking
+ * @rd: rename data containing parents and flags, and
+ * for receiving found dentries
+ * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL,
+ * LOOKUP_NO_SYMLINKS etc).
+ * @old_dentry: dentry of name to move
+ * @new_last: name of target in @rd.new_parent
+ *
+ * Look up target name and ensure locks are in place for
+ * rename.
+ *
+ * On success the found dentry is stored in @rd.new_dentry and
+ * @rd.old_parent is confirmed to be the parent of @old_dentry. If it
+ * was originally %NULL, it is set. In either case a reference is taken
+ * so that end_renaming() can have a stable reference to unlock.
+ *
+ * References and the lock can be dropped with end_renaming()
+ *
+ * The passed in qstr need not have the hash calculated, and basic
+ * eXecute permission checking is performed against @rd.mnt_idmap.
+ *
+ * Returns: zero or an error.
+ */
+int start_renaming_dentry(struct renamedata *rd, int lookup_flags,
+ struct dentry *old_dentry, struct qstr *new_last)
+{
+ int err;
+
+ err = lookup_one_common(rd->mnt_idmap, new_last, rd->new_parent);
+ if (err)
+ return err;
+ return __start_renaming_dentry(rd, lookup_flags, old_dentry, new_last);
+}
+EXPORT_SYMBOL(start_renaming_dentry);
+
void end_renaming(struct renamedata *rd)
{
unlock_rename(rd->old_parent, rd->new_parent);
dput(rd->old_dentry);
dput(rd->new_dentry);
+ dput(rd->old_parent);
}
EXPORT_SYMBOL(end_renaming);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 7a31ca9bdea2..27014ada11c7 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -523,8 +523,8 @@ static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh,
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
- struct dentry *index = NULL;
struct dentry *temp = NULL;
+ struct renamedata rd = {};
struct qstr name = { };
int err;
@@ -556,17 +556,15 @@ static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh,
if (err)
goto out;
- err = ovl_parent_lock(indexdir, temp);
+ rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_parent = indexdir;
+ rd.new_parent = indexdir;
+ err = start_renaming_dentry(&rd, 0, temp, &name);
if (err)
goto out;
- index = ovl_lookup_upper(ofs, name.name, indexdir, name.len);
- if (IS_ERR(index)) {
- err = PTR_ERR(index);
- } else {
- err = ovl_do_rename(ofs, indexdir, temp, indexdir, index, 0);
- dput(index);
- }
- ovl_parent_unlock(indexdir);
+
+ err = ovl_do_rename_rd(&rd);
+ end_renaming(&rd);
out:
if (err)
ovl_cleanup(ofs, indexdir, temp);
@@ -763,7 +761,8 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct inode *inode;
struct path path = { .mnt = ovl_upper_mnt(ofs) };
- struct dentry *temp, *upper, *trap;
+ struct renamedata rd = {};
+ struct dentry *temp;
struct ovl_cu_creds cc;
int err;
struct ovl_cattr cattr = {
@@ -807,29 +806,24 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
* ovl_copy_up_data(), so lock workdir and destdir and make sure that
* temp wasn't moved before copy up completion or cleanup.
*/
- trap = lock_rename(c->workdir, c->destdir);
- if (trap || temp->d_parent != c->workdir) {
- /* temp or workdir moved underneath us? abort without cleanup */
- dput(temp);
+ rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_parent = c->workdir;
+ rd.new_parent = c->destdir;
+ rd.flags = 0;
+ err = start_renaming_dentry(&rd, 0, temp,
+ &QSTR_LEN(c->destname.name, c->destname.len));
+ if (err) {
+ /* temp or workdir moved underneath us? map to -EIO */
err = -EIO;
- if (!IS_ERR(trap))
- unlock_rename(c->workdir, c->destdir);
- goto out;
}
-
- err = ovl_copy_up_metadata(c, temp);
if (err)
- goto cleanup;
+ goto cleanup_unlocked;
- upper = ovl_lookup_upper(ofs, c->destname.name, c->destdir,
- c->destname.len);
- err = PTR_ERR(upper);
- if (IS_ERR(upper))
- goto cleanup;
+ err = ovl_copy_up_metadata(c, temp);
+ if (!err)
+ err = ovl_do_rename_rd(&rd);
+ end_renaming(&rd);
- err = ovl_do_rename(ofs, c->workdir, temp, c->destdir, upper, 0);
- unlock_rename(c->workdir, c->destdir);
- dput(upper);
if (err)
goto cleanup_unlocked;
@@ -850,8 +844,6 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
return err;
-cleanup:
- unlock_rename(c->workdir, c->destdir);
cleanup_unlocked:
ovl_cleanup(ofs, c->workdir, temp);
dput(temp);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 0f2c2da68433..2b423ebc85c4 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -57,8 +57,7 @@ int ovl_cleanup(struct ovl_fs *ofs, struct dentry *workdir,
return 0;
}
-#define OVL_TEMPNAME_SIZE 20
-static void ovl_tempname(char name[OVL_TEMPNAME_SIZE])
+void ovl_tempname(char name[OVL_TEMPNAME_SIZE])
{
static atomic_t temp_id = ATOMIC_INIT(0);
@@ -66,22 +65,6 @@ static void ovl_tempname(char name[OVL_TEMPNAME_SIZE])
snprintf(name, OVL_TEMPNAME_SIZE, "#%x", atomic_inc_return(&temp_id));
}
-struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir)
-{
- struct dentry *temp;
- char name[OVL_TEMPNAME_SIZE];
-
- ovl_tempname(name);
- temp = ovl_lookup_upper(ofs, name, workdir, strlen(name));
- if (!IS_ERR(temp) && temp->d_inode) {
- pr_err("workdir/%s already exists\n", name);
- dput(temp);
- temp = ERR_PTR(-EIO);
- }
-
- return temp;
-}
-
static struct dentry *ovl_start_creating_temp(struct ovl_fs *ofs,
struct dentry *workdir)
{
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 3cc85a893b5c..746bc4ad7b37 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -447,11 +447,6 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
}
/* util.c */
-int ovl_parent_lock(struct dentry *parent, struct dentry *child);
-static inline void ovl_parent_unlock(struct dentry *parent)
-{
- inode_unlock(parent->d_inode);
-}
int ovl_get_write_access(struct dentry *dentry);
void ovl_put_write_access(struct dentry *dentry);
void ovl_start_write(struct dentry *dentry);
@@ -888,7 +883,8 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs,
struct dentry *parent, struct dentry *newdentry,
struct ovl_cattr *attr);
int ovl_cleanup(struct ovl_fs *ofs, struct dentry *workdir, struct dentry *dentry);
-struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir);
+#define OVL_TEMPNAME_SIZE 20
+void ovl_tempname(char name[OVL_TEMPNAME_SIZE]);
struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
struct ovl_cattr *attr);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 6e0816c1147a..a721ef2b90e8 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -566,9 +566,10 @@ static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
{
struct dentry *workdir = ofs->workdir;
struct dentry *temp;
- struct dentry *dest;
struct dentry *whiteout;
struct name_snapshot name;
+ struct renamedata rd = {};
+ char name2[OVL_TEMPNAME_SIZE];
int err;
temp = ovl_create_temp(ofs, workdir, OVL_CATTR(S_IFREG | 0));
@@ -576,23 +577,21 @@ static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
if (IS_ERR(temp))
return err;
- err = ovl_parent_lock(workdir, temp);
+ rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_parent = workdir;
+ rd.new_parent = workdir;
+ rd.flags = RENAME_WHITEOUT;
+ ovl_tempname(name2);
+ err = start_renaming_dentry(&rd, 0, temp, &QSTR(name2));
if (err) {
dput(temp);
return err;
}
- dest = ovl_lookup_temp(ofs, workdir);
- err = PTR_ERR(dest);
- if (IS_ERR(dest)) {
- dput(temp);
- ovl_parent_unlock(workdir);
- return err;
- }
/* Name is inline and stable - using snapshot as a copy helper */
take_dentry_name_snapshot(&name, temp);
- err = ovl_do_rename(ofs, workdir, temp, workdir, dest, RENAME_WHITEOUT);
- ovl_parent_unlock(workdir);
+ err = ovl_do_rename_rd(&rd);
+ end_renaming(&rd);
if (err) {
if (err == -EINVAL)
err = 0;
@@ -616,7 +615,6 @@ static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
ovl_cleanup(ofs, workdir, temp);
release_dentry_name_snapshot(&name);
dput(temp);
- dput(dest);
return err;
}
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index f76672f2e686..46387aeb6be6 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -1548,14 +1548,3 @@ void ovl_copyattr(struct inode *inode)
i_size_write(inode, i_size_read(realinode));
spin_unlock(&inode->i_lock);
}
-
-int ovl_parent_lock(struct dentry *parent, struct dentry *child)
-{
- inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
- if (!child ||
- (!d_unhashed(child) && child->d_parent == parent))
- return 0;
-
- inode_unlock(parent->d_inode);
- return -EINVAL;
-}
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index 7c4ddc43ab39..f54b5b0aaba2 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -663,7 +663,6 @@ int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname,
int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
char *newname, int flags)
{
- struct dentry *old_parent, *new_dentry, *trap;
struct dentry *old_child = old_path->dentry;
struct path new_path;
struct qstr new_last;
@@ -673,7 +672,6 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
struct ksmbd_file *parent_fp;
int new_type;
int err, lookup_flags = LOOKUP_NO_SYMLINKS;
- int target_lookup_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE;
if (ksmbd_override_fsids(work))
return -ENOMEM;
@@ -684,14 +682,6 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
goto revert_fsids;
}
- /*
- * explicitly handle file overwrite case, for compatibility with
- * filesystems that may not support rename flags (e.g: fuse)
- */
- if (flags & RENAME_NOREPLACE)
- target_lookup_flags |= LOOKUP_EXCL;
- flags &= ~(RENAME_NOREPLACE);
-
retry:
err = vfs_path_parent_lookup(to, lookup_flags | LOOKUP_BENEATH,
&new_path, &new_last, &new_type,
@@ -708,17 +698,14 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
if (err)
goto out2;
- trap = lock_rename_child(old_child, new_path.dentry);
- if (IS_ERR(trap)) {
- err = PTR_ERR(trap);
+ rd.mnt_idmap = mnt_idmap(old_path->mnt);
+ rd.old_parent = NULL;
+ rd.new_parent = new_path.dentry;
+ rd.flags = flags;
+ rd.delegated_inode = NULL,
+ err = start_renaming_dentry(&rd, lookup_flags, old_child, &new_last);
+ if (err)
goto out_drop_write;
- }
-
- old_parent = dget(old_child->d_parent);
- if (d_unhashed(old_child)) {
- err = -EINVAL;
- goto out3;
- }
parent_fp = ksmbd_lookup_fd_inode(old_child->d_parent);
if (parent_fp) {
@@ -731,44 +718,17 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
ksmbd_fd_put(work, parent_fp);
}
- new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
- lookup_flags | target_lookup_flags);
- if (IS_ERR(new_dentry)) {
- err = PTR_ERR(new_dentry);
- goto out3;
- }
-
- if (d_is_symlink(new_dentry)) {
+ if (d_is_symlink(rd.new_dentry)) {
err = -EACCES;
- goto out4;
- }
-
- if (old_child == trap) {
- err = -EINVAL;
- goto out4;
- }
-
- if (new_dentry == trap) {
- err = -ENOTEMPTY;
- goto out4;
+ goto out3;
}
- rd.mnt_idmap = mnt_idmap(old_path->mnt),
- rd.old_parent = old_parent,
- rd.old_dentry = old_child,
- rd.new_parent = new_path.dentry,
- rd.new_dentry = new_dentry,
- rd.flags = flags,
- rd.delegated_inode = NULL,
err = vfs_rename(&rd);
if (err)
ksmbd_debug(VFS, "vfs_rename failed err %d\n", err);
-out4:
- dput(new_dentry);
out3:
- dput(old_parent);
- unlock_rename(old_parent, new_path.dentry);
+ end_renaming(&rd);
out_drop_write:
mnt_drop_write(old_path->mnt);
out2:
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 19c3d8e336d5..f73001e3719a 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -158,6 +158,8 @@ extern struct dentry *lock_rename_child(struct dentry *, struct dentry *);
extern void unlock_rename(struct dentry *, struct dentry *);
int start_renaming(struct renamedata *rd, int lookup_flags,
struct qstr *old_last, struct qstr *new_last);
+int start_renaming_dentry(struct renamedata *rd, int lookup_flags,
+ struct dentry *old_dentry, struct qstr *new_last);
void end_renaming(struct renamedata *rd);
/**
--
2.50.0.107.gf914562f5916.dirty
^ permalink raw reply related [flat|nested] 24+ messages in thread* Re: [PATCH v4 10/14] VFS/ovl/smb: introduce start_renaming_dentry()
2025-10-29 23:31 ` [PATCH v4 10/14] VFS/ovl/smb: introduce start_renaming_dentry() NeilBrown
@ 2025-10-30 0:02 ` Namjae Jeon
0 siblings, 0 replies; 24+ messages in thread
From: Namjae Jeon @ 2025-10-30 0:02 UTC (permalink / raw)
To: NeilBrown
Cc: Alexander Viro, Christian Brauner, Amir Goldstein, Jan Kara,
linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Chuck Lever,
Olga Kornievskaia, Dai Ngo, Steve French, Sergey Senozhatsky,
Carlos Maiolino, John Johansen, Paul Moore, James Morris,
Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek, Mateusz Guzik,
Lorenzo Stoakes, Stefan Berger, Darrick J. Wong, linux-kernel,
netfs, ecryptfs, linux-nfs, linux-unionfs, linux-cifs, linux-xfs,
apparmor, linux-security-module, selinux
On Thu, Oct 30, 2025 at 8:46 AM NeilBrown <neilb@ownmail.net> wrote:
>
> From: NeilBrown <neil@brown.name>
>
> Several callers perform a rename on a dentry they already have, and only
> require lookup for the target name. This includes smb/server and a few
> different places in overlayfs.
>
> start_renaming_dentry() performs the required lookup and takes the
> required lock using lock_rename_child()
>
> It is used in three places in overlayfs and in ksmbd_vfs_rename().
>
> In the ksmbd case, the parent of the source is not important - the
> source must be renamed from wherever it is. So start_renaming_dentry()
> allows rd->old_parent to be NULL and only checks it if it is non-NULL.
> On success rd->old_parent will be the parent of old_dentry with an extra
> reference taken. Other start_renaming function also now take the extra
> reference and end_renaming() now drops this reference as well.
>
> ovl_lookup_temp(), ovl_parent_lock(), and ovl_parent_unlock() are
> all removed as they are no longer needed.
>
> OVL_TEMPNAME_SIZE and ovl_tempname() are now declared in overlayfs.h so
> that ovl_check_rename_whiteout() can access them.
>
> ovl_copy_up_workdir() now always cleans up on error.
>
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> Signed-off-by: NeilBrown <neil@brown.name>
For ksmbd part,
Reviewed-by: Namjae Jeon <linkinjeon@kernel.org>
Thanks!
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH v4 11/14] Add start_renaming_two_dentries()
2025-10-29 23:31 [PATCH v4 00/14] Create and use APIs to centralise locking for directory ops NeilBrown
` (9 preceding siblings ...)
2025-10-29 23:31 ` [PATCH v4 10/14] VFS/ovl/smb: introduce start_renaming_dentry() NeilBrown
@ 2025-10-29 23:31 ` NeilBrown
2025-10-30 6:22 ` Al Viro
2025-10-29 23:31 ` [PATCH v4 12/14] ecryptfs: use new start_creating/start_removing APIs NeilBrown
` (2 subsequent siblings)
13 siblings, 1 reply; 24+ messages in thread
From: NeilBrown @ 2025-10-29 23:31 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Amir Goldstein
Cc: Jan Kara, linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Chuck Lever,
Olga Kornievskaia, Dai Ngo, Namjae Jeon, Steve French,
Sergey Senozhatsky, Carlos Maiolino, John Johansen, Paul Moore,
James Morris, Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek,
Mateusz Guzik, Lorenzo Stoakes, Stefan Berger, Darrick J. Wong,
linux-kernel, netfs, ecryptfs, linux-nfs, linux-unionfs,
linux-cifs, linux-xfs, apparmor, linux-security-module, selinux
From: NeilBrown <neil@brown.name>
A few callers want to lock for a rename and already have both dentries.
Also debugfs does want to perform a lookup but doesn't want permission
checking, so start_renaming_dentry() cannot be used.
This patch introduces start_renaming_two_dentries() which is given both
dentries. debugfs performs one lookup itself. As it will only continue
with a negative dentry and as those cannot be renamed or unlinked, it is
safe to do the lookup before getting the rename locks.
overlayfs uses start_renaming_two_dentries() in three places and selinux
uses it twice in sel_make_policy_nodes().
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
---
changes since v3:
added missing assignment to rd.mnt_idmap in ovl_cleanup_and_whiteout
---
fs/debugfs/inode.c | 48 ++++++++++++--------------
fs/namei.c | 65 ++++++++++++++++++++++++++++++++++++
fs/overlayfs/dir.c | 43 ++++++++++++++++--------
include/linux/namei.h | 2 ++
security/selinux/selinuxfs.c | 27 ++++++++++-----
5 files changed, 136 insertions(+), 49 deletions(-)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index f241b9df642a..532bd7c46baf 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -842,7 +842,8 @@ int __printf(2, 3) debugfs_change_name(struct dentry *dentry, const char *fmt, .
int error = 0;
const char *new_name;
struct name_snapshot old_name;
- struct dentry *parent, *target;
+ struct dentry *target;
+ struct renamedata rd = {};
struct inode *dir;
va_list ap;
@@ -855,36 +856,31 @@ int __printf(2, 3) debugfs_change_name(struct dentry *dentry, const char *fmt, .
if (!new_name)
return -ENOMEM;
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- inode_lock(dir);
+ rd.old_parent = dget_parent(dentry);
+ rd.new_parent = rd.old_parent;
+ rd.flags = RENAME_NOREPLACE;
+ target = lookup_noperm_unlocked(&QSTR(new_name), rd.new_parent);
+ if (IS_ERR(target))
+ return PTR_ERR(target);
- take_dentry_name_snapshot(&old_name, dentry);
-
- if (WARN_ON_ONCE(dentry->d_parent != parent)) {
- error = -EINVAL;
- goto out;
- }
- if (strcmp(old_name.name.name, new_name) == 0)
- goto out;
- target = lookup_noperm(&QSTR(new_name), parent);
- if (IS_ERR(target)) {
- error = PTR_ERR(target);
- goto out;
- }
- if (d_really_is_positive(target)) {
- dput(target);
- error = -EINVAL;
+ error = start_renaming_two_dentries(&rd, dentry, target);
+ if (error) {
+ if (error == -EEXIST && target == dentry)
+ /* it isn't an error to rename a thing to itself */
+ error = 0;
goto out;
}
- simple_rename_timestamp(dir, dentry, dir, target);
- d_move(dentry, target);
- dput(target);
+
+ dir = d_inode(rd.old_parent);
+ take_dentry_name_snapshot(&old_name, dentry);
+ simple_rename_timestamp(dir, dentry, dir, rd.new_dentry);
+ d_move(dentry, rd.new_dentry);
fsnotify_move(dir, dir, &old_name.name, d_is_dir(dentry), NULL, dentry);
-out:
release_dentry_name_snapshot(&old_name);
- inode_unlock(dir);
- dput(parent);
+ end_renaming(&rd);
+out:
+ dput(rd.old_parent);
+ dput(target);
kfree_const(new_name);
return error;
}
diff --git a/fs/namei.c b/fs/namei.c
index 5153ceddd37a..4a4b8b96c192 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3877,6 +3877,71 @@ int start_renaming_dentry(struct renamedata *rd, int lookup_flags,
}
EXPORT_SYMBOL(start_renaming_dentry);
+/**
+ * start_renaming_two_dentries - Lock to dentries in given parents for rename
+ * @rd: rename data containing parent
+ * @old_dentry: dentry of name to move
+ * @new_dentry: dentry to move to
+ *
+ * Ensure locks are in place for rename and check parentage is still correct.
+ *
+ * On success the two dentries are stored in @rd.old_dentry and
+ * @rd.new_dentry and @rd.old_parent and @rd.new_parent are confirmed to
+ * be the parents of the dentries.
+ *
+ * References and the lock can be dropped with end_renaming()
+ *
+ * Returns: zero or an error.
+ */
+int
+start_renaming_two_dentries(struct renamedata *rd,
+ struct dentry *old_dentry, struct dentry *new_dentry)
+{
+ struct dentry *trap;
+ int err;
+
+ /* Already have the dentry - need to be sure to lock the correct parent */
+ trap = lock_rename_child(old_dentry, rd->new_parent);
+ if (IS_ERR(trap))
+ return PTR_ERR(trap);
+ err = -EINVAL;
+ if (d_unhashed(old_dentry) ||
+ (rd->old_parent && rd->old_parent != old_dentry->d_parent))
+ /* old_dentry was removed, or moved and explicit parent requested */
+ goto out_unlock;
+ if (d_unhashed(new_dentry) ||
+ rd->new_parent != new_dentry->d_parent)
+ /* new_dentry was removed or moved */
+ goto out_unlock;
+
+ if (old_dentry == trap)
+ /* source is an ancestor of target */
+ goto out_unlock;
+
+ if (new_dentry == trap) {
+ /* target is an ancestor of source */
+ if (rd->flags & RENAME_EXCHANGE)
+ err = -EINVAL;
+ else
+ err = -ENOTEMPTY;
+ goto out_unlock;
+ }
+
+ err = -EEXIST;
+ if (d_is_positive(new_dentry) && (rd->flags & RENAME_NOREPLACE))
+ goto out_unlock;
+
+ rd->old_dentry = dget(old_dentry);
+ rd->new_dentry = dget(new_dentry);
+ rd->old_parent = dget(old_dentry->d_parent);
+ return 0;
+
+out_unlock:
+ unlock_rename(old_dentry->d_parent, rd->new_parent);
+ return err;
+}
+EXPORT_SYMBOL(start_renaming_two_dentries);
+
void end_renaming(struct renamedata *rd)
{
unlock_rename(rd->old_parent, rd->new_parent);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 2b423ebc85c4..c297648c7487 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -123,6 +123,7 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir,
struct dentry *dentry)
{
struct dentry *whiteout;
+ struct renamedata rd = {};
int err;
int flags = 0;
@@ -134,10 +135,14 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir,
if (d_is_dir(dentry))
flags = RENAME_EXCHANGE;
- err = ovl_lock_rename_workdir(ofs->workdir, whiteout, dir, dentry);
+ rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_parent = ofs->workdir;
+ rd.new_parent = dir;
+ rd.flags = flags;
+ err = start_renaming_two_dentries(&rd, whiteout, dentry);
if (!err) {
- err = ovl_do_rename(ofs, ofs->workdir, whiteout, dir, dentry, flags);
- unlock_rename(ofs->workdir, dir);
+ err = ovl_do_rename_rd(&rd);
+ end_renaming(&rd);
}
if (err)
goto kill_whiteout;
@@ -388,6 +393,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *workdir = ovl_workdir(dentry);
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+ struct renamedata rd = {};
struct path upperpath;
struct dentry *upper;
struct dentry *opaquedir;
@@ -413,7 +419,11 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
if (IS_ERR(opaquedir))
goto out;
- err = ovl_lock_rename_workdir(workdir, opaquedir, upperdir, upper);
+ rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_parent = workdir;
+ rd.new_parent = upperdir;
+ rd.flags = RENAME_EXCHANGE;
+ err = start_renaming_two_dentries(&rd, opaquedir, upper);
if (err)
goto out_cleanup_unlocked;
@@ -431,8 +441,8 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
if (err)
goto out_cleanup;
- err = ovl_do_rename(ofs, workdir, opaquedir, upperdir, upper, RENAME_EXCHANGE);
- unlock_rename(workdir, upperdir);
+ err = ovl_do_rename_rd(&rd);
+ end_renaming(&rd);
if (err)
goto out_cleanup_unlocked;
@@ -445,7 +455,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
return opaquedir;
out_cleanup:
- unlock_rename(workdir, upperdir);
+ end_renaming(&rd);
out_cleanup_unlocked:
ovl_cleanup(ofs, workdir, opaquedir);
dput(opaquedir);
@@ -468,6 +478,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *workdir = ovl_workdir(dentry);
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+ struct renamedata rd = {};
struct dentry *upper;
struct dentry *newdentry;
int err;
@@ -499,7 +510,11 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
if (IS_ERR(newdentry))
goto out_dput;
- err = ovl_lock_rename_workdir(workdir, newdentry, upperdir, upper);
+ rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_parent = workdir;
+ rd.new_parent = upperdir;
+ rd.flags = 0;
+ err = start_renaming_two_dentries(&rd, newdentry, upper);
if (err)
goto out_cleanup_unlocked;
@@ -536,16 +551,16 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
if (err)
goto out_cleanup;
- err = ovl_do_rename(ofs, workdir, newdentry, upperdir, upper,
- RENAME_EXCHANGE);
- unlock_rename(workdir, upperdir);
+ rd.flags = RENAME_EXCHANGE;
+ err = ovl_do_rename_rd(&rd);
+ end_renaming(&rd);
if (err)
goto out_cleanup_unlocked;
ovl_cleanup(ofs, workdir, upper);
} else {
- err = ovl_do_rename(ofs, workdir, newdentry, upperdir, upper, 0);
- unlock_rename(workdir, upperdir);
+ err = ovl_do_rename_rd(&rd);
+ end_renaming(&rd);
if (err)
goto out_cleanup_unlocked;
}
@@ -565,7 +580,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
return err;
out_cleanup:
- unlock_rename(workdir, upperdir);
+ end_renaming(&rd);
out_cleanup_unlocked:
ovl_cleanup(ofs, workdir, newdentry);
dput(newdentry);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index f73001e3719a..a99ac8b7e24a 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -160,6 +160,8 @@ int start_renaming(struct renamedata *rd, int lookup_flags,
struct qstr *old_last, struct qstr *new_last);
int start_renaming_dentry(struct renamedata *rd, int lookup_flags,
struct dentry *old_dentry, struct qstr *new_last);
+int start_renaming_two_dentries(struct renamedata *rd,
+ struct dentry *old_dentry, struct dentry *new_dentry);
void end_renaming(struct renamedata *rd);
/**
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 232e087bce3e..a224ef9bb831 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -506,6 +506,7 @@ static int sel_make_policy_nodes(struct selinux_fs_info *fsi,
{
int ret = 0;
struct dentry *tmp_parent, *tmp_bool_dir, *tmp_class_dir;
+ struct renamedata rd = {};
unsigned int bool_num = 0;
char **bool_names = NULL;
int *bool_values = NULL;
@@ -539,22 +540,30 @@ static int sel_make_policy_nodes(struct selinux_fs_info *fsi,
if (ret)
goto out;
- lock_rename(tmp_parent, fsi->sb->s_root);
+ rd.old_parent = tmp_parent;
+ rd.new_parent = fsi->sb->s_root;
/* booleans */
- d_exchange(tmp_bool_dir, fsi->bool_dir);
+ ret = start_renaming_two_dentries(&rd, tmp_bool_dir, fsi->bool_dir);
+ if (!ret) {
+ d_exchange(tmp_bool_dir, fsi->bool_dir);
- swap(fsi->bool_num, bool_num);
- swap(fsi->bool_pending_names, bool_names);
- swap(fsi->bool_pending_values, bool_values);
+ swap(fsi->bool_num, bool_num);
+ swap(fsi->bool_pending_names, bool_names);
+ swap(fsi->bool_pending_values, bool_values);
- fsi->bool_dir = tmp_bool_dir;
+ fsi->bool_dir = tmp_bool_dir;
+ end_renaming(&rd);
+ }
/* classes */
- d_exchange(tmp_class_dir, fsi->class_dir);
- fsi->class_dir = tmp_class_dir;
+ ret = start_renaming_two_dentries(&rd, tmp_class_dir, fsi->class_dir);
+ if (ret == 0) {
+ d_exchange(tmp_class_dir, fsi->class_dir);
+ fsi->class_dir = tmp_class_dir;
- unlock_rename(tmp_parent, fsi->sb->s_root);
+ end_renaming(&rd);
+ }
out:
sel_remove_old_bool_data(bool_num, bool_names, bool_values);
--
2.50.0.107.gf914562f5916.dirty
^ permalink raw reply related [flat|nested] 24+ messages in thread* Re: [PATCH v4 11/14] Add start_renaming_two_dentries()
2025-10-29 23:31 ` [PATCH v4 11/14] Add start_renaming_two_dentries() NeilBrown
@ 2025-10-30 6:22 ` Al Viro
2025-10-30 23:37 ` NeilBrown
0 siblings, 1 reply; 24+ messages in thread
From: Al Viro @ 2025-10-30 6:22 UTC (permalink / raw)
To: NeilBrown
Cc: Christian Brauner, Amir Goldstein, Jan Kara, linux-fsdevel,
Jeff Layton, Chris Mason, David Sterba, David Howells,
Greg Kroah-Hartman, Rafael J. Wysocki, Danilo Krummrich,
Tyler Hicks, Miklos Szeredi, Chuck Lever, Olga Kornievskaia,
Dai Ngo, Namjae Jeon, Steve French, Sergey Senozhatsky,
Carlos Maiolino, John Johansen, Paul Moore, James Morris,
Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek, Mateusz Guzik,
Lorenzo Stoakes, Stefan Berger, Darrick J. Wong, linux-kernel,
netfs, ecryptfs, linux-nfs, linux-unionfs, linux-cifs, linux-xfs,
apparmor, linux-security-module, selinux
On Thu, Oct 30, 2025 at 10:31:11AM +1100, NeilBrown wrote:
> +++ b/fs/debugfs/inode.c
Why does debugfs_change_name() need any of that horror? Seriously, WTF?
This is strictly a name change on a filesystem that never, ever moves
anything from one directory to another.
IMO struct renamedata is a fucking eyesore, but that aside, this:
> @@ -539,22 +540,30 @@ static int sel_make_policy_nodes(struct selinux_fs_info *fsi,
> if (ret)
> goto out;
>
> - lock_rename(tmp_parent, fsi->sb->s_root);
> + rd.old_parent = tmp_parent;
> + rd.new_parent = fsi->sb->s_root;
>
> /* booleans */
> - d_exchange(tmp_bool_dir, fsi->bool_dir);
> + ret = start_renaming_two_dentries(&rd, tmp_bool_dir, fsi->bool_dir);
> + if (!ret) {
> + d_exchange(tmp_bool_dir, fsi->bool_dir);
>
> - swap(fsi->bool_num, bool_num);
> - swap(fsi->bool_pending_names, bool_names);
> - swap(fsi->bool_pending_values, bool_values);
> + swap(fsi->bool_num, bool_num);
> + swap(fsi->bool_pending_names, bool_names);
> + swap(fsi->bool_pending_values, bool_values);
>
> - fsi->bool_dir = tmp_bool_dir;
> + fsi->bool_dir = tmp_bool_dir;
> + end_renaming(&rd);
> + }
>
> /* classes */
> - d_exchange(tmp_class_dir, fsi->class_dir);
> - fsi->class_dir = tmp_class_dir;
> + ret = start_renaming_two_dentries(&rd, tmp_class_dir, fsi->class_dir);
> + if (ret == 0) {
> + d_exchange(tmp_class_dir, fsi->class_dir);
> + fsi->class_dir = tmp_class_dir;
>
> - unlock_rename(tmp_parent, fsi->sb->s_root);
> + end_renaming(&rd);
> + }
>
> out:
> sel_remove_old_bool_data(bool_num, bool_names, bool_values);
is very interesting - suddenly you get two non-overlapping scopes instead of one.
Why is that OK?
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH v4 11/14] Add start_renaming_two_dentries()
2025-10-30 6:22 ` Al Viro
@ 2025-10-30 23:37 ` NeilBrown
0 siblings, 0 replies; 24+ messages in thread
From: NeilBrown @ 2025-10-30 23:37 UTC (permalink / raw)
To: Al Viro
Cc: Christian Brauner, Amir Goldstein, Jan Kara, linux-fsdevel,
Jeff Layton, Chris Mason, David Sterba, David Howells,
Greg Kroah-Hartman, Rafael J. Wysocki, Danilo Krummrich,
Tyler Hicks, Miklos Szeredi, Chuck Lever, Olga Kornievskaia,
Dai Ngo, Namjae Jeon, Steve French, Sergey Senozhatsky,
Carlos Maiolino, John Johansen, Paul Moore, James Morris,
Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek, Mateusz Guzik,
Lorenzo Stoakes, Stefan Berger, Darrick J. Wong, linux-kernel,
netfs, ecryptfs, linux-nfs, linux-unionfs, linux-cifs, linux-xfs,
apparmor, linux-security-module, selinux
On Thu, 30 Oct 2025, Al Viro wrote:
> On Thu, Oct 30, 2025 at 10:31:11AM +1100, NeilBrown wrote:
>
> > +++ b/fs/debugfs/inode.c
>
> Why does debugfs_change_name() need any of that horror? Seriously, WTF?
> This is strictly a name change on a filesystem that never, ever moves
> anything from one directory to another.
"horror" is clearly in the eye of the beholder, and not a helpful
description...
Is there anything in this use of start_renaming_two_dentries() which is
harmful? I agree that not all of the functionality is needed in this
case, but some of it is.
Would you prefer we also add
start_renaming_two_dentries_with_same_parent()
or similar?
>
> IMO struct renamedata is a fucking eyesore, but that aside, this:
>
> > @@ -539,22 +540,30 @@ static int sel_make_policy_nodes(struct selinux_fs_info *fsi,
> > if (ret)
> > goto out;
> >
> > - lock_rename(tmp_parent, fsi->sb->s_root);
> > + rd.old_parent = tmp_parent;
> > + rd.new_parent = fsi->sb->s_root;
> >
> > /* booleans */
> > - d_exchange(tmp_bool_dir, fsi->bool_dir);
> > + ret = start_renaming_two_dentries(&rd, tmp_bool_dir, fsi->bool_dir);
> > + if (!ret) {
> > + d_exchange(tmp_bool_dir, fsi->bool_dir);
> >
> > - swap(fsi->bool_num, bool_num);
> > - swap(fsi->bool_pending_names, bool_names);
> > - swap(fsi->bool_pending_values, bool_values);
> > + swap(fsi->bool_num, bool_num);
> > + swap(fsi->bool_pending_names, bool_names);
> > + swap(fsi->bool_pending_values, bool_values);
> >
> > - fsi->bool_dir = tmp_bool_dir;
> > + fsi->bool_dir = tmp_bool_dir;
> > + end_renaming(&rd);
> > + }
> >
> > /* classes */
> > - d_exchange(tmp_class_dir, fsi->class_dir);
> > - fsi->class_dir = tmp_class_dir;
> > + ret = start_renaming_two_dentries(&rd, tmp_class_dir, fsi->class_dir);
> > + if (ret == 0) {
> > + d_exchange(tmp_class_dir, fsi->class_dir);
> > + fsi->class_dir = tmp_class_dir;
> >
> > - unlock_rename(tmp_parent, fsi->sb->s_root);
> > + end_renaming(&rd);
> > + }
> >
> > out:
> > sel_remove_old_bool_data(bool_num, bool_names, bool_values);
>
> is very interesting - suddenly you get two non-overlapping scopes instead of one.
> Why is that OK?
>
From the perspective of code performing lookup of these names, two
consecutive lookups would not be locked so they could see
inconsistencies anyway.
From the perspective of code changing these names, that is protected by
selinux_state.policy_mutex which is held across the combined operation.
A readdir could possibly see the old inum for one name and the new inum
for the other name. I don't imagine this would be a problem.
I have added a comment to the commit message to highlight this.
Thanks,
NeilBrown
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH v4 12/14] ecryptfs: use new start_creating/start_removing APIs
2025-10-29 23:31 [PATCH v4 00/14] Create and use APIs to centralise locking for directory ops NeilBrown
` (10 preceding siblings ...)
2025-10-29 23:31 ` [PATCH v4 11/14] Add start_renaming_two_dentries() NeilBrown
@ 2025-10-29 23:31 ` NeilBrown
2025-10-30 6:24 ` Al Viro
2025-10-29 23:31 ` [PATCH v4 13/14] VFS: change vfs_mkdir() to unlock on failure NeilBrown
2025-10-29 23:31 ` [PATCH v4 14/14] VFS: introduce end_creating_keep() NeilBrown
13 siblings, 1 reply; 24+ messages in thread
From: NeilBrown @ 2025-10-29 23:31 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Amir Goldstein
Cc: Jan Kara, linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Chuck Lever,
Olga Kornievskaia, Dai Ngo, Namjae Jeon, Steve French,
Sergey Senozhatsky, Carlos Maiolino, John Johansen, Paul Moore,
James Morris, Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek,
Mateusz Guzik, Lorenzo Stoakes, Stefan Berger, Darrick J. Wong,
linux-kernel, netfs, ecryptfs, linux-nfs, linux-unionfs,
linux-cifs, linux-xfs, apparmor, linux-security-module, selinux
From: NeilBrown <neil@brown.name>
This requires the addition of start_creating_dentry() which is given the
dentry which has already been found, and asks for it to be locked and
its parent validated.
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
---
fs/ecryptfs/inode.c | 153 ++++++++++++++++++++----------------------
fs/namei.c | 33 +++++++++
include/linux/namei.h | 2 +
3 files changed, 107 insertions(+), 81 deletions(-)
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index ed1394da8d6b..b3702105d236 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -24,18 +24,26 @@
#include <linux/unaligned.h>
#include "ecryptfs_kernel.h"
-static int lock_parent(struct dentry *dentry,
- struct dentry **lower_dentry,
- struct inode **lower_dir)
+static struct dentry *ecryptfs_start_creating_dentry(struct dentry *dentry)
{
- struct dentry *lower_dir_dentry;
+ struct dentry *parent = dget_parent(dentry->d_parent);
+ struct dentry *ret;
- lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent);
- *lower_dir = d_inode(lower_dir_dentry);
- *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ ret = start_creating_dentry(ecryptfs_dentry_to_lower(parent),
+ ecryptfs_dentry_to_lower(dentry));
+ dput(parent);
+ return ret;
+}
- inode_lock_nested(*lower_dir, I_MUTEX_PARENT);
- return (*lower_dentry)->d_parent == lower_dir_dentry ? 0 : -EINVAL;
+static struct dentry *ecryptfs_start_removing_dentry(struct dentry *dentry)
+{
+ struct dentry *parent = dget_parent(dentry->d_parent);
+ struct dentry *ret;
+
+ ret = start_removing_dentry(ecryptfs_dentry_to_lower(parent),
+ ecryptfs_dentry_to_lower(dentry));
+ dput(parent);
+ return ret;
}
static int ecryptfs_inode_test(struct inode *inode, void *lower_inode)
@@ -141,15 +149,12 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
struct inode *lower_dir;
int rc;
- rc = lock_parent(dentry, &lower_dentry, &lower_dir);
- dget(lower_dentry); // don't even try to make the lower negative
- if (!rc) {
- if (d_unhashed(lower_dentry))
- rc = -EINVAL;
- else
- rc = vfs_unlink(&nop_mnt_idmap, lower_dir, lower_dentry,
- NULL);
- }
+ lower_dentry = ecryptfs_start_removing_dentry(dentry);
+ if (IS_ERR(lower_dentry))
+ return PTR_ERR(lower_dentry);
+
+ lower_dir = lower_dentry->d_parent->d_inode;
+ rc = vfs_unlink(&nop_mnt_idmap, lower_dir, lower_dentry, NULL);
if (rc) {
printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
goto out_unlock;
@@ -158,8 +163,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink);
inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
out_unlock:
- dput(lower_dentry);
- inode_unlock(lower_dir);
+ end_removing(lower_dentry);
if (!rc)
d_drop(dentry);
return rc;
@@ -186,10 +190,12 @@ ecryptfs_do_create(struct inode *directory_inode,
struct inode *lower_dir;
struct inode *inode;
- rc = lock_parent(ecryptfs_dentry, &lower_dentry, &lower_dir);
- if (!rc)
- rc = vfs_create(&nop_mnt_idmap, lower_dir,
- lower_dentry, mode, true);
+ lower_dentry = ecryptfs_start_creating_dentry(ecryptfs_dentry);
+ if (IS_ERR(lower_dentry))
+ return ERR_CAST(lower_dentry);
+ lower_dir = lower_dentry->d_parent->d_inode;
+ rc = vfs_create(&nop_mnt_idmap, lower_dir,
+ lower_dentry, mode, true);
if (rc) {
printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
"rc = [%d]\n", __func__, rc);
@@ -205,7 +211,7 @@ ecryptfs_do_create(struct inode *directory_inode,
fsstack_copy_attr_times(directory_inode, lower_dir);
fsstack_copy_inode_size(directory_inode, lower_dir);
out_lock:
- inode_unlock(lower_dir);
+ end_creating(lower_dentry, NULL);
return inode;
}
@@ -433,10 +439,12 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
file_size_save = i_size_read(d_inode(old_dentry));
lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
- rc = lock_parent(new_dentry, &lower_new_dentry, &lower_dir);
- if (!rc)
- rc = vfs_link(lower_old_dentry, &nop_mnt_idmap, lower_dir,
- lower_new_dentry, NULL);
+ lower_new_dentry = ecryptfs_start_creating_dentry(new_dentry);
+ if (IS_ERR(lower_new_dentry))
+ return PTR_ERR(lower_new_dentry);
+ lower_dir = lower_new_dentry->d_parent->d_inode;
+ rc = vfs_link(lower_old_dentry, &nop_mnt_idmap, lower_dir,
+ lower_new_dentry, NULL);
if (rc || d_really_is_negative(lower_new_dentry))
goto out_lock;
rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb);
@@ -448,7 +456,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
ecryptfs_inode_to_lower(d_inode(old_dentry))->i_nlink);
i_size_write(d_inode(new_dentry), file_size_save);
out_lock:
- inode_unlock(lower_dir);
+ end_creating(lower_new_dentry, NULL);
return rc;
}
@@ -468,9 +476,11 @@ static int ecryptfs_symlink(struct mnt_idmap *idmap,
size_t encoded_symlen;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
- rc = lock_parent(dentry, &lower_dentry, &lower_dir);
- if (rc)
- goto out_lock;
+ lower_dentry = ecryptfs_start_creating_dentry(dentry);
+ if (IS_ERR(lower_dentry))
+ return PTR_ERR(lower_dentry);
+ lower_dir = lower_dentry->d_parent->d_inode;
+
mount_crypt_stat = &ecryptfs_superblock_to_private(
dir->i_sb)->mount_crypt_stat;
rc = ecryptfs_encrypt_and_encode_filename(&encoded_symname,
@@ -490,7 +500,7 @@ static int ecryptfs_symlink(struct mnt_idmap *idmap,
fsstack_copy_attr_times(dir, lower_dir);
fsstack_copy_inode_size(dir, lower_dir);
out_lock:
- inode_unlock(lower_dir);
+ end_creating(lower_dentry, NULL);
if (d_really_is_negative(dentry))
d_drop(dentry);
return rc;
@@ -501,12 +511,14 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
{
int rc;
struct dentry *lower_dentry;
+ struct dentry *lower_dir_dentry;
struct inode *lower_dir;
- rc = lock_parent(dentry, &lower_dentry, &lower_dir);
- if (rc)
- goto out;
-
+ lower_dentry = ecryptfs_start_creating_dentry(dentry);
+ if (IS_ERR(lower_dentry))
+ return lower_dentry;
+ lower_dir_dentry = dget(lower_dentry->d_parent);
+ lower_dir = lower_dir_dentry->d_inode;
lower_dentry = vfs_mkdir(&nop_mnt_idmap, lower_dir,
lower_dentry, mode);
rc = PTR_ERR(lower_dentry);
@@ -522,7 +534,7 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
fsstack_copy_inode_size(dir, lower_dir);
set_nlink(dir, lower_dir->i_nlink);
out:
- inode_unlock(lower_dir);
+ end_creating(lower_dentry, lower_dir_dentry);
if (d_really_is_negative(dentry))
d_drop(dentry);
return ERR_PTR(rc);
@@ -534,21 +546,18 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
struct inode *lower_dir;
int rc;
- rc = lock_parent(dentry, &lower_dentry, &lower_dir);
- dget(lower_dentry); // don't even try to make the lower negative
- if (!rc) {
- if (d_unhashed(lower_dentry))
- rc = -EINVAL;
- else
- rc = vfs_rmdir(&nop_mnt_idmap, lower_dir, lower_dentry);
- }
+ lower_dentry = ecryptfs_start_removing_dentry(dentry);
+ if (IS_ERR(lower_dentry))
+ return PTR_ERR(lower_dentry);
+ lower_dir = lower_dentry->d_parent->d_inode;
+
+ rc = vfs_rmdir(&nop_mnt_idmap, lower_dir, lower_dentry);
if (!rc) {
clear_nlink(d_inode(dentry));
fsstack_copy_attr_times(dir, lower_dir);
set_nlink(dir, lower_dir->i_nlink);
}
- dput(lower_dentry);
- inode_unlock(lower_dir);
+ end_removing(lower_dentry);
if (!rc)
d_drop(dentry);
return rc;
@@ -562,10 +571,12 @@ ecryptfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *lower_dentry;
struct inode *lower_dir;
- rc = lock_parent(dentry, &lower_dentry, &lower_dir);
- if (!rc)
- rc = vfs_mknod(&nop_mnt_idmap, lower_dir,
- lower_dentry, mode, dev);
+ lower_dentry = ecryptfs_start_creating_dentry(dentry);
+ if (IS_ERR(lower_dentry))
+ return PTR_ERR(lower_dentry);
+ lower_dir = lower_dentry->d_parent->d_inode;
+
+ rc = vfs_mknod(&nop_mnt_idmap, lower_dir, lower_dentry, mode, dev);
if (rc || d_really_is_negative(lower_dentry))
goto out;
rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
@@ -574,7 +585,7 @@ ecryptfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
fsstack_copy_attr_times(dir, lower_dir);
fsstack_copy_inode_size(dir, lower_dir);
out:
- inode_unlock(lower_dir);
+ end_removing(lower_dentry);
if (d_really_is_negative(dentry))
d_drop(dentry);
return rc;
@@ -590,7 +601,6 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *lower_new_dentry;
struct dentry *lower_old_dir_dentry;
struct dentry *lower_new_dir_dentry;
- struct dentry *trap;
struct inode *target_inode;
struct renamedata rd = {};
@@ -605,31 +615,13 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
target_inode = d_inode(new_dentry);
- trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
- if (IS_ERR(trap))
- return PTR_ERR(trap);
- dget(lower_new_dentry);
- rc = -EINVAL;
- if (lower_old_dentry->d_parent != lower_old_dir_dentry)
- goto out_lock;
- if (lower_new_dentry->d_parent != lower_new_dir_dentry)
- goto out_lock;
- if (d_unhashed(lower_old_dentry) || d_unhashed(lower_new_dentry))
- goto out_lock;
- /* source should not be ancestor of target */
- if (trap == lower_old_dentry)
- goto out_lock;
- /* target should not be ancestor of source */
- if (trap == lower_new_dentry) {
- rc = -ENOTEMPTY;
- goto out_lock;
- }
+ rd.mnt_idmap = &nop_mnt_idmap;
+ rd.old_parent = lower_old_dir_dentry;
+ rd.new_parent = lower_new_dir_dentry;
+ rc = start_renaming_two_dentries(&rd, lower_old_dentry, lower_new_dentry);
+ if (rc)
+ return rc;
- rd.mnt_idmap = &nop_mnt_idmap;
- rd.old_parent = lower_old_dir_dentry;
- rd.old_dentry = lower_old_dentry;
- rd.new_parent = lower_new_dir_dentry;
- rd.new_dentry = lower_new_dentry;
rc = vfs_rename(&rd);
if (rc)
goto out_lock;
@@ -640,8 +632,7 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (new_dir != old_dir)
fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry));
out_lock:
- dput(lower_new_dentry);
- unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+ end_renaming(&rd);
return rc;
}
diff --git a/fs/namei.c b/fs/namei.c
index 4a4b8b96c192..8b7807cd1343 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3397,6 +3397,39 @@ struct dentry *start_removing_noperm(struct dentry *parent,
}
EXPORT_SYMBOL(start_removing_noperm);
+/**
+ * start_creating_dentry - prepare to create a given dentry
+ * @parent: directory from which dentry should be removed
+ * @child: the dentry to be removed
+ *
+ * A lock is taken to protect the dentry again other dirops and
+ * the validity of the dentry is checked: correct parent and still hashed.
+ *
+ * If the dentry is valid and negative a reference is taken and
+ * returned. If not an error is returned.
+ *
+ * end_creating() should be called when creation is complete, or aborted.
+ *
+ * Returns: the valid dentry, or an error.
+ */
+struct dentry *start_creating_dentry(struct dentry *parent,
+ struct dentry *child)
+{
+ inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
+ if (unlikely(IS_DEADDIR(parent->d_inode) ||
+ child->d_parent != parent ||
+ d_unhashed(child))) {
+ inode_unlock(parent->d_inode);
+ return ERR_PTR(-EINVAL);
+ }
+ if (d_is_positive(child)) {
+ inode_unlock(parent->d_inode);
+ return ERR_PTR(-EEXIST);
+ }
+ return dget(child);
+}
+EXPORT_SYMBOL(start_creating_dentry);
+
/**
* start_removing_dentry - prepare to remove a given dentry
* @parent: directory from which dentry should be removed
diff --git a/include/linux/namei.h b/include/linux/namei.h
index a99ac8b7e24a..208aed1d6728 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -100,6 +100,8 @@ struct dentry *start_removing_killable(struct mnt_idmap *idmap,
struct qstr *name);
struct dentry *start_creating_noperm(struct dentry *parent, struct qstr *name);
struct dentry *start_removing_noperm(struct dentry *parent, struct qstr *name);
+struct dentry *start_creating_dentry(struct dentry *parent,
+ struct dentry *child);
struct dentry *start_removing_dentry(struct dentry *parent,
struct dentry *child);
--
2.50.0.107.gf914562f5916.dirty
^ permalink raw reply related [flat|nested] 24+ messages in thread* Re: [PATCH v4 12/14] ecryptfs: use new start_creating/start_removing APIs
2025-10-29 23:31 ` [PATCH v4 12/14] ecryptfs: use new start_creating/start_removing APIs NeilBrown
@ 2025-10-30 6:24 ` Al Viro
2025-10-30 23:41 ` NeilBrown
0 siblings, 1 reply; 24+ messages in thread
From: Al Viro @ 2025-10-30 6:24 UTC (permalink / raw)
To: NeilBrown
Cc: Christian Brauner, Amir Goldstein, Jan Kara, linux-fsdevel,
Jeff Layton, Chris Mason, David Sterba, David Howells,
Greg Kroah-Hartman, Rafael J. Wysocki, Danilo Krummrich,
Tyler Hicks, Miklos Szeredi, Chuck Lever, Olga Kornievskaia,
Dai Ngo, Namjae Jeon, Steve French, Sergey Senozhatsky,
Carlos Maiolino, John Johansen, Paul Moore, James Morris,
Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek, Mateusz Guzik,
Lorenzo Stoakes, Stefan Berger, Darrick J. Wong, linux-kernel,
netfs, ecryptfs, linux-nfs, linux-unionfs, linux-cifs, linux-xfs,
apparmor, linux-security-module, selinux
On Thu, Oct 30, 2025 at 10:31:12AM +1100, NeilBrown wrote:
> +static struct dentry *ecryptfs_start_creating_dentry(struct dentry *dentry)
> {
> - struct dentry *lower_dir_dentry;
> + struct dentry *parent = dget_parent(dentry->d_parent);
"Grab the reference to grandparent"?
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH v4 12/14] ecryptfs: use new start_creating/start_removing APIs
2025-10-30 6:24 ` Al Viro
@ 2025-10-30 23:41 ` NeilBrown
0 siblings, 0 replies; 24+ messages in thread
From: NeilBrown @ 2025-10-30 23:41 UTC (permalink / raw)
To: Al Viro
Cc: Christian Brauner, Amir Goldstein, Jan Kara, linux-fsdevel,
Jeff Layton, Chris Mason, David Sterba, David Howells,
Greg Kroah-Hartman, Rafael J. Wysocki, Danilo Krummrich,
Tyler Hicks, Miklos Szeredi, Chuck Lever, Olga Kornievskaia,
Dai Ngo, Namjae Jeon, Steve French, Sergey Senozhatsky,
Carlos Maiolino, John Johansen, Paul Moore, James Morris,
Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek, Mateusz Guzik,
Lorenzo Stoakes, Stefan Berger, Darrick J. Wong, linux-kernel,
netfs, ecryptfs, linux-nfs, linux-unionfs, linux-cifs, linux-xfs,
apparmor, linux-security-module, selinux
On Thu, 30 Oct 2025, Al Viro wrote:
> On Thu, Oct 30, 2025 at 10:31:12AM +1100, NeilBrown wrote:
>
> > +static struct dentry *ecryptfs_start_creating_dentry(struct dentry *dentry)
> > {
> > - struct dentry *lower_dir_dentry;
> > + struct dentry *parent = dget_parent(dentry->d_parent);
>
> "Grab the reference to grandparent"?
>
That's somewhat embarrassing :-(
Fixed as below.
Thanks a lot!
NeilBrown
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index b3702105d236..6a5bca89e752 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -26,7 +26,7 @@
static struct dentry *ecryptfs_start_creating_dentry(struct dentry *dentry)
{
- struct dentry *parent = dget_parent(dentry->d_parent);
+ struct dentry *parent = dget_parent(dentry);
struct dentry *ret;
ret = start_creating_dentry(ecryptfs_dentry_to_lower(parent),
@@ -37,7 +37,7 @@ static struct dentry *ecryptfs_start_creating_dentry(struct dentry *dentry)
static struct dentry *ecryptfs_start_removing_dentry(struct dentry *dentry)
{
- struct dentry *parent = dget_parent(dentry->d_parent);
+ struct dentry *parent = dget_parent(dentry);
struct dentry *ret;
ret = start_removing_dentry(ecryptfs_dentry_to_lower(parent),
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH v4 13/14] VFS: change vfs_mkdir() to unlock on failure.
2025-10-29 23:31 [PATCH v4 00/14] Create and use APIs to centralise locking for directory ops NeilBrown
` (11 preceding siblings ...)
2025-10-29 23:31 ` [PATCH v4 12/14] ecryptfs: use new start_creating/start_removing APIs NeilBrown
@ 2025-10-29 23:31 ` NeilBrown
2025-10-29 23:31 ` [PATCH v4 14/14] VFS: introduce end_creating_keep() NeilBrown
13 siblings, 0 replies; 24+ messages in thread
From: NeilBrown @ 2025-10-29 23:31 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Amir Goldstein
Cc: Jan Kara, linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Chuck Lever,
Olga Kornievskaia, Dai Ngo, Namjae Jeon, Steve French,
Sergey Senozhatsky, Carlos Maiolino, John Johansen, Paul Moore,
James Morris, Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek,
Mateusz Guzik, Lorenzo Stoakes, Stefan Berger, Darrick J. Wong,
linux-kernel, netfs, ecryptfs, linux-nfs, linux-unionfs,
linux-cifs, linux-xfs, apparmor, linux-security-module, selinux
From: NeilBrown <neil@brown.name>
vfs_mkdir() already drops the reference to the dentry on failure but it
leaves the parent locked.
This complicates end_creating() which needs to unlock the parent even
though the dentry is no longer available.
If we change vfs_mkdir() to unlock on failure as well as releasing the
dentry, we can remove the "parent" arg from end_creating() and simplify
the rules for calling it.
Note that cachefiles_get_directory() can choose to substitute an error
instead of actually calling vfs_mkdir(), for fault injection. In that
case it needs to call end_creating(), just as vfs_mkdir() now does on
error.
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
--
changes since v2:
- extra {} in if() branch in cachefiles_get_directory() to
match the new extra {} in the else branch.
- filesystems/porting.rst updated.
---
Documentation/filesystems/porting.rst | 13 +++++++++++++
fs/btrfs/ioctl.c | 2 +-
fs/cachefiles/namei.c | 16 ++++++++-------
fs/ecryptfs/inode.c | 8 ++++----
fs/namei.c | 4 ++--
fs/nfsd/nfs3proc.c | 2 +-
fs/nfsd/nfs4proc.c | 2 +-
fs/nfsd/nfs4recover.c | 2 +-
fs/nfsd/nfsproc.c | 2 +-
fs/nfsd/vfs.c | 8 ++++----
fs/overlayfs/copy_up.c | 4 ++--
fs/overlayfs/dir.c | 13 ++++++-------
fs/overlayfs/super.c | 6 +++---
fs/xfs/scrub/orphanage.c | 2 +-
include/linux/namei.h | 28 +++++++++------------------
ipc/mqueue.c | 2 +-
16 files changed, 59 insertions(+), 55 deletions(-)
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 7233b04668fc..76ff738a00f3 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -1309,3 +1309,16 @@ a different length, use
vfs_parse_fs_qstr(fc, key, &QSTR_LEN(value, len))
instead.
+
+---
+
+**mandatory**
+
+vfs_mkdir() now returns a dentry - the one returned by ->mkdir(). If
+that dentry is different from the dentry passed in, including if it is
+an IS_ERR() dentry pointer, the original dentry is dput().
+
+When vfs_mkdir() returns an error, and so both dputs() the original
+dentry and doesn't provide a replacement, it also unlocks the parent.
+Consequently the return value from vfs_mkdir() can be passed to
+end_creating() and the parent will be unlocked precisely when necessary.
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4fbfdd8faf6a..90ef777eae25 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -935,7 +935,7 @@ static noinline int btrfs_mksubvol(struct dentry *parent,
out_up_read:
up_read(&fs_info->subvol_sem);
out_dput:
- end_creating(dentry, parent);
+ end_creating(dentry);
return ret;
}
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index b97a40917a32..c417ba4bcec3 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -128,10 +128,12 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
if (ret < 0)
goto mkdir_error;
ret = cachefiles_inject_write_error();
- if (ret == 0)
+ if (ret == 0) {
subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700);
- else
+ } else {
+ end_creating(subdir);
subdir = ERR_PTR(ret);
+ }
if (IS_ERR(subdir)) {
trace_cachefiles_vfs_error(NULL, d_inode(dir), ret,
cachefiles_trace_mkdir_error);
@@ -140,7 +142,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
trace_cachefiles_mkdir(dir, subdir);
if (unlikely(d_unhashed(subdir) || d_is_negative(subdir))) {
- end_creating(subdir, dir);
+ end_creating(subdir);
goto retry;
}
ASSERT(d_backing_inode(subdir));
@@ -154,7 +156,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
/* Tell rmdir() it's not allowed to delete the subdir */
inode_lock(d_inode(subdir));
dget(subdir);
- end_creating(subdir, dir);
+ end_creating(subdir);
if (!__cachefiles_mark_inode_in_use(NULL, d_inode(subdir))) {
pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
@@ -196,7 +198,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
return ERR_PTR(-EBUSY);
mkdir_error:
- end_creating(subdir, dir);
+ end_creating(subdir);
pr_err("mkdir %s failed with error %d\n", dirname, ret);
return ERR_PTR(ret);
@@ -705,7 +707,7 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache,
if (ret < 0)
goto out_end;
- end_creating(dentry, fan);
+ end_creating(dentry);
ret = cachefiles_inject_read_error();
if (ret == 0)
@@ -739,7 +741,7 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache,
}
out_end:
- end_creating(dentry, fan);
+ end_creating(dentry);
out:
_leave(" = %u", success);
return success;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index b3702105d236..90d74ecc5028 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -211,7 +211,7 @@ ecryptfs_do_create(struct inode *directory_inode,
fsstack_copy_attr_times(directory_inode, lower_dir);
fsstack_copy_inode_size(directory_inode, lower_dir);
out_lock:
- end_creating(lower_dentry, NULL);
+ end_creating(lower_dentry);
return inode;
}
@@ -456,7 +456,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
ecryptfs_inode_to_lower(d_inode(old_dentry))->i_nlink);
i_size_write(d_inode(new_dentry), file_size_save);
out_lock:
- end_creating(lower_new_dentry, NULL);
+ end_creating(lower_new_dentry);
return rc;
}
@@ -500,7 +500,7 @@ static int ecryptfs_symlink(struct mnt_idmap *idmap,
fsstack_copy_attr_times(dir, lower_dir);
fsstack_copy_inode_size(dir, lower_dir);
out_lock:
- end_creating(lower_dentry, NULL);
+ end_creating(lower_dentry);
if (d_really_is_negative(dentry))
d_drop(dentry);
return rc;
@@ -534,7 +534,7 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
fsstack_copy_inode_size(dir, lower_dir);
set_nlink(dir, lower_dir->i_nlink);
out:
- end_creating(lower_dentry, lower_dir_dentry);
+ end_creating(lower_dentry);
if (d_really_is_negative(dentry))
d_drop(dentry);
return ERR_PTR(rc);
diff --git a/fs/namei.c b/fs/namei.c
index 8b7807cd1343..d284ebae41bf 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4832,7 +4832,7 @@ EXPORT_SYMBOL(start_creating_path);
*/
void end_creating_path(const struct path *path, struct dentry *dentry)
{
- end_creating(dentry, path->dentry);
+ end_creating(dentry);
mnt_drop_write(path->mnt);
path_put(path);
}
@@ -5034,7 +5034,7 @@ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
return dentry;
err:
- dput(dentry);
+ end_creating(dentry);
return ERR_PTR(error);
}
EXPORT_SYMBOL(vfs_mkdir);
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index e2aac0def2cb..6b39e4aff959 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -364,7 +364,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs);
out:
- end_creating(child, parent);
+ end_creating(child);
out_write:
fh_drop_write(fhp);
return status;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b2c95e8e7c68..524cb07a477c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -376,7 +376,7 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (attrs.na_aclerr)
open->op_bmval[0] &= ~FATTR4_WORD0_ACL;
out:
- end_creating(child, parent);
+ end_creating(child);
nfsd_attrs_free(&attrs);
out_write:
fh_drop_write(fhp);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 3eefaa2202e3..18c08395b273 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -215,7 +215,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
if (IS_ERR(dentry))
status = PTR_ERR(dentry);
out_end:
- end_creating(dentry, dir);
+ end_creating(dentry);
out:
if (status == 0) {
if (nn->in_grace)
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index ee1b16e921fd..28f03a6a3cc3 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -421,7 +421,7 @@ nfsd_proc_create(struct svc_rqst *rqstp)
}
out_unlock:
- end_creating(dchild, dirfhp->fh_dentry);
+ end_creating(dchild);
out_write:
fh_drop_write(dirfhp);
done:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 62109885d4db..6e9a57863904 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1589,7 +1589,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
out:
if (!err)
fh_fill_post_attrs(fhp);
- end_creating(dchild, dentry);
+ end_creating(dchild);
return err;
out_nfserr:
@@ -1646,7 +1646,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
return err;
out_unlock:
- end_creating(dchild, dentry);
+ end_creating(dchild);
return err;
}
@@ -1747,7 +1747,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
fh_fill_post_attrs(fhp);
out_unlock:
- end_creating(dnew, dentry);
+ end_creating(dnew);
if (!err)
err = nfserrno(commit_metadata(fhp));
if (!err)
@@ -1824,7 +1824,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
host_err = vfs_link(dold, &nop_mnt_idmap, dirp, dnew, NULL);
fh_fill_post_attrs(ffhp);
out_unlock:
- end_creating(dnew, ddir);
+ end_creating(dnew);
if (!host_err) {
host_err = commit_metadata(ffhp);
if (!host_err)
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 27014ada11c7..36949856ddea 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -624,7 +624,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
ovl_dentry_set_upper_alias(c->dentry);
ovl_dentry_update_reval(c->dentry, upper);
}
- end_creating(upper, upperdir);
+ end_creating(upper);
}
if (err)
goto out;
@@ -891,7 +891,7 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
err = PTR_ERR(upper);
if (!IS_ERR(upper)) {
err = ovl_do_link(ofs, temp, udir, upper);
- end_creating(upper, c->destdir);
+ end_creating(upper);
}
if (err)
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index c297648c7487..10e732360ace 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -91,7 +91,7 @@ static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
err = ovl_do_whiteout(ofs, wdir, whiteout);
if (!err)
ofs->whiteout = dget(whiteout);
- end_creating(whiteout, workdir);
+ end_creating(whiteout);
if (err)
return ERR_PTR(err);
}
@@ -103,7 +103,7 @@ static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
err = ovl_do_link(ofs, ofs->whiteout, wdir, link);
if (!err)
whiteout = dget(link);
- end_creating(link, workdir);
+ end_creating(link);
if (!err)
return whiteout;;
@@ -254,7 +254,7 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
ret = ovl_create_real(ofs, workdir, ret, attr);
if (!IS_ERR(ret))
dget(ret);
- end_creating(ret, workdir);
+ end_creating(ret);
return ret;
}
@@ -362,12 +362,11 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
if (IS_ERR(newdentry))
return PTR_ERR(newdentry);
newdentry = ovl_create_real(ofs, upperdir, newdentry, attr);
- if (IS_ERR(newdentry)) {
- end_creating(newdentry, upperdir);
+ if (IS_ERR(newdentry))
return PTR_ERR(newdentry);
- }
+
dget(newdentry);
- end_creating(newdentry, upperdir);
+ end_creating(newdentry);
if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) &&
!ovl_allow_offline_changes(ofs)) {
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index a721ef2b90e8..3acda985c8a3 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -320,7 +320,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
if (work->d_inode) {
dget(work);
- end_creating(work, ofs->workbasedir);
+ end_creating(work);
if (persist)
return work;
err = -EEXIST;
@@ -338,7 +338,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode);
if (!IS_ERR(work))
dget(work);
- end_creating(work, ofs->workbasedir);
+ end_creating(work);
err = PTR_ERR(work);
if (IS_ERR(work))
goto out_err;
@@ -632,7 +632,7 @@ static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs,
OVL_CATTR(mode));
if (!IS_ERR(child))
dget(child);
- end_creating(child, parent);
+ end_creating(child);
}
dput(parent);
diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c
index e732605924a1..b77c2b6b6d44 100644
--- a/fs/xfs/scrub/orphanage.c
+++ b/fs/xfs/scrub/orphanage.c
@@ -199,7 +199,7 @@ xrep_orphanage_create(
sc->orphanage_ilock_flags = 0;
out_dput_orphanage:
- end_creating(orphanage_dentry, root_dentry);
+ end_creating(orphanage_dentry);
out_dput_root:
dput(root_dentry);
out:
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 208aed1d6728..0ef73d739a31 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -105,34 +105,24 @@ struct dentry *start_creating_dentry(struct dentry *parent,
struct dentry *start_removing_dentry(struct dentry *parent,
struct dentry *child);
-/**
- * end_creating - finish action started with start_creating
- * @child: dentry returned by start_creating() or vfs_mkdir()
- * @parent: dentry given to start_creating(),
- *
- * Unlock and release the child.
+/* end_creating - finish action started with start_creating
+ * @child: dentry returned by start_creating() or vfs_mkdir()
*
- * Unlike end_dirop() this can only be called if start_creating() succeeded.
- * It handles @child being and error as vfs_mkdir() might have converted the
- * dentry to an error - in that case the parent still needs to be unlocked.
+ * Unlock and release the child. This can be called after
+ * start_creating() whether that function succeeded or not,
+ * but it is not needed on failure.
*
* If vfs_mkdir() was called then the value returned from that function
* should be given for @child rather than the original dentry, as vfs_mkdir()
- * may have provided a new dentry. Even if vfs_mkdir() returns an error
- * it must be given to end_creating().
+ * may have provided a new dentry.
+ *
*
* If vfs_mkdir() was not called, then @child will be a valid dentry and
* @parent will be ignored.
*/
-static inline void end_creating(struct dentry *child, struct dentry *parent)
+static inline void end_creating(struct dentry *child)
{
- if (IS_ERR(child))
- /* The parent is still locked despite the error from
- * vfs_mkdir() - must unlock it.
- */
- inode_unlock(parent->d_inode);
- else
- end_dirop(child);
+ end_dirop(child);
}
/**
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 6d7610310003..83d9466710d6 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -932,7 +932,7 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
put_unused_fd(fd);
fd = error;
}
- end_creating(path.dentry, root);
+ end_creating(path.dentry);
if (!ro)
mnt_drop_write(mnt);
out_putname:
--
2.50.0.107.gf914562f5916.dirty
^ permalink raw reply related [flat|nested] 24+ messages in thread* [PATCH v4 14/14] VFS: introduce end_creating_keep()
2025-10-29 23:31 [PATCH v4 00/14] Create and use APIs to centralise locking for directory ops NeilBrown
` (12 preceding siblings ...)
2025-10-29 23:31 ` [PATCH v4 13/14] VFS: change vfs_mkdir() to unlock on failure NeilBrown
@ 2025-10-29 23:31 ` NeilBrown
13 siblings, 0 replies; 24+ messages in thread
From: NeilBrown @ 2025-10-29 23:31 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner, Amir Goldstein
Cc: Jan Kara, linux-fsdevel, Jeff Layton, Chris Mason, David Sterba,
David Howells, Greg Kroah-Hartman, Rafael J. Wysocki,
Danilo Krummrich, Tyler Hicks, Miklos Szeredi, Chuck Lever,
Olga Kornievskaia, Dai Ngo, Namjae Jeon, Steve French,
Sergey Senozhatsky, Carlos Maiolino, John Johansen, Paul Moore,
James Morris, Serge E. Hallyn, Stephen Smalley, Ondrej Mosnacek,
Mateusz Guzik, Lorenzo Stoakes, Stefan Berger, Darrick J. Wong,
linux-kernel, netfs, ecryptfs, linux-nfs, linux-unionfs,
linux-cifs, linux-xfs, apparmor, linux-security-module, selinux
From: NeilBrown <neil@brown.name>
Occasionally the caller of end_creating() wants to keep using the dentry.
Rather then requiring them to dget() the dentry (when not an error)
before calling end_creating(), provide end_creating_keep() which does
this.
cachefiles and overlayfs make use of this.
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: NeilBrown <neil@brown.name>
---
fs/cachefiles/namei.c | 3 +--
fs/overlayfs/dir.c | 8 ++------
fs/overlayfs/super.c | 11 +++--------
include/linux/namei.h | 22 ++++++++++++++++++++++
4 files changed, 28 insertions(+), 16 deletions(-)
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index c417ba4bcec3..4c72af174540 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -155,8 +155,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
/* Tell rmdir() it's not allowed to delete the subdir */
inode_lock(d_inode(subdir));
- dget(subdir);
- end_creating(subdir);
+ end_creating_keep(subdir);
if (!__cachefiles_mark_inode_in_use(NULL, d_inode(subdir))) {
pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 10e732360ace..8faab04dc79f 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -252,10 +252,7 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
if (IS_ERR(ret))
return ret;
ret = ovl_create_real(ofs, workdir, ret, attr);
- if (!IS_ERR(ret))
- dget(ret);
- end_creating(ret);
- return ret;
+ return end_creating_keep(ret);
}
static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
@@ -365,8 +362,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
if (IS_ERR(newdentry))
return PTR_ERR(newdentry);
- dget(newdentry);
- end_creating(newdentry);
+ end_creating_keep(newdentry);
if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) &&
!ovl_allow_offline_changes(ofs)) {
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 3acda985c8a3..7b8fc1cab6eb 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -319,8 +319,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
};
if (work->d_inode) {
- dget(work);
- end_creating(work);
+ end_creating_keep(work);
if (persist)
return work;
err = -EEXIST;
@@ -336,9 +335,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
}
work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode);
- if (!IS_ERR(work))
- dget(work);
- end_creating(work);
+ end_creating_keep(work);
err = PTR_ERR(work);
if (IS_ERR(work))
goto out_err;
@@ -630,9 +627,7 @@ static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs,
if (!child->d_inode)
child = ovl_create_real(ofs, parent, child,
OVL_CATTR(mode));
- if (!IS_ERR(child))
- dget(child);
- end_creating(child);
+ end_creating_keep(child);
}
dput(parent);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 0ef73d739a31..3d82c6a19197 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -125,6 +125,28 @@ static inline void end_creating(struct dentry *child)
end_dirop(child);
}
+/* end_creating_keep - finish action started with start_creating() and return result
+ * @child: dentry returned by start_creating() or vfs_mkdir()
+ *
+ * Unlock and return the child. This can be called after
+ * start_creating() whether that function succeeded or not,
+ * but it is not needed on failure.
+ *
+ * If vfs_mkdir() was called then the value returned from that function
+ * should be given for @child rather than the original dentry, as vfs_mkdir()
+ * may have provided a new dentry.
+ *
+ * Returns: @child, which may be a dentry or an error.
+ *
+ */
+static inline struct dentry *end_creating_keep(struct dentry *child)
+{
+ if (!IS_ERR(child))
+ dget(child);
+ end_dirop(child);
+ return child;
+}
+
/**
* end_removing - finish action started with start_removing
* @child: dentry returned by start_removing()
--
2.50.0.107.gf914562f5916.dirty
^ permalink raw reply related [flat|nested] 24+ messages in thread