* [PATCH] fs: allow rename across bind mounts on same superblock
@ 2025-12-23 17:38 Andrei Topala
2025-12-23 17:51 ` Al Viro
2025-12-30 5:57 ` kernel test robot
0 siblings, 2 replies; 5+ messages in thread
From: Andrei Topala @ 2025-12-23 17:38 UTC (permalink / raw)
To: Alexander Viro, Christian Brauner; +Cc: Jan Kara, linux-fsdevel, Andrei Topala
Currently rename() returns EXDEV when source and destination are on
different mounts, even if they're bind mounts of the same filesystem.
This forces userspace (mv) to fall back to slow copy+delete.
Change the check from comparing mount pointers to comparing superblocks,
which allows renaming across bind mounts within the same filesystem.
This also handles the case where source and destination mounts have different
properties (idmaps, read-only flags) by checking both mounts appropriately.
Signed-off-by: Andrei Topala <topala.andrei@gmail.com>
---
fs/cachefiles/namei.c | 3 ++-
fs/ecryptfs/inode.c | 3 ++-
fs/namei.c | 39 +++++++++++++++++++++++++--------------
fs/nfsd/vfs.c | 3 ++-
fs/overlayfs/copy_up.c | 6 ++++--
fs/overlayfs/dir.c | 12 ++++++++----
fs/overlayfs/overlayfs.h | 3 ++-
fs/overlayfs/super.c | 3 ++-
fs/smb/server/vfs.c | 3 ++-
include/linux/fs.h | 6 ++++--
10 files changed, 53 insertions(+), 28 deletions(-)
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index e5ec90dcc..771f169cc 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -383,7 +383,8 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
cachefiles_io_error(cache, "Rename security error %d", ret);
} else {
struct renamedata rd = {
- .mnt_idmap = &nop_mnt_idmap,
+ .old_mnt_idmap = &nop_mnt_idmap,
+ .new_mnt_idmap = &nop_mnt_idmap,
.old_parent = dir,
.old_dentry = rep,
.new_parent = cache->graveyard,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 397824824..1d59b8a2e 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -614,7 +614,8 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
target_inode = d_inode(new_dentry);
- rd.mnt_idmap = &nop_mnt_idmap;
+ rd.old_mnt_idmap = &nop_mnt_idmap;
+ rd.new_mnt_idmap = &nop_mnt_idmap;
rd.old_parent = lower_old_dir_dentry;
rd.new_parent = lower_new_dir_dentry;
rc = start_renaming_two_dentries(&rd, lower_old_dentry, lower_new_dentry);
diff --git a/fs/namei.c b/fs/namei.c
index bf0f66f0e..be9931d63 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3867,7 +3867,8 @@ __start_renaming(struct renamedata *rd, int lookup_flags,
* These references and the lock are dropped by end_renaming().
*
* The passed in qstrs need not have the hash calculated, and basic
- * eXecute permission checking is performed against @rd.mnt_idmap.
+ * eXecute permission checking is performed against @rd.old_mnt_idmap
+ * and @rd.new_mnt_idmap respectively.
*
* Returns: zero or an error.
*/
@@ -3876,10 +3877,10 @@ int start_renaming(struct renamedata *rd, int lookup_flags,
{
int err;
- err = lookup_one_common(rd->mnt_idmap, old_last, rd->old_parent);
+ err = lookup_one_common(rd->old_mnt_idmap, old_last, rd->old_parent);
if (err)
return err;
- err = lookup_one_common(rd->mnt_idmap, new_last, rd->new_parent);
+ err = lookup_one_common(rd->new_mnt_idmap, new_last, rd->new_parent);
if (err)
return err;
return __start_renaming(rd, lookup_flags, old_last, new_last);
@@ -3964,7 +3965,7 @@ __start_renaming_dentry(struct renamedata *rd, int lookup_flags,
* References and the lock can be dropped with end_renaming()
*
* The passed in qstr need not have the hash calculated, and basic
- * eXecute permission checking is performed against @rd.mnt_idmap.
+ * eXecute permission checking is performed against @rd.new_mnt_idmap.
*
* Returns: zero or an error.
*/
@@ -3973,7 +3974,7 @@ int start_renaming_dentry(struct renamedata *rd, int lookup_flags,
{
int err;
- err = lookup_one_common(rd->mnt_idmap, new_last, rd->new_parent);
+ err = lookup_one_common(rd->new_mnt_idmap, new_last, rd->new_parent);
if (err)
return err;
return __start_renaming_dentry(rd, lookup_flags, old_dentry, new_last);
@@ -5816,20 +5817,20 @@ int vfs_rename(struct renamedata *rd)
if (source == target)
return 0;
- error = may_delete(rd->mnt_idmap, old_dir, old_dentry, is_dir);
+ error = may_delete(rd->old_mnt_idmap, old_dir, old_dentry, is_dir);
if (error)
return error;
if (!target) {
- error = may_create(rd->mnt_idmap, new_dir, new_dentry);
+ error = may_create(rd->new_mnt_idmap, new_dir, new_dentry);
} else {
new_is_dir = d_is_dir(new_dentry);
if (!(flags & RENAME_EXCHANGE))
- error = may_delete(rd->mnt_idmap, new_dir,
+ error = may_delete(rd->new_mnt_idmap, new_dir,
new_dentry, is_dir);
else
- error = may_delete(rd->mnt_idmap, new_dir,
+ error = may_delete(rd->new_mnt_idmap, new_dir,
new_dentry, new_is_dir);
}
if (error)
@@ -5844,13 +5845,13 @@ int vfs_rename(struct renamedata *rd)
*/
if (new_dir != old_dir) {
if (is_dir) {
- error = inode_permission(rd->mnt_idmap, source,
+ error = inode_permission(rd->old_mnt_idmap, source,
MAY_WRITE);
if (error)
return error;
}
if ((flags & RENAME_EXCHANGE) && new_is_dir) {
- error = inode_permission(rd->mnt_idmap, target,
+ error = inode_permission(rd->new_mnt_idmap, target,
MAY_WRITE);
if (error)
return error;
@@ -5926,7 +5927,7 @@ int vfs_rename(struct renamedata *rd)
if (error)
goto out;
}
- error = old_dir->i_op->rename(rd->mnt_idmap, old_dir, old_dentry,
+ error = old_dir->i_op->rename(rd->old_mnt_idmap, old_dir, old_dentry,
new_dir, new_dentry, flags);
if (error)
goto out;
@@ -5996,7 +5997,7 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
goto exit1;
error = -EXDEV;
- if (old_path.mnt != new_path.mnt)
+ if (old_path.mnt->mnt_sb != new_path.mnt->mnt_sb)
goto exit2;
error = -EBUSY;
@@ -6012,9 +6013,16 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
if (error)
goto exit2;
+ if (old_path.mnt != new_path.mnt) {
+ error = mnt_want_write(new_path.mnt);
+ if (error)
+ goto exit3;
+ }
+
retry_deleg:
rd.old_parent = old_path.dentry;
- rd.mnt_idmap = mnt_idmap(old_path.mnt);
+ rd.old_mnt_idmap = mnt_idmap(old_path.mnt);
+ rd.new_mnt_idmap = mnt_idmap(new_path.mnt);
rd.new_parent = new_path.dentry;
rd.delegated_inode = &delegated_inode;
rd.flags = flags;
@@ -6053,6 +6061,9 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
if (!error)
goto retry_deleg;
}
+ if (old_path.mnt != new_path.mnt)
+ mnt_drop_write(new_path.mnt);
+exit3:
mnt_drop_write(old_path.mnt);
exit2:
if (retry_estale(error, lookup_flags))
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 964cf922a..5ffcd279f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -2154,7 +2154,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
goto out;
}
- rd.mnt_idmap = &nop_mnt_idmap;
+ rd.old_mnt_idmap = &nop_mnt_idmap;
+ rd.new_mnt_idmap = &nop_mnt_idmap;
rd.old_parent = fdentry;
rd.new_parent = tdentry;
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 758611ee4..0f1df4fef 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -556,7 +556,8 @@ static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh,
if (err)
goto out;
- rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.new_mnt_idmap = ovl_upper_mnt_idmap(ofs);
rd.old_parent = indexdir;
rd.new_parent = indexdir;
err = start_renaming_dentry(&rd, 0, temp, &name);
@@ -804,7 +805,8 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
* ovl_copy_up_data(), so lock workdir and destdir and make sure that
* temp wasn't moved before copy up completion or cleanup.
*/
- rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.new_mnt_idmap = ovl_upper_mnt_idmap(ofs);
rd.old_parent = c->workdir;
rd.new_parent = c->destdir;
rd.flags = 0;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index ff3dbd1ca..cb988dfa3 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -135,7 +135,8 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *dir,
if (d_is_dir(dentry))
flags = RENAME_EXCHANGE;
- rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.new_mnt_idmap = ovl_upper_mnt_idmap(ofs);
rd.old_parent = ofs->workdir;
rd.new_parent = dir;
rd.flags = flags;
@@ -413,7 +414,8 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
if (IS_ERR(opaquedir))
goto out;
- rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.new_mnt_idmap = ovl_upper_mnt_idmap(ofs);
rd.old_parent = workdir;
rd.new_parent = upperdir;
rd.flags = RENAME_EXCHANGE;
@@ -504,7 +506,8 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
if (IS_ERR(newdentry))
goto out_dput;
- rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.new_mnt_idmap = ovl_upper_mnt_idmap(ofs);
rd.old_parent = workdir;
rd.new_parent = upperdir;
rd.flags = 0;
@@ -1230,7 +1233,8 @@ static int ovl_rename_upper(struct ovl_renamedata *ovlrd, struct list_head *list
}
}
- rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.new_mnt_idmap = ovl_upper_mnt_idmap(ofs);
rd.old_parent = old_upperdir;
rd.new_parent = new_upperdir;
rd.flags = ovlrd->flags;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index f9ac9bdde..4b7824761 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -374,7 +374,8 @@ static inline int ovl_do_rename(struct ovl_fs *ofs, struct dentry *olddir,
struct dentry *newdentry, unsigned int flags)
{
struct renamedata rd = {
- .mnt_idmap = ovl_upper_mnt_idmap(ofs),
+ .old_mnt_idmap = ovl_upper_mnt_idmap(ofs),
+ .new_mnt_idmap = ovl_upper_mnt_idmap(ofs),
.old_parent = olddir,
.old_dentry = olddentry,
.new_parent = newdir,
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index ba9146f22..86d164c05 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -582,7 +582,8 @@ static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
if (IS_ERR(temp))
return err;
- rd.mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.old_mnt_idmap = ovl_upper_mnt_idmap(ofs);
+ rd.new_mnt_idmap = ovl_upper_mnt_idmap(ofs);
rd.old_parent = workdir;
rd.new_parent = workdir;
rd.flags = RENAME_WHITEOUT;
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index f891344bd..f9e46b6e0 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -698,7 +698,8 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
if (err)
goto out2;
- rd.mnt_idmap = mnt_idmap(old_path->mnt);
+ rd.old_mnt_idmap = mnt_idmap(old_path->mnt);
+ rd.new_mnt_idmap = mnt_idmap(new_path.mnt);
rd.old_parent = NULL;
rd.new_parent = new_path.dentry;
rd.flags = flags;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f5c9cf28c..4b7a1fd23 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1774,7 +1774,8 @@ int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *,
/**
* struct renamedata - contains all information required for renaming
- * @mnt_idmap: idmap of the mount in which the rename is happening.
+ * @old_mnt_idmap: idmap of the source mount
+ * @new_mnt_idmap: idmap of the destination mount
* @old_parent: parent of source
* @old_dentry: source
* @new_parent: parent of destination
@@ -1783,7 +1784,8 @@ int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *,
* @flags: rename flags
*/
struct renamedata {
- struct mnt_idmap *mnt_idmap;
+ struct mnt_idmap *old_mnt_idmap;
+ struct mnt_idmap *new_mnt_idmap;
struct dentry *old_parent;
struct dentry *old_dentry;
struct dentry *new_parent;
--
2.50.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] fs: allow rename across bind mounts on same superblock
2025-12-23 17:38 [PATCH] fs: allow rename across bind mounts on same superblock Andrei Topala
@ 2025-12-23 17:51 ` Al Viro
[not found] ` <CAF8SvsB0yQC7Meni=UQEehaT5YBQx2uEas8irhg3vWstdM_JVA@mail.gmail.com>
2025-12-30 5:57 ` kernel test robot
1 sibling, 1 reply; 5+ messages in thread
From: Al Viro @ 2025-12-23 17:51 UTC (permalink / raw)
To: Andrei Topala; +Cc: Christian Brauner, Jan Kara, linux-fsdevel
On Tue, Dec 23, 2025 at 05:38:03PM +0000, Andrei Topala wrote:
> Currently rename() returns EXDEV when source and destination are on
> different mounts, even if they're bind mounts of the same filesystem.
> This forces userspace (mv) to fall back to slow copy+delete.
>
> Change the check from comparing mount pointers to comparing superblocks,
> which allows renaming across bind mounts within the same filesystem.
... which destroys the use of bind mount as rename boundary. Why is that
a good idea?
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] fs: allow rename across bind mounts on same superblock
[not found] ` <CAF8SvsB0yQC7Meni=UQEehaT5YBQx2uEas8irhg3vWstdM_JVA@mail.gmail.com>
@ 2025-12-23 18:59 ` Al Viro
2025-12-23 23:59 ` Andrei Topala
0 siblings, 1 reply; 5+ messages in thread
From: Al Viro @ 2025-12-23 18:59 UTC (permalink / raw)
To: Andrei Topala; +Cc: Christian Brauner, Jan Kara, linux-fsdevel
On Tue, Dec 23, 2025 at 08:27:58PM +0200, Andrei Topala wrote:
> Rename would not work if the user doesn't have the right permissions. The
> patch calls `mnt_want_write()` on both mounts and uses each side's idmap
> for permission checks.
>
> This would make renames faster in this case. Currently, users who have
> write permission on both mounts get EXDEV and must fall back to copying the
> data, which can be slow for large files.
>
> I initially attempted to handle this in userspace (in coreutils mv), but
> it's more complex there. It requires parsing /proc/self/mountinfo to
> translate paths through bind mount roots, and it only benefits mv. Other
> tools that call rename() directly would hit the slow path.
>
> Is there a specific isolation scenario where allowing rename between
> same-superblock mounts would be problematic? I'd like to understand the
> concern better.
Consider fun with moving a subdirectory of a mounted subtree to another
mounted subtree, while some process has been chdired into it, for
starters...
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] fs: allow rename across bind mounts on same superblock
2025-12-23 18:59 ` Al Viro
@ 2025-12-23 23:59 ` Andrei Topala
0 siblings, 0 replies; 5+ messages in thread
From: Andrei Topala @ 2025-12-23 23:59 UTC (permalink / raw)
To: Al Viro; +Cc: Christian Brauner, Jan Kara, linux-fsdevel
> Consider fun with moving a subdirectory of a mounted subtree to another
> mounted subtree, while some process has been chdired into it, for
> starters...
I see the same situation can occur when renaming through the
underlying mount. If process A does chdir("/bind1/subdir") and
process B does rename("/mnt/fs/dir1/subdir", "/mnt/fs/dir2/subdir"),
the rename succeeds while process A is inside.
I understand this patch would make it easier to trigger for
processes that only have access to bind mount paths. Is that the
main concern, or am I missing something more fundamental?
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] fs: allow rename across bind mounts on same superblock
2025-12-23 17:38 [PATCH] fs: allow rename across bind mounts on same superblock Andrei Topala
2025-12-23 17:51 ` Al Viro
@ 2025-12-30 5:57 ` kernel test robot
1 sibling, 0 replies; 5+ messages in thread
From: kernel test robot @ 2025-12-30 5:57 UTC (permalink / raw)
To: Andrei Topala
Cc: oe-lkp, lkp, netfs, ecryptfs, linux-fsdevel, linux-nfs,
linux-unionfs, linux-cifs, Alexander Viro, Christian Brauner,
Jan Kara, Andrei Topala, oliver.sang
Hello,
kernel test robot noticed "xfstests.generic.633.fail" on:
commit: 6c7ce82ab1bf6ff8bf562d830a5e9a2dd26b4ebb ("[PATCH] fs: allow rename across bind mounts on same superblock")
url: https://github.com/intel-lab-lkp/linux/commits/Andrei-Topala/fs-allow-rename-across-bind-mounts-on-same-superblock/20251224-020432
base: https://git.kernel.org/cgit/linux/kernel/git/vfs/vfs.git vfs.all
patch link: https://lore.kernel.org/all/20251223173803.1623903-1-topala.andrei@gmail.com/
patch subject: [PATCH] fs: allow rename across bind mounts on same superblock
in testcase: xfstests
version: xfstests-x86_64-a668057f-1_20251209
with following parameters:
disk: 4HDD
fs: ext2
test: generic-633
config: x86_64-rhel-9.4-func
compiler: gcc-14
test machine: 4 threads Intel(R) Core(TM) i5-6500 CPU @ 3.20GHz (Skylake) with 32G memory
(please refer to attached dmesg/kmsg for entire log/backtrace)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202512301036.b3179a4b-lkp@intel.com
2025-12-28 19:38:18 cd /lkp/benchmarks/xfstests
2025-12-28 19:38:18 export TEST_DIR=/fs/sdb1
2025-12-28 19:38:18 export TEST_DEV=/dev/sdb1
2025-12-28 19:38:18 export FSTYP=ext2
2025-12-28 19:38:18 export SCRATCH_MNT=/fs/scratch
2025-12-28 19:38:18 mkdir /fs/scratch -p
2025-12-28 19:38:18 export SCRATCH_DEV=/dev/sdb4
2025-12-28 19:38:18 echo generic/633
2025-12-28 19:38:18 ./check -E tests/exclude/ext2 generic/633
FSTYP -- ext2
PLATFORM -- Linux/x86_64 lkp-skl-d03 6.19.0-rc1-00037-g6c7ce82ab1bf #1 SMP PREEMPT_DYNAMIC Mon Dec 29 01:50:58 CST 2025
MKFS_OPTIONS -- -F /dev/sdb4
MOUNT_OPTIONS -- -o acl,user_xattr /dev/sdb4 /fs/scratch
generic/633 [failed, exit status 1]- output mismatch (see /lkp/benchmarks/xfstests/results//generic/633.out.bad)
--- tests/generic/633.out 2025-12-09 15:20:52.000000000 +0000
+++ /lkp/benchmarks/xfstests/results//generic/633.out.bad 2025-12-28 19:41:48.101143921 +0000
@@ -1,2 +1,4 @@
QA output created by 633
Silence is golden
+vfstest.c: 199: rename_crossing_mounts - Success - failure: renameat
+vfstest.c: 2418: run_test - Success - failure: cross mount rename
...
(Run 'diff -u /lkp/benchmarks/xfstests/tests/generic/633.out /lkp/benchmarks/xfstests/results//generic/633.out.bad' to see the entire diff)
Ran: generic/633
Failures: generic/633
Failed 1 of 1 tests
The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20251230/202512301036.b3179a4b-lkp@intel.com
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2025-12-30 5:57 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-12-23 17:38 [PATCH] fs: allow rename across bind mounts on same superblock Andrei Topala
2025-12-23 17:51 ` Al Viro
[not found] ` <CAF8SvsB0yQC7Meni=UQEehaT5YBQx2uEas8irhg3vWstdM_JVA@mail.gmail.com>
2025-12-23 18:59 ` Al Viro
2025-12-23 23:59 ` Andrei Topala
2025-12-30 5:57 ` kernel test robot
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.