* [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl()
2025-02-07 3:36 [PATCH 0/2] VFS: minor improvements to a couple of interfaces NeilBrown
@ 2025-02-07 3:36 ` NeilBrown
2025-02-12 2:50 ` kernel test robot
2025-02-12 3:16 ` Al Viro
0 siblings, 2 replies; 15+ messages in thread
From: NeilBrown @ 2025-02-07 3:36 UTC (permalink / raw)
To: Christian Brauner, Alexander Viro, Jan Kara
Cc: Greg Kroah-Hartman, Rafael J. Wysocki, Danilo Krummrich,
Kent Overstreet, Trond Myklebust, Anna Schumaker, Namjae Jeon,
Steve French, Sergey Senozhatsky, Tom Talpey, Paul Moore,
Eric Paris, linux-kernel, linux-bcachefs, linux-fsdevel,
linux-nfs, linux-cifs, audit
Callers of lookup_one_qstr_excl() often check if the result is negative or
positive.
These changes can easily be moved into lookup_one_qstr_excl() by checking the
lookup flags:
LOOKUP_CREATE means it is NOT an error if the name doesn't exist.
LOOKUP_EXCL means it IS an error if the name DOES exist.
This patch adds these checks, then removes error checks from callers,
and ensures that appropriate flags are passed.
This subtly changes the meaning of LOOKUP_EXCL. Previously it could
only accompany LOOKUP_CREATE. Now it can accompany LOOKUP_RENAME_TARGET
as well. A couple of small changes are needed to accommodate this. The
NFS change is functionally a no-op but ensures nfs_is_exclusive_create() does
exactly what the name says.
Signed-off-by: NeilBrown <neilb@suse.de>
---
fs/namei.c | 59 +++++++++++++++------------------------------
fs/nfs/dir.c | 3 ++-
fs/smb/server/vfs.c | 26 ++++++++------------
3 files changed, 31 insertions(+), 57 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index e3047db7b2b4..e0527f4b0586 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1665,6 +1665,8 @@ static struct dentry *lookup_dcache(const struct qstr *name,
* dentries - as the matter of fact, this only gets called
* when directory is guaranteed to have no in-lookup children
* at all.
+ * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed.
+ * Will return -EEXIST if name is found and LOOKUP_EXCL was passed.
*/
struct dentry *lookup_one_qstr_excl(const struct qstr *name,
struct dentry *base,
@@ -1675,7 +1677,7 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
struct inode *dir = base->d_inode;
if (dentry)
- return dentry;
+ goto found;
/* Don't create child dentry for a dead directory. */
if (unlikely(IS_DEADDIR(dir)))
@@ -1690,6 +1692,15 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
dput(dentry);
dentry = old;
}
+found:
+ if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) {
+ dput(dentry);
+ return ERR_PTR(-ENOENT);
+ }
+ if (d_is_positive(dentry) && (flags & LOOKUP_EXCL)) {
+ dput(dentry);
+ return ERR_PTR(-EEXIST);
+ }
return dentry;
}
EXPORT_SYMBOL(lookup_one_qstr_excl);
@@ -2736,10 +2747,6 @@ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct
}
inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
d = lookup_one_qstr_excl(&last, path->dentry, 0);
- if (!IS_ERR(d) && d_is_negative(d)) {
- dput(d);
- d = ERR_PTR(-ENOENT);
- }
if (IS_ERR(d)) {
inode_unlock(path->dentry->d_inode);
path_put(path);
@@ -4077,27 +4084,13 @@ static struct dentry *filename_create(int dfd, struct filename *name,
* '/', and a directory wasn't requested.
*/
if (last.name[last.len] && !want_dir)
- create_flags = 0;
+ create_flags &= ~LOOKUP_CREATE;
inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
dentry = lookup_one_qstr_excl(&last, path->dentry,
reval_flag | create_flags);
if (IS_ERR(dentry))
goto unlock;
- error = -EEXIST;
- if (d_is_positive(dentry))
- goto fail;
-
- /*
- * Special case - lookup gave negative, but... we had foo/bar/
- * From the vfs_mknod() POV we just have a negative dentry -
- * all is fine. Let's be bastards - you had / on the end, you've
- * been asking for (non-existent) directory. -ENOENT for you.
- */
- if (unlikely(!create_flags)) {
- error = -ENOENT;
- goto fail;
- }
if (unlikely(err2)) {
error = err2;
goto fail;
@@ -4444,10 +4437,6 @@ int do_rmdir(int dfd, struct filename *name)
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto exit3;
- if (!dentry->d_inode) {
- error = -ENOENT;
- goto exit4;
- }
error = security_path_rmdir(&path, dentry);
if (error)
goto exit4;
@@ -4578,7 +4567,7 @@ int do_unlinkat(int dfd, struct filename *name)
if (!IS_ERR(dentry)) {
/* Why not before? Because we want correct error value */
- if (last.name[last.len] || d_is_negative(dentry))
+ if (last.name[last.len])
goto slashes;
inode = dentry->d_inode;
ihold(inode);
@@ -4612,9 +4601,7 @@ int do_unlinkat(int dfd, struct filename *name)
return error;
slashes:
- if (d_is_negative(dentry))
- error = -ENOENT;
- else if (d_is_dir(dentry))
+ if (d_is_dir(dentry))
error = -EISDIR;
else
error = -ENOTDIR;
@@ -5114,7 +5101,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
struct qstr old_last, new_last;
int old_type, new_type;
struct inode *delegated_inode = NULL;
- unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
+ unsigned int lookup_flags = 0, target_flags =
+ LOOKUP_RENAME_TARGET | LOOKUP_CREATE;
bool should_retry = false;
int error = -EINVAL;
@@ -5127,6 +5115,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
if (flags & RENAME_EXCHANGE)
target_flags = 0;
+ if (flags & RENAME_NOREPLACE)
+ target_flags |= LOOKUP_EXCL;
retry:
error = filename_parentat(olddfd, from, lookup_flags, &old_path,
@@ -5168,23 +5158,12 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
- /* source must exist */
- error = -ENOENT;
- if (d_is_negative(old_dentry))
- goto exit4;
new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
lookup_flags | target_flags);
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto exit4;
- error = -EEXIST;
- if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
- goto exit5;
if (flags & RENAME_EXCHANGE) {
- error = -ENOENT;
- if (d_is_negative(new_dentry))
- goto exit5;
-
if (!d_is_dir(new_dentry)) {
error = -ENOTDIR;
if (new_last.name[new_last.len])
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2b04038b0e40..56cf16a72334 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1532,7 +1532,8 @@ static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
{
if (NFS_PROTO(dir)->version == 2)
return 0;
- return flags & LOOKUP_EXCL;
+ return (flags & (LOOKUP_CREATE | LOOKUP_EXCL)) ==
+ (LOOKUP_CREATE | LOOKUP_EXCL);
}
/*
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index 6890016e1923..fe29acef5872 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -113,11 +113,6 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
if (IS_ERR(d))
goto err_out;
- if (d_is_negative(d)) {
- dput(d);
- goto err_out;
- }
-
path->dentry = d;
path->mnt = mntget(parent_path->mnt);
@@ -693,6 +688,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
struct ksmbd_file *parent_fp;
int new_type;
int err, lookup_flags = LOOKUP_NO_SYMLINKS;
+ int target_lookup_flags = LOOKUP_RENAME_TARGET;
if (ksmbd_override_fsids(work))
return -ENOMEM;
@@ -703,6 +699,14 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
goto revert_fsids;
}
+ /*
+ * explicitly handle file overwrite case, for compatibility with
+ * filesystems that may not support rename flags (e.g: fuse)
+ */
+ if (flags & RENAME_NOREPLACE)
+ target_lookup_flags |= LOOKUP_EXCL;
+ flags &= ~(RENAME_NOREPLACE);
+
retry:
err = vfs_path_parent_lookup(to, lookup_flags | LOOKUP_BENEATH,
&new_path, &new_last, &new_type,
@@ -743,7 +747,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
}
new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
- lookup_flags | LOOKUP_RENAME_TARGET);
+ lookup_flags | target_lookup_flags);
if (IS_ERR(new_dentry)) {
err = PTR_ERR(new_dentry);
goto out3;
@@ -754,16 +758,6 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
goto out4;
}
- /*
- * explicitly handle file overwrite case, for compatibility with
- * filesystems that may not support rename flags (e.g: fuse)
- */
- if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) {
- err = -EEXIST;
- goto out4;
- }
- flags &= ~(RENAME_NOREPLACE);
-
if (old_child == trap) {
err = -EINVAL;
goto out4;
--
2.47.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl()
2025-02-07 3:36 ` [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl() NeilBrown
@ 2025-02-12 2:50 ` kernel test robot
2025-02-12 3:16 ` Al Viro
1 sibling, 0 replies; 15+ messages in thread
From: kernel test robot @ 2025-02-12 2:50 UTC (permalink / raw)
To: NeilBrown
Cc: oe-lkp, lkp, linux-fsdevel, linux-nfs, linux-cifs,
Christian Brauner, Alexander Viro, Jan Kara, Greg Kroah-Hartman,
Rafael J. Wysocki, Danilo Krummrich, Kent Overstreet,
Trond Myklebust, Anna Schumaker, Namjae Jeon, Steve French,
Sergey Senozhatsky, Tom Talpey, Paul Moore, Eric Paris,
linux-kernel, linux-bcachefs, audit, oliver.sang
Hello,
kernel test robot noticed "BUG:unable_to_handle_page_fault_for_address" on:
commit: 9a292bc4cbb25ca84f90dbacdf3064a9d6e7804f ("[PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl()")
url: https://github.com/intel-lab-lkp/linux/commits/NeilBrown/VFS-change-kern_path_locked-and-user_path_locked_at-to-never-return-negative-dentry/20250207-185417
base: https://git.kernel.org/cgit/linux/kernel/git/vfs/vfs.git vfs.all
patch link: https://lore.kernel.org/all/20250207034040.3402438-3-neilb@suse.de/
patch subject: [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl()
in testcase: trinity
version:
with following parameters:
runtime: 300s
group: group-01
nr_groups: 5
config: i386-randconfig-053-20250208
compiler: clang-19
test machine: qemu-system-i386 -enable-kvm -cpu SandyBridge -smp 2 -m 4G
(please refer to attached dmesg/kmsg for entire log/backtrace)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202502121009.f7bc8e67-lkp@intel.com
[ 163.118842][ T4188] BUG: unable to handle page fault for address: fffffffe
[ 163.119485][ T4188] #PF: supervisor read access in kernel mode
[ 163.120015][ T4188] #PF: error_code(0x0000) - not-present page
[ 163.120523][ T4188] *pde = 026d3067 *pte = 00000000
[ 163.120971][ T4188] Oops: Oops: 0000 [#1]
[ 163.121339][ T4188] CPU: 0 UID: 65534 PID: 4188 Comm: trinity-c3 Tainted: G S 6.14.0-rc1-00084-g9a292bc4cbb2 #1
[ 163.122321][ T4188] Tainted: [S]=CPU_OUT_OF_SPEC
[ 163.122717][ T4188] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[ 163.123520][ T4188] EIP: lookup_one_qstr_excl (include/linux/dcache.h:416 include/linux/dcache.h:421 include/linux/dcache.h:467 fs/namei.c:1696)
[ 163.123973][ T4188] Code: 5e 89 d8 89 fa e8 62 ed 00 00 85 c0 74 58 8b 7e 18 89 c2 89 f0 89 d6 8b 4d f0 ff 17 85 c0 75 4d 89 f0 8b 75 f0 b9 00 00 38 00 <23> 08 89 f2 81 e2 00 00 02 00 09 ca 74 13 f7 c6 00 00 04 00 74 17
All code
========
0: 5e pop %rsi
1: 89 d8 mov %ebx,%eax
3: 89 fa mov %edi,%edx
5: e8 62 ed 00 00 call 0xed6c
a: 85 c0 test %eax,%eax
c: 74 58 je 0x66
e: 8b 7e 18 mov 0x18(%rsi),%edi
11: 89 c2 mov %eax,%edx
13: 89 f0 mov %esi,%eax
15: 89 d6 mov %edx,%esi
17: 8b 4d f0 mov -0x10(%rbp),%ecx
1a: ff 17 call *(%rdi)
1c: 85 c0 test %eax,%eax
1e: 75 4d jne 0x6d
20: 89 f0 mov %esi,%eax
22: 8b 75 f0 mov -0x10(%rbp),%esi
25: b9 00 00 38 00 mov $0x380000,%ecx
2a:* 23 08 and (%rax),%ecx <-- trapping instruction
2c: 89 f2 mov %esi,%edx
2e: 81 e2 00 00 02 00 and $0x20000,%edx
34: 09 ca or %ecx,%edx
36: 74 13 je 0x4b
38: f7 c6 00 00 04 00 test $0x40000,%esi
3e: 74 17 je 0x57
Code starting with the faulting instruction
===========================================
0: 23 08 and (%rax),%ecx
2: 89 f2 mov %esi,%edx
4: 81 e2 00 00 02 00 and $0x20000,%edx
a: 09 ca or %ecx,%edx
c: 74 13 je 0x21
e: f7 c6 00 00 04 00 test $0x40000,%esi
14: 74 17 je 0x2d
[ 163.125537][ T4188] EAX: fffffffe EBX: c3e7e540 ECX: 00380000 EDX: 273874b2
[ 163.126145][ T4188] ESI: 00060000 EDI: fffffffe EBP: ebef9ef4 ESP: ebef9edc
[ 163.126716][ T4188] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 EFLAGS: 00010246
[ 163.127324][ T4188] CR0: 80050033 CR2: fffffffe CR3: 2b2df000 CR4: 00040690
[ 163.127872][ T4188] Call Trace:
[ 163.128178][ T4188] ? __die_body (arch/x86/kernel/dumpstack.c:478 arch/x86/kernel/dumpstack.c:420)
[ 163.128552][ T4188] ? __die (arch/x86/kernel/dumpstack.c:434)
[ 163.128898][ T4188] ? page_fault_oops (arch/x86/mm/fault.c:710)
[ 163.129329][ T4188] ? lock_acquire (kernel/locking/lockdep.c:5851)
[ 163.129723][ T4188] ? kernelmode_fixup_or_oops (arch/x86/mm/fault.c:737)
[ 163.130222][ T4188] ? __bad_area_nosemaphore (arch/x86/mm/fault.c:784)
[ 163.130687][ T4188] ? bad_area_nosemaphore (arch/x86/mm/fault.c:833)
[ 163.131129][ T4188] ? do_kern_addr_fault (arch/x86/mm/fault.c:1197)
[ 163.131569][ T4188] ? exc_page_fault (arch/x86/mm/fault.c:1479)
[ 163.132004][ T4188] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:? kernel/locking/lockdep.c:4408)
[ 163.132482][ T4188] ? pvclock_clocksource_read_nowd (arch/x86/mm/fault.c:1493)
[ 163.132999][ T4188] ? handle_exception (arch/x86/entry/entry_32.S:1048)
[ 163.133429][ T4188] ? pvclock_clocksource_read_nowd (arch/x86/mm/fault.c:1493)
[ 163.133946][ T4188] ? lookup_one_qstr_excl (include/linux/dcache.h:416 include/linux/dcache.h:421 include/linux/dcache.h:467 fs/namei.c:1696)
[ 163.134390][ T4188] ? pvclock_clocksource_read_nowd (arch/x86/mm/fault.c:1493)
[ 163.134919][ T4188] ? lookup_one_qstr_excl (include/linux/dcache.h:416 include/linux/dcache.h:421 include/linux/dcache.h:467 fs/namei.c:1696)
[ 163.135354][ T4188] filename_create (include/linux/err.h:67 fs/namei.c:4091)
[ 163.135763][ T4188] do_symlinkat (fs/namei.c:4676)
[ 163.136166][ T4188] __ia32_sys_symlinkat (fs/namei.c:4696)
[ 163.136593][ T4188] ia32_sys_call (arch/x86/entry/syscall_32.c:44)
[ 163.136967][ T4188] __do_fast_syscall_32 (arch/x86/entry/common.c:?)
[ 163.137377][ T4188] do_fast_syscall_32 (arch/x86/entry/common.c:411)
[ 163.137774][ T4188] do_SYSENTER_32 (arch/x86/entry/common.c:449)
[ 163.138146][ T4188] entry_SYSENTER_32 (arch/x86/entry/entry_32.S:836)
[ 163.138542][ T4188] EIP: 0xb7f61539
[ 163.138845][ T4188] Code: 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 0f 1f 00 58 b8 77 00 00 00 cd 80 90 0f 1f
All code
========
0: 03 74 b4 01 add 0x1(%rsp,%rsi,4),%esi
4: 10 07 adc %al,(%rdi)
6: 03 74 b0 01 add 0x1(%rax,%rsi,4),%esi
a: 10 08 adc %cl,(%rax)
c: 03 74 d8 01 add 0x1(%rax,%rbx,8),%esi
...
20: 00 51 52 add %dl,0x52(%rcx)
23: 55 push %rbp
24:* 89 e5 mov %esp,%ebp <-- trapping instruction
26: 0f 34 sysenter
28: cd 80 int $0x80
2a: 5d pop %rbp
2b: 5a pop %rdx
2c: 59 pop %rcx
2d: c3 ret
2e: 90 nop
2f: 90 nop
30: 90 nop
31: 90 nop
32: 0f 1f 00 nopl (%rax)
35: 58 pop %rax
36: b8 77 00 00 00 mov $0x77,%eax
3b: cd 80 int $0x80
3d: 90 nop
3e: 0f .byte 0xf
3f: 1f (bad)
Code starting with the faulting instruction
===========================================
0: 5d pop %rbp
1: 5a pop %rdx
2: 59 pop %rcx
3: c3 ret
4: 90 nop
5: 90 nop
6: 90 nop
7: 90 nop
8: 0f 1f 00 nopl (%rax)
b: 58 pop %rax
c: b8 77 00 00 00 mov $0x77,%eax
11: cd 80 int $0x80
13: 90 nop
14: 0f .byte 0xf
15: 1f (bad)
The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20250212/202502121009.f7bc8e67-lkp@intel.com
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl()
2025-02-07 3:36 ` [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl() NeilBrown
2025-02-12 2:50 ` kernel test robot
@ 2025-02-12 3:16 ` Al Viro
2025-02-12 3:25 ` Al Viro
1 sibling, 1 reply; 15+ messages in thread
From: Al Viro @ 2025-02-12 3:16 UTC (permalink / raw)
To: NeilBrown
Cc: Christian Brauner, Jan Kara, Greg Kroah-Hartman,
Rafael J. Wysocki, Danilo Krummrich, Kent Overstreet,
Trond Myklebust, Anna Schumaker, Namjae Jeon, Steve French,
Sergey Senozhatsky, Tom Talpey, Paul Moore, Eric Paris,
linux-kernel, linux-bcachefs, linux-fsdevel, linux-nfs,
linux-cifs, audit
On Fri, Feb 07, 2025 at 02:36:48PM +1100, NeilBrown wrote:
> @@ -1690,6 +1692,15 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
> dput(dentry);
> dentry = old;
> }
> +found:
... and if ->lookup() returns an error, this will blow up (as bot has just
reported).
> + if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) {
> + dput(dentry);
> + return ERR_PTR(-ENOENT);
> + }
> + if (d_is_positive(dentry) && (flags & LOOKUP_EXCL)) {
> + dput(dentry);
> + return ERR_PTR(-EEXIST);
> + }
> @@ -4077,27 +4084,13 @@ static struct dentry *filename_create(int dfd, struct filename *name,
> * '/', and a directory wasn't requested.
> */
> if (last.name[last.len] && !want_dir)
> - create_flags = 0;
> + create_flags &= ~LOOKUP_CREATE;
See the patch I've posted in earlier thread; the entire "strip LOOKUP_CREATE"
thing is wrong.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl()
2025-02-12 3:16 ` Al Viro
@ 2025-02-12 3:25 ` Al Viro
2025-02-12 3:45 ` NeilBrown
0 siblings, 1 reply; 15+ messages in thread
From: Al Viro @ 2025-02-12 3:25 UTC (permalink / raw)
To: NeilBrown
Cc: Christian Brauner, Jan Kara, Greg Kroah-Hartman,
Rafael J. Wysocki, Danilo Krummrich, Kent Overstreet,
Trond Myklebust, Anna Schumaker, Namjae Jeon, Steve French,
Sergey Senozhatsky, Tom Talpey, Paul Moore, Eric Paris,
linux-kernel, linux-bcachefs, linux-fsdevel, linux-nfs,
linux-cifs, audit
On Wed, Feb 12, 2025 at 03:16:08AM +0000, Al Viro wrote:
> On Fri, Feb 07, 2025 at 02:36:48PM +1100, NeilBrown wrote:
> > @@ -1690,6 +1692,15 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
> > dput(dentry);
> > dentry = old;
> > }
> > +found:
>
> ... and if ->lookup() returns an error, this will blow up (as bot has just
> reported).
>
> > + if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) {
> > + dput(dentry);
> > + return ERR_PTR(-ENOENT);
> > + }
> > + if (d_is_positive(dentry) && (flags & LOOKUP_EXCL)) {
> > + dput(dentry);
> > + return ERR_PTR(-EEXIST);
> > + }
>
>
> > @@ -4077,27 +4084,13 @@ static struct dentry *filename_create(int dfd, struct filename *name,
> > * '/', and a directory wasn't requested.
> > */
> > if (last.name[last.len] && !want_dir)
> > - create_flags = 0;
> > + create_flags &= ~LOOKUP_CREATE;
>
> See the patch I've posted in earlier thread; the entire "strip LOOKUP_CREATE"
> thing is wrong.
On top of mainline that's
filename_create(): don't force handling trailing slashes into the common path
Only mkdir accepts pathnames that end with / - anything like mknod() (symlink(),
etc.) always fails on those. Don't try to force that the common codepath -
all we are doing is a lookup and check for existence to determine which
error should it be. Do that before bothering with mnt_want_write(), etc.;
as far as underlying filesystem is concerned it's just a lookup. Simplifies
the normal codepath and kills the lookup intent dependency on more than
the call site.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
diff --git a/fs/namei.c b/fs/namei.c
index 3ab9440c5b93..6189e54f767a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4054,13 +4054,13 @@ static struct dentry *filename_create(int dfd, struct filename *name,
struct dentry *dentry = ERR_PTR(-EEXIST);
struct qstr last;
bool want_dir = lookup_flags & LOOKUP_DIRECTORY;
- unsigned int reval_flag = lookup_flags & LOOKUP_REVAL;
- unsigned int create_flags = LOOKUP_CREATE | LOOKUP_EXCL;
int type;
int err2;
int error;
- error = filename_parentat(dfd, name, reval_flag, path, &last, &type);
+ lookup_flags &= LOOKUP_REVAL;
+
+ error = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
if (error)
return ERR_PTR(error);
@@ -4070,18 +4070,28 @@ static struct dentry *filename_create(int dfd, struct filename *name,
*/
if (unlikely(type != LAST_NORM))
goto out;
+ /*
+ * mkdir foo/bar/ is OK, but for anything else a slash in the end
+ * is always an error; the only question is which one.
+ */
+ if (unlikely(last.name[last.len] && !want_dir)) {
+ dentry = lookup_dcache(&last, path->dentry, lookup_flags);
+ if (!dentry)
+ dentry = lookup_slow(&last, path->dentry, lookup_flags);
+ if (!IS_ERR(dentry)) {
+ error = d_is_positive(dentry) ? -EEXIST : -ENOENT;
+ dput(dentry);
+ dentry = ERR_PTR(error);
+ }
+ goto out;
+ }
/* don't fail immediately if it's r/o, at least try to report other errors */
err2 = mnt_want_write(path->mnt);
- /*
- * Do the final lookup. Suppress 'create' if there is a trailing
- * '/', and a directory wasn't requested.
- */
- if (last.name[last.len] && !want_dir)
- create_flags = 0;
+ /* do the final lookup */
inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
dentry = lookup_one_qstr_excl(&last, path->dentry,
- reval_flag | create_flags);
+ lookup_flags | LOOKUP_CREATE | LOOKUP_EXCL);
if (IS_ERR(dentry))
goto unlock;
@@ -4089,16 +4099,6 @@ static struct dentry *filename_create(int dfd, struct filename *name,
if (d_is_positive(dentry))
goto fail;
- /*
- * Special case - lookup gave negative, but... we had foo/bar/
- * From the vfs_mknod() POV we just have a negative dentry -
- * all is fine. Let's be bastards - you had / on the end, you've
- * been asking for (non-existent) directory. -ENOENT for you.
- */
- if (unlikely(!create_flags)) {
- error = -ENOENT;
- goto fail;
- }
if (unlikely(err2)) {
error = err2;
goto fail;
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl()
2025-02-12 3:25 ` Al Viro
@ 2025-02-12 3:45 ` NeilBrown
2025-02-12 4:06 ` Al Viro
0 siblings, 1 reply; 15+ messages in thread
From: NeilBrown @ 2025-02-12 3:45 UTC (permalink / raw)
To: Al Viro
Cc: Christian Brauner, Jan Kara, Greg Kroah-Hartman,
Rafael J. Wysocki, Danilo Krummrich, Kent Overstreet,
Trond Myklebust, Anna Schumaker, Namjae Jeon, Steve French,
Sergey Senozhatsky, Tom Talpey, Paul Moore, Eric Paris,
linux-kernel, linux-bcachefs, linux-fsdevel, linux-nfs,
linux-cifs, audit
On Wed, 12 Feb 2025, Al Viro wrote:
> On Wed, Feb 12, 2025 at 03:16:08AM +0000, Al Viro wrote:
> > On Fri, Feb 07, 2025 at 02:36:48PM +1100, NeilBrown wrote:
> > > @@ -1690,6 +1692,15 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
> > > dput(dentry);
> > > dentry = old;
> > > }
> > > +found:
> >
> > ... and if ->lookup() returns an error, this will blow up (as bot has just
> > reported).
Yes, I need an early exit if (IS_ERR(dentry)). Thanks.
> >
> > > + if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) {
> > > + dput(dentry);
> > > + return ERR_PTR(-ENOENT);
> > > + }
> > > + if (d_is_positive(dentry) && (flags & LOOKUP_EXCL)) {
> > > + dput(dentry);
> > > + return ERR_PTR(-EEXIST);
> > > + }
> >
> >
> > > @@ -4077,27 +4084,13 @@ static struct dentry *filename_create(int dfd, struct filename *name,
> > > * '/', and a directory wasn't requested.
> > > */
> > > if (last.name[last.len] && !want_dir)
> > > - create_flags = 0;
> > > + create_flags &= ~LOOKUP_CREATE;
> >
> > See the patch I've posted in earlier thread; the entire "strip LOOKUP_CREATE"
> > thing is wrong.
>
> On top of mainline that's
>
> filename_create(): don't force handling trailing slashes into the common path
>
> Only mkdir accepts pathnames that end with / - anything like mknod() (symlink(),
> etc.) always fails on those. Don't try to force that the common codepath -
> all we are doing is a lookup and check for existence to determine which
> error should it be. Do that before bothering with mnt_want_write(), etc.;
> as far as underlying filesystem is concerned it's just a lookup. Simplifies
> the normal codepath and kills the lookup intent dependency on more than
> the call site.
>
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> ---
> diff --git a/fs/namei.c b/fs/namei.c
> index 3ab9440c5b93..6189e54f767a 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -4054,13 +4054,13 @@ static struct dentry *filename_create(int dfd, struct filename *name,
> struct dentry *dentry = ERR_PTR(-EEXIST);
> struct qstr last;
> bool want_dir = lookup_flags & LOOKUP_DIRECTORY;
> - unsigned int reval_flag = lookup_flags & LOOKUP_REVAL;
> - unsigned int create_flags = LOOKUP_CREATE | LOOKUP_EXCL;
> int type;
> int err2;
> int error;
>
> - error = filename_parentat(dfd, name, reval_flag, path, &last, &type);
> + lookup_flags &= LOOKUP_REVAL;
> +
> + error = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
> if (error)
> return ERR_PTR(error);
>
> @@ -4070,18 +4070,28 @@ static struct dentry *filename_create(int dfd, struct filename *name,
> */
> if (unlikely(type != LAST_NORM))
> goto out;
> + /*
> + * mkdir foo/bar/ is OK, but for anything else a slash in the end
> + * is always an error; the only question is which one.
> + */
> + if (unlikely(last.name[last.len] && !want_dir)) {
> + dentry = lookup_dcache(&last, path->dentry, lookup_flags);
> + if (!dentry)
> + dentry = lookup_slow(&last, path->dentry, lookup_flags);
I do see some value in the simplicity of this approach, though maybe not
as much value as you see. But the above uses inode_lock_share(), rather
than the nested version, so lockdep will complain.
If you open-code a nested lock, or write a new helper, you get very
close to the sequence for calling lookup_one_qstr_excl() below. So
it isn't clear to me that the benefit is worth the cost.
This current code in filename_create isn't actually wrong is it?
Thanks,
NeilBrown
> + if (!IS_ERR(dentry)) {
> + error = d_is_positive(dentry) ? -EEXIST : -ENOENT;
> + dput(dentry);
> + dentry = ERR_PTR(error);
> + }
> + goto out;
> + }
>
> /* don't fail immediately if it's r/o, at least try to report other errors */
> err2 = mnt_want_write(path->mnt);
> - /*
> - * Do the final lookup. Suppress 'create' if there is a trailing
> - * '/', and a directory wasn't requested.
> - */
> - if (last.name[last.len] && !want_dir)
> - create_flags = 0;
> + /* do the final lookup */
> inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
> dentry = lookup_one_qstr_excl(&last, path->dentry,
> - reval_flag | create_flags);
> + lookup_flags | LOOKUP_CREATE | LOOKUP_EXCL);
> if (IS_ERR(dentry))
> goto unlock;
>
> @@ -4089,16 +4099,6 @@ static struct dentry *filename_create(int dfd, struct filename *name,
> if (d_is_positive(dentry))
> goto fail;
>
> - /*
> - * Special case - lookup gave negative, but... we had foo/bar/
> - * From the vfs_mknod() POV we just have a negative dentry -
> - * all is fine. Let's be bastards - you had / on the end, you've
> - * been asking for (non-existent) directory. -ENOENT for you.
> - */
> - if (unlikely(!create_flags)) {
> - error = -ENOENT;
> - goto fail;
> - }
> if (unlikely(err2)) {
> error = err2;
> goto fail;
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl()
2025-02-12 3:45 ` NeilBrown
@ 2025-02-12 4:06 ` Al Viro
2025-02-12 4:40 ` NeilBrown
0 siblings, 1 reply; 15+ messages in thread
From: Al Viro @ 2025-02-12 4:06 UTC (permalink / raw)
To: NeilBrown
Cc: Christian Brauner, Jan Kara, Greg Kroah-Hartman,
Rafael J. Wysocki, Danilo Krummrich, Kent Overstreet,
Trond Myklebust, Anna Schumaker, Namjae Jeon, Steve French,
Sergey Senozhatsky, Tom Talpey, Paul Moore, Eric Paris,
linux-kernel, linux-bcachefs, linux-fsdevel, linux-nfs,
linux-cifs, audit
On Wed, Feb 12, 2025 at 02:45:04PM +1100, NeilBrown wrote:
> On Wed, 12 Feb 2025, Al Viro wrote:
> I do see some value in the simplicity of this approach, though maybe not
> as much value as you see. But the above uses inode_lock_share(), rather
> than the nested version, so lockdep will complain.
IDGI... It doesn't grab any ->i_rwsem inside that one, so what's there to
complain about? And in that case it returns with no locks held, so...
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl()
2025-02-12 4:06 ` Al Viro
@ 2025-02-12 4:40 ` NeilBrown
0 siblings, 0 replies; 15+ messages in thread
From: NeilBrown @ 2025-02-12 4:40 UTC (permalink / raw)
To: Al Viro
Cc: Christian Brauner, Jan Kara, Greg Kroah-Hartman,
Rafael J. Wysocki, Danilo Krummrich, Kent Overstreet,
Trond Myklebust, Anna Schumaker, Namjae Jeon, Steve French,
Sergey Senozhatsky, Tom Talpey, Paul Moore, Eric Paris,
linux-kernel, linux-bcachefs, linux-fsdevel, linux-nfs,
linux-cifs, audit
On Wed, 12 Feb 2025, Al Viro wrote:
> On Wed, Feb 12, 2025 at 02:45:04PM +1100, NeilBrown wrote:
> > On Wed, 12 Feb 2025, Al Viro wrote:
>
> > I do see some value in the simplicity of this approach, though maybe not
> > as much value as you see. But the above uses inode_lock_share(), rather
> > than the nested version, so lockdep will complain.
>
> IDGI... It doesn't grab any ->i_rwsem inside that one, so what's there to
> complain about? And in that case it returns with no locks held, so...
>
Sorry - my bad. I saw the difference in nesting and jumped the wrong
way.
NeilBrown
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 0/2 v2] VFS: minor improvements to a couple of interfaces
@ 2025-02-17 0:27 NeilBrown
2025-02-17 0:27 ` [PATCH 1/2] VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry NeilBrown
2025-02-17 0:27 ` [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl() NeilBrown
0 siblings, 2 replies; 15+ messages in thread
From: NeilBrown @ 2025-02-17 0:27 UTC (permalink / raw)
To: Christian Brauner, Alexander Viro, Jan Kara; +Cc: linux-fsdevel, linux-nfs
Hi,
please replace the top two patches in vfs/vfs-6.15.async.dir
with these two revised versions.
Both add text to filesytems/porting.rst
The first moves an 'err = 0' as was requested if any refresh is needed.
The second adds a check for errors from ->lookup. This omission is
triggering various error reports.
Thanks,
NeilBrown
[PATCH 1/2] VFS: change kern_path_locked() and user_path_locked_at()
[PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl()
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 1/2] VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry
2025-02-17 0:27 [PATCH 0/2 v2] VFS: minor improvements to a couple of interfaces NeilBrown
@ 2025-02-17 0:27 ` NeilBrown
2025-02-17 8:26 ` Christian Brauner
` (2 more replies)
2025-02-17 0:27 ` [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl() NeilBrown
1 sibling, 3 replies; 15+ messages in thread
From: NeilBrown @ 2025-02-17 0:27 UTC (permalink / raw)
To: Christian Brauner, Alexander Viro, Jan Kara; +Cc: linux-fsdevel, linux-nfs
No callers of kern_path_locked() or user_path_locked_at() want a
negative dentry. So change them to return -ENOENT instead. This
simplifies callers.
This results in a subtle change to bcachefs in that an ioctl will now
return -ENOENT in preference to -EXDEV. I believe this restores the
behaviour to what it was prior to
Commit bbe6a7c899e7 ("bch2_ioctl_subvolume_destroy(): fix locking")
Signed-off-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20250207034040.3402438-2-neilb@suse.de
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
Documentation/filesystems/porting.rst | 8 ++++
drivers/base/devtmpfs.c | 65 +++++++++++++--------------
fs/bcachefs/fs-ioctl.c | 4 --
fs/namei.c | 4 ++
kernel/audit_watch.c | 12 ++---
5 files changed, 48 insertions(+), 45 deletions(-)
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 1639e78e3146..2ead47e20677 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -1157,3 +1157,11 @@ in normal case it points into the pathname being looked up.
NOTE: if you need something like full path from the root of filesystem,
you are still on your own - this assists with simple cases, but it's not
magic.
+
+---
+
+** recommended**
+
+kern_path_locked() and user_path_locked() no longer return a negative
+dentry so this doesn't need to be checked. If the name cannot be found,
+ERR_PTR(-ENOENT) is returned.
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index b848764ef018..c9e34842139f 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -245,15 +245,12 @@ static int dev_rmdir(const char *name)
dentry = kern_path_locked(name, &parent);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- if (d_really_is_positive(dentry)) {
- if (d_inode(dentry)->i_private == &thread)
- err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry),
- dentry);
- else
- err = -EPERM;
- } else {
- err = -ENOENT;
- }
+ if (d_inode(dentry)->i_private == &thread)
+ err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry),
+ dentry);
+ else
+ err = -EPERM;
+
dput(dentry);
inode_unlock(d_inode(parent.dentry));
path_put(&parent);
@@ -310,6 +307,8 @@ static int handle_remove(const char *nodename, struct device *dev)
{
struct path parent;
struct dentry *dentry;
+ struct kstat stat;
+ struct path p;
int deleted = 0;
int err;
@@ -317,32 +316,28 @@ static int handle_remove(const char *nodename, struct device *dev)
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- if (d_really_is_positive(dentry)) {
- struct kstat stat;
- struct path p = {.mnt = parent.mnt, .dentry = dentry};
- err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE,
- AT_STATX_SYNC_AS_STAT);
- if (!err && dev_mynode(dev, d_inode(dentry), &stat)) {
- struct iattr newattrs;
- /*
- * before unlinking this node, reset permissions
- * of possible references like hardlinks
- */
- newattrs.ia_uid = GLOBAL_ROOT_UID;
- newattrs.ia_gid = GLOBAL_ROOT_GID;
- newattrs.ia_mode = stat.mode & ~0777;
- newattrs.ia_valid =
- ATTR_UID|ATTR_GID|ATTR_MODE;
- inode_lock(d_inode(dentry));
- notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL);
- inode_unlock(d_inode(dentry));
- err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry),
- dentry, NULL);
- if (!err || err == -ENOENT)
- deleted = 1;
- }
- } else {
- err = -ENOENT;
+ p.mnt = parent.mnt;
+ p.dentry = dentry;
+ err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE,
+ AT_STATX_SYNC_AS_STAT);
+ if (!err && dev_mynode(dev, d_inode(dentry), &stat)) {
+ struct iattr newattrs;
+ /*
+ * before unlinking this node, reset permissions
+ * of possible references like hardlinks
+ */
+ newattrs.ia_uid = GLOBAL_ROOT_UID;
+ newattrs.ia_gid = GLOBAL_ROOT_GID;
+ newattrs.ia_mode = stat.mode & ~0777;
+ newattrs.ia_valid =
+ ATTR_UID|ATTR_GID|ATTR_MODE;
+ inode_lock(d_inode(dentry));
+ notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL);
+ inode_unlock(d_inode(dentry));
+ err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry),
+ dentry, NULL);
+ if (!err || err == -ENOENT)
+ deleted = 1;
}
dput(dentry);
inode_unlock(d_inode(parent.dentry));
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 15725b4ce393..595b57fabc9a 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -511,10 +511,6 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
ret = -EXDEV;
goto err;
}
- if (!d_is_positive(victim)) {
- ret = -ENOENT;
- goto err;
- }
ret = __bch2_unlink(dir, victim, true);
if (!ret) {
fsnotify_rmdir(dir, victim);
diff --git a/fs/namei.c b/fs/namei.c
index 3ab9440c5b93..fb6da3ca0ca5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2741,6 +2741,10 @@ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct
}
inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
d = lookup_one_qstr_excl(&last, path->dentry, 0);
+ if (!IS_ERR(d) && d_is_negative(d)) {
+ dput(d);
+ d = ERR_PTR(-ENOENT);
+ }
if (IS_ERR(d)) {
inode_unlock(path->dentry->d_inode);
path_put(path);
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 7f358740e958..367eaf2c78b7 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -350,11 +350,10 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
struct dentry *d = kern_path_locked(watch->path, parent);
if (IS_ERR(d))
return PTR_ERR(d);
- if (d_is_positive(d)) {
- /* update watch filter fields */
- watch->dev = d->d_sb->s_dev;
- watch->ino = d_backing_inode(d)->i_ino;
- }
+ /* update watch filter fields */
+ watch->dev = d->d_sb->s_dev;
+ watch->ino = d_backing_inode(d)->i_ino;
+
inode_unlock(d_backing_inode(parent->dentry));
dput(d);
return 0;
@@ -419,10 +418,11 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
/* caller expects mutex locked */
mutex_lock(&audit_filter_mutex);
- if (ret) {
+ if (ret && ret != -ENOENT) {
audit_put_watch(watch);
return ret;
}
+ ret = 0;
/* either find an old parent or attach a new one */
parent = audit_find_parent(d_backing_inode(parent_path.dentry));
--
2.47.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl()
2025-02-17 0:27 [PATCH 0/2 v2] VFS: minor improvements to a couple of interfaces NeilBrown
2025-02-17 0:27 ` [PATCH 1/2] VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry NeilBrown
@ 2025-02-17 0:27 ` NeilBrown
2025-02-17 13:46 ` Jeff Layton
1 sibling, 1 reply; 15+ messages in thread
From: NeilBrown @ 2025-02-17 0:27 UTC (permalink / raw)
To: Christian Brauner, Alexander Viro, Jan Kara; +Cc: linux-fsdevel, linux-nfs
Callers of lookup_one_qstr_excl() often check if the result is negative or
positive.
These changes can easily be moved into lookup_one_qstr_excl() by checking the
lookup flags:
LOOKUP_CREATE means it is NOT an error if the name doesn't exist.
LOOKUP_EXCL means it IS an error if the name DOES exist.
This patch adds these checks, then removes error checks from callers,
and ensures that appropriate flags are passed.
This subtly changes the meaning of LOOKUP_EXCL. Previously it could
only accompany LOOKUP_CREATE. Now it can accompany LOOKUP_RENAME_TARGET
as well. A couple of small changes are needed to accommodate this. The
NFS change is functionally a no-op but ensures nfs_is_exclusive_create() does
exactly what the name says.
Signed-off-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20250207034040.3402438-3-neilb@suse.de
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
Documentation/filesystems/porting.rst | 12 ++++++
fs/namei.c | 61 +++++++++------------------
fs/nfs/dir.c | 3 +-
fs/smb/server/vfs.c | 26 +++++-------
4 files changed, 45 insertions(+), 57 deletions(-)
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 2ead47e20677..3b6622fbd66b 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -1165,3 +1165,15 @@ magic.
kern_path_locked() and user_path_locked() no longer return a negative
dentry so this doesn't need to be checked. If the name cannot be found,
ERR_PTR(-ENOENT) is returned.
+
+** recommend**
+
+lookup_one_qstr_excl() is changed to return errors in more cases, so
+these conditions don't require explicit checks.
+ - if LOOKUP_CREATE is NOT given, then the dentry won't be negative,
+ ERR_PTR(-ENOENT) is returned instead
+ - if LOOKUP_EXCL IS given, then the dentry won't be positive,
+ ERR_PTR(-EEXIST) is rreturned instread
+
+LOOKUP_EXCL now means "target must not exist". It can be combined with
+LOOK_CREATE or LOOKUP_RENAME_TARGET.
diff --git a/fs/namei.c b/fs/namei.c
index fb6da3ca0ca5..b7cdca902803 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1670,6 +1670,8 @@ static struct dentry *lookup_dcache(const struct qstr *name,
* dentries - as the matter of fact, this only gets called
* when directory is guaranteed to have no in-lookup children
* at all.
+ * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed.
+ * Will return -EEXIST if name is found and LOOKUP_EXCL was passed.
*/
struct dentry *lookup_one_qstr_excl(const struct qstr *name,
struct dentry *base,
@@ -1680,7 +1682,7 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
struct inode *dir = base->d_inode;
if (dentry)
- return dentry;
+ goto found;
/* Don't create child dentry for a dead directory. */
if (unlikely(IS_DEADDIR(dir)))
@@ -1695,6 +1697,17 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
dput(dentry);
dentry = old;
}
+found:
+ if (IS_ERR(dentry))
+ return dentry;
+ if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) {
+ dput(dentry);
+ return ERR_PTR(-ENOENT);
+ }
+ if (d_is_positive(dentry) && (flags & LOOKUP_EXCL)) {
+ dput(dentry);
+ return ERR_PTR(-EEXIST);
+ }
return dentry;
}
EXPORT_SYMBOL(lookup_one_qstr_excl);
@@ -2741,10 +2754,6 @@ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct
}
inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
d = lookup_one_qstr_excl(&last, path->dentry, 0);
- if (!IS_ERR(d) && d_is_negative(d)) {
- dput(d);
- d = ERR_PTR(-ENOENT);
- }
if (IS_ERR(d)) {
inode_unlock(path->dentry->d_inode);
path_put(path);
@@ -4082,27 +4091,13 @@ static struct dentry *filename_create(int dfd, struct filename *name,
* '/', and a directory wasn't requested.
*/
if (last.name[last.len] && !want_dir)
- create_flags = 0;
+ create_flags &= ~LOOKUP_CREATE;
inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
dentry = lookup_one_qstr_excl(&last, path->dentry,
reval_flag | create_flags);
if (IS_ERR(dentry))
goto unlock;
- error = -EEXIST;
- if (d_is_positive(dentry))
- goto fail;
-
- /*
- * Special case - lookup gave negative, but... we had foo/bar/
- * From the vfs_mknod() POV we just have a negative dentry -
- * all is fine. Let's be bastards - you had / on the end, you've
- * been asking for (non-existent) directory. -ENOENT for you.
- */
- if (unlikely(!create_flags)) {
- error = -ENOENT;
- goto fail;
- }
if (unlikely(err2)) {
error = err2;
goto fail;
@@ -4449,10 +4444,6 @@ int do_rmdir(int dfd, struct filename *name)
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto exit3;
- if (!dentry->d_inode) {
- error = -ENOENT;
- goto exit4;
- }
error = security_path_rmdir(&path, dentry);
if (error)
goto exit4;
@@ -4583,7 +4574,7 @@ int do_unlinkat(int dfd, struct filename *name)
if (!IS_ERR(dentry)) {
/* Why not before? Because we want correct error value */
- if (last.name[last.len] || d_is_negative(dentry))
+ if (last.name[last.len])
goto slashes;
inode = dentry->d_inode;
ihold(inode);
@@ -4617,9 +4608,7 @@ int do_unlinkat(int dfd, struct filename *name)
return error;
slashes:
- if (d_is_negative(dentry))
- error = -ENOENT;
- else if (d_is_dir(dentry))
+ if (d_is_dir(dentry))
error = -EISDIR;
else
error = -ENOTDIR;
@@ -5119,7 +5108,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
struct qstr old_last, new_last;
int old_type, new_type;
struct inode *delegated_inode = NULL;
- unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
+ unsigned int lookup_flags = 0, target_flags =
+ LOOKUP_RENAME_TARGET | LOOKUP_CREATE;
bool should_retry = false;
int error = -EINVAL;
@@ -5132,6 +5122,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
if (flags & RENAME_EXCHANGE)
target_flags = 0;
+ if (flags & RENAME_NOREPLACE)
+ target_flags |= LOOKUP_EXCL;
retry:
error = filename_parentat(olddfd, from, lookup_flags, &old_path,
@@ -5173,23 +5165,12 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
- /* source must exist */
- error = -ENOENT;
- if (d_is_negative(old_dentry))
- goto exit4;
new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
lookup_flags | target_flags);
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto exit4;
- error = -EEXIST;
- if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
- goto exit5;
if (flags & RENAME_EXCHANGE) {
- error = -ENOENT;
- if (d_is_negative(new_dentry))
- goto exit5;
-
if (!d_is_dir(new_dentry)) {
error = -ENOTDIR;
if (new_last.name[new_last.len])
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2b04038b0e40..56cf16a72334 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1532,7 +1532,8 @@ static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
{
if (NFS_PROTO(dir)->version == 2)
return 0;
- return flags & LOOKUP_EXCL;
+ return (flags & (LOOKUP_CREATE | LOOKUP_EXCL)) ==
+ (LOOKUP_CREATE | LOOKUP_EXCL);
}
/*
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index 6890016e1923..fe29acef5872 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -113,11 +113,6 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
if (IS_ERR(d))
goto err_out;
- if (d_is_negative(d)) {
- dput(d);
- goto err_out;
- }
-
path->dentry = d;
path->mnt = mntget(parent_path->mnt);
@@ -693,6 +688,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
struct ksmbd_file *parent_fp;
int new_type;
int err, lookup_flags = LOOKUP_NO_SYMLINKS;
+ int target_lookup_flags = LOOKUP_RENAME_TARGET;
if (ksmbd_override_fsids(work))
return -ENOMEM;
@@ -703,6 +699,14 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
goto revert_fsids;
}
+ /*
+ * explicitly handle file overwrite case, for compatibility with
+ * filesystems that may not support rename flags (e.g: fuse)
+ */
+ if (flags & RENAME_NOREPLACE)
+ target_lookup_flags |= LOOKUP_EXCL;
+ flags &= ~(RENAME_NOREPLACE);
+
retry:
err = vfs_path_parent_lookup(to, lookup_flags | LOOKUP_BENEATH,
&new_path, &new_last, &new_type,
@@ -743,7 +747,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
}
new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
- lookup_flags | LOOKUP_RENAME_TARGET);
+ lookup_flags | target_lookup_flags);
if (IS_ERR(new_dentry)) {
err = PTR_ERR(new_dentry);
goto out3;
@@ -754,16 +758,6 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
goto out4;
}
- /*
- * explicitly handle file overwrite case, for compatibility with
- * filesystems that may not support rename flags (e.g: fuse)
- */
- if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) {
- err = -EEXIST;
- goto out4;
- }
- flags &= ~(RENAME_NOREPLACE);
-
if (old_child == trap) {
err = -EINVAL;
goto out4;
--
2.47.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 1/2] VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry
2025-02-17 0:27 ` [PATCH 1/2] VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry NeilBrown
@ 2025-02-17 8:26 ` Christian Brauner
2025-02-17 13:41 ` Jeff Layton
2025-04-14 11:01 ` Christian Brauner
2 siblings, 0 replies; 15+ messages in thread
From: Christian Brauner @ 2025-02-17 8:26 UTC (permalink / raw)
To: NeilBrown
Cc: Christian Brauner, linux-fsdevel, linux-nfs, Alexander Viro,
Jan Kara
On Mon, 17 Feb 2025 11:27:20 +1100, NeilBrown wrote:
> No callers of kern_path_locked() or user_path_locked_at() want a
> negative dentry. So change them to return -ENOENT instead. This
> simplifies callers.
>
> This results in a subtle change to bcachefs in that an ioctl will now
> return -ENOENT in preference to -EXDEV. I believe this restores the
> behaviour to what it was prior to
> Commit bbe6a7c899e7 ("bch2_ioctl_subvolume_destroy(): fix locking")
>
> [...]
Applied to the vfs-6.15.async.dir branch of the vfs/vfs.git tree.
Patches in the vfs-6.15.async.dir branch should appear in linux-next soon.
Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.
It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.
Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.
tree: https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs-6.15.async.dir
[1/2] VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry
https://git.kernel.org/vfs/vfs/c/a97b8bfbb9f1
[2/2] VFS: add common error checks to lookup_one_qstr_excl()
https://git.kernel.org/vfs/vfs/c/20c2c1baa9ab
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/2] VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry
2025-02-17 0:27 ` [PATCH 1/2] VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry NeilBrown
2025-02-17 8:26 ` Christian Brauner
@ 2025-02-17 13:41 ` Jeff Layton
2025-04-14 11:01 ` Christian Brauner
2 siblings, 0 replies; 15+ messages in thread
From: Jeff Layton @ 2025-02-17 13:41 UTC (permalink / raw)
To: NeilBrown, Christian Brauner, Alexander Viro, Jan Kara
Cc: linux-fsdevel, linux-nfs
On Mon, 2025-02-17 at 11:27 +1100, NeilBrown wrote:
> No callers of kern_path_locked() or user_path_locked_at() want a
> negative dentry. So change them to return -ENOENT instead. This
> simplifies callers.
>
> This results in a subtle change to bcachefs in that an ioctl will now
> return -ENOENT in preference to -EXDEV. I believe this restores the
> behaviour to what it was prior to
> Commit bbe6a7c899e7 ("bch2_ioctl_subvolume_destroy(): fix locking")
>
> Signed-off-by: NeilBrown <neilb@suse.de>
> Link: https://lore.kernel.org/r/20250207034040.3402438-2-neilb@suse.de
> Acked-by: Paul Moore <paul@paul-moore.com>
> Signed-off-by: Christian Brauner <brauner@kernel.org>
> ---
> Documentation/filesystems/porting.rst | 8 ++++
> drivers/base/devtmpfs.c | 65 +++++++++++++--------------
> fs/bcachefs/fs-ioctl.c | 4 --
> fs/namei.c | 4 ++
> kernel/audit_watch.c | 12 ++---
> 5 files changed, 48 insertions(+), 45 deletions(-)
>
> diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
> index 1639e78e3146..2ead47e20677 100644
> --- a/Documentation/filesystems/porting.rst
> +++ b/Documentation/filesystems/porting.rst
> @@ -1157,3 +1157,11 @@ in normal case it points into the pathname being looked up.
> NOTE: if you need something like full path from the root of filesystem,
> you are still on your own - this assists with simple cases, but it's not
> magic.
> +
> +---
> +
> +** recommended**
> +
> +kern_path_locked() and user_path_locked() no longer return a negative
> +dentry so this doesn't need to be checked. If the name cannot be found,
> +ERR_PTR(-ENOENT) is returned.
> diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
> index b848764ef018..c9e34842139f 100644
> --- a/drivers/base/devtmpfs.c
> +++ b/drivers/base/devtmpfs.c
> @@ -245,15 +245,12 @@ static int dev_rmdir(const char *name)
> dentry = kern_path_locked(name, &parent);
> if (IS_ERR(dentry))
> return PTR_ERR(dentry);
> - if (d_really_is_positive(dentry)) {
> - if (d_inode(dentry)->i_private == &thread)
> - err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry),
> - dentry);
> - else
> - err = -EPERM;
> - } else {
> - err = -ENOENT;
> - }
> + if (d_inode(dentry)->i_private == &thread)
> + err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry),
> + dentry);
> + else
> + err = -EPERM;
> +
> dput(dentry);
> inode_unlock(d_inode(parent.dentry));
> path_put(&parent);
> @@ -310,6 +307,8 @@ static int handle_remove(const char *nodename, struct device *dev)
> {
> struct path parent;
> struct dentry *dentry;
> + struct kstat stat;
> + struct path p;
> int deleted = 0;
> int err;
>
> @@ -317,32 +316,28 @@ static int handle_remove(const char *nodename, struct device *dev)
> if (IS_ERR(dentry))
> return PTR_ERR(dentry);
>
> - if (d_really_is_positive(dentry)) {
> - struct kstat stat;
> - struct path p = {.mnt = parent.mnt, .dentry = dentry};
> - err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE,
> - AT_STATX_SYNC_AS_STAT);
> - if (!err && dev_mynode(dev, d_inode(dentry), &stat)) {
> - struct iattr newattrs;
> - /*
> - * before unlinking this node, reset permissions
> - * of possible references like hardlinks
> - */
> - newattrs.ia_uid = GLOBAL_ROOT_UID;
> - newattrs.ia_gid = GLOBAL_ROOT_GID;
> - newattrs.ia_mode = stat.mode & ~0777;
> - newattrs.ia_valid =
> - ATTR_UID|ATTR_GID|ATTR_MODE;
> - inode_lock(d_inode(dentry));
> - notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL);
> - inode_unlock(d_inode(dentry));
> - err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry),
> - dentry, NULL);
> - if (!err || err == -ENOENT)
> - deleted = 1;
> - }
> - } else {
> - err = -ENOENT;
> + p.mnt = parent.mnt;
> + p.dentry = dentry;
> + err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE,
> + AT_STATX_SYNC_AS_STAT);
> + if (!err && dev_mynode(dev, d_inode(dentry), &stat)) {
> + struct iattr newattrs;
> + /*
> + * before unlinking this node, reset permissions
> + * of possible references like hardlinks
> + */
> + newattrs.ia_uid = GLOBAL_ROOT_UID;
> + newattrs.ia_gid = GLOBAL_ROOT_GID;
> + newattrs.ia_mode = stat.mode & ~0777;
> + newattrs.ia_valid =
> + ATTR_UID|ATTR_GID|ATTR_MODE;
> + inode_lock(d_inode(dentry));
> + notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL);
> + inode_unlock(d_inode(dentry));
> + err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry),
> + dentry, NULL);
> + if (!err || err == -ENOENT)
> + deleted = 1;
> }
> dput(dentry);
> inode_unlock(d_inode(parent.dentry));
> diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
> index 15725b4ce393..595b57fabc9a 100644
> --- a/fs/bcachefs/fs-ioctl.c
> +++ b/fs/bcachefs/fs-ioctl.c
> @@ -511,10 +511,6 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
> ret = -EXDEV;
> goto err;
> }
> - if (!d_is_positive(victim)) {
> - ret = -ENOENT;
> - goto err;
> - }
> ret = __bch2_unlink(dir, victim, true);
> if (!ret) {
> fsnotify_rmdir(dir, victim);
> diff --git a/fs/namei.c b/fs/namei.c
> index 3ab9440c5b93..fb6da3ca0ca5 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -2741,6 +2741,10 @@ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct
> }
> inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
> d = lookup_one_qstr_excl(&last, path->dentry, 0);
> + if (!IS_ERR(d) && d_is_negative(d)) {
> + dput(d);
> + d = ERR_PTR(-ENOENT);
> + }
> if (IS_ERR(d)) {
> inode_unlock(path->dentry->d_inode);
> path_put(path);
> diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
> index 7f358740e958..367eaf2c78b7 100644
> --- a/kernel/audit_watch.c
> +++ b/kernel/audit_watch.c
> @@ -350,11 +350,10 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
> struct dentry *d = kern_path_locked(watch->path, parent);
> if (IS_ERR(d))
> return PTR_ERR(d);
> - if (d_is_positive(d)) {
> - /* update watch filter fields */
> - watch->dev = d->d_sb->s_dev;
> - watch->ino = d_backing_inode(d)->i_ino;
> - }
> + /* update watch filter fields */
> + watch->dev = d->d_sb->s_dev;
> + watch->ino = d_backing_inode(d)->i_ino;
> +
> inode_unlock(d_backing_inode(parent->dentry));
> dput(d);
> return 0;
> @@ -419,10 +418,11 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
> /* caller expects mutex locked */
> mutex_lock(&audit_filter_mutex);
>
> - if (ret) {
> + if (ret && ret != -ENOENT) {
> audit_put_watch(watch);
> return ret;
> }
> + ret = 0;
>
> /* either find an old parent or attach a new one */
> parent = audit_find_parent(d_backing_inode(parent_path.dentry));
Reviewed-by: Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl()
2025-02-17 0:27 ` [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl() NeilBrown
@ 2025-02-17 13:46 ` Jeff Layton
0 siblings, 0 replies; 15+ messages in thread
From: Jeff Layton @ 2025-02-17 13:46 UTC (permalink / raw)
To: NeilBrown, Christian Brauner, Alexander Viro, Jan Kara
Cc: linux-fsdevel, linux-nfs
On Mon, 2025-02-17 at 11:27 +1100, NeilBrown wrote:
> Callers of lookup_one_qstr_excl() often check if the result is negative or
> positive.
> These changes can easily be moved into lookup_one_qstr_excl() by checking the
> lookup flags:
> LOOKUP_CREATE means it is NOT an error if the name doesn't exist.
> LOOKUP_EXCL means it IS an error if the name DOES exist.
>
> This patch adds these checks, then removes error checks from callers,
> and ensures that appropriate flags are passed.
>
> This subtly changes the meaning of LOOKUP_EXCL. Previously it could
> only accompany LOOKUP_CREATE. Now it can accompany LOOKUP_RENAME_TARGET
> as well. A couple of small changes are needed to accommodate this. The
> NFS change is functionally a no-op but ensures nfs_is_exclusive_create() does
> exactly what the name says.
>
> Signed-off-by: NeilBrown <neilb@suse.de>
> Link: https://lore.kernel.org/r/20250207034040.3402438-3-neilb@suse.de
> Signed-off-by: Christian Brauner <brauner@kernel.org>
> ---
> Documentation/filesystems/porting.rst | 12 ++++++
> fs/namei.c | 61 +++++++++------------------
> fs/nfs/dir.c | 3 +-
> fs/smb/server/vfs.c | 26 +++++-------
> 4 files changed, 45 insertions(+), 57 deletions(-)
>
> diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
> index 2ead47e20677..3b6622fbd66b 100644
> --- a/Documentation/filesystems/porting.rst
> +++ b/Documentation/filesystems/porting.rst
> @@ -1165,3 +1165,15 @@ magic.
> kern_path_locked() and user_path_locked() no longer return a negative
> dentry so this doesn't need to be checked. If the name cannot be found,
> ERR_PTR(-ENOENT) is returned.
> +
> +** recommend**
> +
> +lookup_one_qstr_excl() is changed to return errors in more cases, so
> +these conditions don't require explicit checks.
> + - if LOOKUP_CREATE is NOT given, then the dentry won't be negative,
> + ERR_PTR(-ENOENT) is returned instead
> + - if LOOKUP_EXCL IS given, then the dentry won't be positive,
> + ERR_PTR(-EEXIST) is rreturned instread
> +
> +LOOKUP_EXCL now means "target must not exist". It can be combined with
> +LOOK_CREATE or LOOKUP_RENAME_TARGET.
> diff --git a/fs/namei.c b/fs/namei.c
> index fb6da3ca0ca5..b7cdca902803 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -1670,6 +1670,8 @@ static struct dentry *lookup_dcache(const struct qstr *name,
> * dentries - as the matter of fact, this only gets called
> * when directory is guaranteed to have no in-lookup children
> * at all.
> + * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed.
> + * Will return -EEXIST if name is found and LOOKUP_EXCL was passed.
> */
> struct dentry *lookup_one_qstr_excl(const struct qstr *name,
> struct dentry *base,
> @@ -1680,7 +1682,7 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
> struct inode *dir = base->d_inode;
>
> if (dentry)
> - return dentry;
> + goto found;
>
> /* Don't create child dentry for a dead directory. */
> if (unlikely(IS_DEADDIR(dir)))
> @@ -1695,6 +1697,17 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
> dput(dentry);
> dentry = old;
> }
> +found:
> + if (IS_ERR(dentry))
> + return dentry;
> + if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) {
> + dput(dentry);
> + return ERR_PTR(-ENOENT);
> + }
> + if (d_is_positive(dentry) && (flags & LOOKUP_EXCL)) {
> + dput(dentry);
> + return ERR_PTR(-EEXIST);
> + }
> return dentry;
> }
> EXPORT_SYMBOL(lookup_one_qstr_excl);
> @@ -2741,10 +2754,6 @@ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct
> }
> inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
> d = lookup_one_qstr_excl(&last, path->dentry, 0);
> - if (!IS_ERR(d) && d_is_negative(d)) {
> - dput(d);
> - d = ERR_PTR(-ENOENT);
> - }
> if (IS_ERR(d)) {
> inode_unlock(path->dentry->d_inode);
> path_put(path);
> @@ -4082,27 +4091,13 @@ static struct dentry *filename_create(int dfd, struct filename *name,
> * '/', and a directory wasn't requested.
> */
> if (last.name[last.len] && !want_dir)
> - create_flags = 0;
> + create_flags &= ~LOOKUP_CREATE;
> inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
> dentry = lookup_one_qstr_excl(&last, path->dentry,
> reval_flag | create_flags);
> if (IS_ERR(dentry))
> goto unlock;
>
> - error = -EEXIST;
> - if (d_is_positive(dentry))
> - goto fail;
> -
> - /*
> - * Special case - lookup gave negative, but... we had foo/bar/
> - * From the vfs_mknod() POV we just have a negative dentry -
> - * all is fine. Let's be bastards - you had / on the end, you've
> - * been asking for (non-existent) directory. -ENOENT for you.
> - */
> - if (unlikely(!create_flags)) {
> - error = -ENOENT;
> - goto fail;
> - }
> if (unlikely(err2)) {
> error = err2;
> goto fail;
> @@ -4449,10 +4444,6 @@ int do_rmdir(int dfd, struct filename *name)
> error = PTR_ERR(dentry);
> if (IS_ERR(dentry))
> goto exit3;
> - if (!dentry->d_inode) {
> - error = -ENOENT;
> - goto exit4;
> - }
> error = security_path_rmdir(&path, dentry);
> if (error)
> goto exit4;
> @@ -4583,7 +4574,7 @@ int do_unlinkat(int dfd, struct filename *name)
> if (!IS_ERR(dentry)) {
>
> /* Why not before? Because we want correct error value */
> - if (last.name[last.len] || d_is_negative(dentry))
> + if (last.name[last.len])
> goto slashes;
> inode = dentry->d_inode;
> ihold(inode);
> @@ -4617,9 +4608,7 @@ int do_unlinkat(int dfd, struct filename *name)
> return error;
>
> slashes:
> - if (d_is_negative(dentry))
> - error = -ENOENT;
> - else if (d_is_dir(dentry))
> + if (d_is_dir(dentry))
> error = -EISDIR;
> else
> error = -ENOTDIR;
> @@ -5119,7 +5108,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> struct qstr old_last, new_last;
> int old_type, new_type;
> struct inode *delegated_inode = NULL;
> - unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
> + unsigned int lookup_flags = 0, target_flags =
> + LOOKUP_RENAME_TARGET | LOOKUP_CREATE;
> bool should_retry = false;
> int error = -EINVAL;
>
> @@ -5132,6 +5122,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
>
> if (flags & RENAME_EXCHANGE)
> target_flags = 0;
> + if (flags & RENAME_NOREPLACE)
> + target_flags |= LOOKUP_EXCL;
>
> retry:
> error = filename_parentat(olddfd, from, lookup_flags, &old_path,
> @@ -5173,23 +5165,12 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> error = PTR_ERR(old_dentry);
> if (IS_ERR(old_dentry))
> goto exit3;
> - /* source must exist */
> - error = -ENOENT;
> - if (d_is_negative(old_dentry))
> - goto exit4;
> new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
> lookup_flags | target_flags);
> error = PTR_ERR(new_dentry);
> if (IS_ERR(new_dentry))
> goto exit4;
> - error = -EEXIST;
> - if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
> - goto exit5;
> if (flags & RENAME_EXCHANGE) {
> - error = -ENOENT;
> - if (d_is_negative(new_dentry))
> - goto exit5;
> -
> if (!d_is_dir(new_dentry)) {
> error = -ENOTDIR;
> if (new_last.name[new_last.len])
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index 2b04038b0e40..56cf16a72334 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -1532,7 +1532,8 @@ static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
> {
> if (NFS_PROTO(dir)->version == 2)
> return 0;
> - return flags & LOOKUP_EXCL;
> + return (flags & (LOOKUP_CREATE | LOOKUP_EXCL)) ==
> + (LOOKUP_CREATE | LOOKUP_EXCL);
> }
>
> /*
> diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
> index 6890016e1923..fe29acef5872 100644
> --- a/fs/smb/server/vfs.c
> +++ b/fs/smb/server/vfs.c
> @@ -113,11 +113,6 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
> if (IS_ERR(d))
> goto err_out;
>
> - if (d_is_negative(d)) {
> - dput(d);
> - goto err_out;
> - }
> -
> path->dentry = d;
> path->mnt = mntget(parent_path->mnt);
>
> @@ -693,6 +688,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
> struct ksmbd_file *parent_fp;
> int new_type;
> int err, lookup_flags = LOOKUP_NO_SYMLINKS;
> + int target_lookup_flags = LOOKUP_RENAME_TARGET;
>
> if (ksmbd_override_fsids(work))
> return -ENOMEM;
> @@ -703,6 +699,14 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
> goto revert_fsids;
> }
>
> + /*
> + * explicitly handle file overwrite case, for compatibility with
> + * filesystems that may not support rename flags (e.g: fuse)
> + */
> + if (flags & RENAME_NOREPLACE)
> + target_lookup_flags |= LOOKUP_EXCL;
> + flags &= ~(RENAME_NOREPLACE);
> +
> retry:
> err = vfs_path_parent_lookup(to, lookup_flags | LOOKUP_BENEATH,
> &new_path, &new_last, &new_type,
> @@ -743,7 +747,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
> }
>
> new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
> - lookup_flags | LOOKUP_RENAME_TARGET);
> + lookup_flags | target_lookup_flags);
> if (IS_ERR(new_dentry)) {
> err = PTR_ERR(new_dentry);
> goto out3;
> @@ -754,16 +758,6 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
> goto out4;
> }
>
> - /*
> - * explicitly handle file overwrite case, for compatibility with
> - * filesystems that may not support rename flags (e.g: fuse)
> - */
> - if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) {
> - err = -EEXIST;
> - goto out4;
> - }
> - flags &= ~(RENAME_NOREPLACE);
> -
> if (old_child == trap) {
> err = -EINVAL;
> goto out4;
Nice simplification.
Reviewed-by: Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/2] VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry
2025-02-17 0:27 ` [PATCH 1/2] VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry NeilBrown
2025-02-17 8:26 ` Christian Brauner
2025-02-17 13:41 ` Jeff Layton
@ 2025-04-14 11:01 ` Christian Brauner
2025-04-14 15:54 ` Christian Brauner
2 siblings, 1 reply; 15+ messages in thread
From: Christian Brauner @ 2025-04-14 11:01 UTC (permalink / raw)
To: NeilBrown
Cc: Vlastimil Babka, Alexander Viro, Jan Kara, linux-fsdevel,
linux-nfs
> diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
> index 7f358740e958..367eaf2c78b7 100644
> --- a/kernel/audit_watch.c
> +++ b/kernel/audit_watch.c
> @@ -350,11 +350,10 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
> struct dentry *d = kern_path_locked(watch->path, parent);
> if (IS_ERR(d))
> return PTR_ERR(d);
> - if (d_is_positive(d)) {
> - /* update watch filter fields */
> - watch->dev = d->d_sb->s_dev;
> - watch->ino = d_backing_inode(d)->i_ino;
> - }
> + /* update watch filter fields */
> + watch->dev = d->d_sb->s_dev;
> + watch->ino = d_backing_inode(d)->i_ino;
> +
> inode_unlock(d_backing_inode(parent->dentry));
> dput(d);
> return 0;
> @@ -419,10 +418,11 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
> /* caller expects mutex locked */
> mutex_lock(&audit_filter_mutex);
>
> - if (ret) {
> + if (ret && ret != -ENOENT) {
> audit_put_watch(watch);
> return ret;
> }
> + ret = 0;
So this is broken.
If kern_path_locked() fails due to a negative dentry and returns ENOENT
it will have already called path_put() and @parent_path is invalid.
But right after this audit does:
>
> /* either find an old parent or attach a new one */
> parent = audit_find_parent(d_backing_inode(parent_path.dentry));
and then later on calls path_put() again. So this is a UAF. We need to
fix this.
This used to work before because kern_path_locked() return a path with a
negative dentry.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/2] VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry
2025-04-14 11:01 ` Christian Brauner
@ 2025-04-14 15:54 ` Christian Brauner
0 siblings, 0 replies; 15+ messages in thread
From: Christian Brauner @ 2025-04-14 15:54 UTC (permalink / raw)
To: NeilBrown
Cc: Vlastimil Babka, Alexander Viro, Jan Kara, linux-fsdevel,
linux-nfs
On Mon, Apr 14, 2025 at 01:01:53PM +0200, Christian Brauner wrote:
> > diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
> > index 7f358740e958..367eaf2c78b7 100644
> > --- a/kernel/audit_watch.c
> > +++ b/kernel/audit_watch.c
> > @@ -350,11 +350,10 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
> > struct dentry *d = kern_path_locked(watch->path, parent);
> > if (IS_ERR(d))
> > return PTR_ERR(d);
> > - if (d_is_positive(d)) {
> > - /* update watch filter fields */
> > - watch->dev = d->d_sb->s_dev;
> > - watch->ino = d_backing_inode(d)->i_ino;
> > - }
> > + /* update watch filter fields */
> > + watch->dev = d->d_sb->s_dev;
> > + watch->ino = d_backing_inode(d)->i_ino;
> > +
> > inode_unlock(d_backing_inode(parent->dentry));
> > dput(d);
> > return 0;
> > @@ -419,10 +418,11 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
> > /* caller expects mutex locked */
> > mutex_lock(&audit_filter_mutex);
> >
> > - if (ret) {
> > + if (ret && ret != -ENOENT) {
> > audit_put_watch(watch);
> > return ret;
> > }
> > + ret = 0;
>
> So this is broken.
>
> If kern_path_locked() fails due to a negative dentry and returns ENOENT
> it will have already called path_put() and @parent_path is invalid.
>
> But right after this audit does:
>
> >
> > /* either find an old parent or attach a new one */
> > parent = audit_find_parent(d_backing_inode(parent_path.dentry));
>
> and then later on calls path_put() again. So this is a UAF. We need to
> fix this.
>
> This used to work before because kern_path_locked() return a path with a
> negative dentry.
*returned the parent path even if the looked up dentry was negative
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2025-04-14 15:54 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-02-17 0:27 [PATCH 0/2 v2] VFS: minor improvements to a couple of interfaces NeilBrown
2025-02-17 0:27 ` [PATCH 1/2] VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry NeilBrown
2025-02-17 8:26 ` Christian Brauner
2025-02-17 13:41 ` Jeff Layton
2025-04-14 11:01 ` Christian Brauner
2025-04-14 15:54 ` Christian Brauner
2025-02-17 0:27 ` [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl() NeilBrown
2025-02-17 13:46 ` Jeff Layton
-- strict thread matches above, loose matches on Subject: below --
2025-02-07 3:36 [PATCH 0/2] VFS: minor improvements to a couple of interfaces NeilBrown
2025-02-07 3:36 ` [PATCH 2/2] VFS: add common error checks to lookup_one_qstr_excl() NeilBrown
2025-02-12 2:50 ` kernel test robot
2025-02-12 3:16 ` Al Viro
2025-02-12 3:25 ` Al Viro
2025-02-12 3:45 ` NeilBrown
2025-02-12 4:06 ` Al Viro
2025-02-12 4:40 ` NeilBrown
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).