* Re: [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations
2024-09-16 4:34 [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations NeilBrown
@ 2024-09-18 20:40 ` kernel test robot
2024-09-18 22:24 ` kernel test robot
` (3 subsequent siblings)
4 siblings, 0 replies; 10+ messages in thread
From: kernel test robot @ 2024-09-18 20:40 UTC (permalink / raw)
To: NeilBrown; +Cc: oe-kbuild-all
Hi NeilBrown,
[This is a private test report for your RFC patch.]
kernel test robot noticed the following build errors:
[auto build test ERROR on 98f7e32f20d28ec452afb208f9cffc08448a2652]
url: https://github.com/intel-lab-lkp/linux/commits/NeilBrown/VFS-disable-new-delegations-during-delegation-breaking-operations/20240916-123616
base: 98f7e32f20d28ec452afb208f9cffc08448a2652
patch link: https://lore.kernel.org/r/172646129988.17050.4729474250083101679%40noble.neil.brown.name
patch subject: [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations
config: i386-allyesconfig (https://download.01.org/0day-ci/archive/20240919/202409190404.Yjqn3KXY-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240919/202409190404.Yjqn3KXY-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202409190404.Yjqn3KXY-lkp@intel.com/
All error/warnings (new ones prefixed by >>):
fs/smb/server/vfs.c: In function 'ksmbd_vfs_rename':
>> fs/smb/server/vfs.c:784:12: error: 'struct renamedata' has no member named 'delegated_inode'; did you mean 'delegated_inode_new'?
784 | rd.delegated_inode = NULL,
| ^~~~~~~~~~~~~~~
| delegated_inode_new
>> fs/smb/server/vfs.c:784:39: warning: left-hand operand of comma expression has no effect [-Wunused-value]
784 | rd.delegated_inode = NULL,
| ^
vim +784 fs/smb/server/vfs.c
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 682
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 683 int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 684 char *newname, int flags)
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 685 {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 686 struct dentry *old_parent, *new_dentry, *trap;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 687 struct dentry *old_child = old_path->dentry;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 688 struct path new_path;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 689 struct qstr new_last;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 690 struct renamedata rd;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 691 struct filename *to;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 692 struct ksmbd_share_config *share_conf = work->tcon->share_conf;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 693 struct ksmbd_file *parent_fp;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 694 int new_type;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 695 int err, lookup_flags = LOOKUP_NO_SYMLINKS;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 696
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 697 if (ksmbd_override_fsids(work))
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 698 return -ENOMEM;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 699
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 700 to = getname_kernel(newname);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 701 if (IS_ERR(to)) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 702 err = PTR_ERR(to);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 703 goto revert_fsids;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 704 }
4b637fc1890260 fs/cifsd/vfs.c Namjae Jeon 2021-06-18 705
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 706 retry:
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 707 err = vfs_path_parent_lookup(to, lookup_flags | LOOKUP_BENEATH,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 708 &new_path, &new_last, &new_type,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 709 &share_conf->vfs_path);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 710 if (err)
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 711 goto out1;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 712
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 713 if (old_path->mnt != new_path.mnt) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 714 err = -EXDEV;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 715 goto out2;
4b637fc1890260 fs/cifsd/vfs.c Namjae Jeon 2021-06-18 716 }
4b637fc1890260 fs/cifsd/vfs.c Namjae Jeon 2021-06-18 717
40b268d384a222 fs/smb/server/vfs.c Namjae Jeon 2023-06-15 718 err = mnt_want_write(old_path->mnt);
40b268d384a222 fs/smb/server/vfs.c Namjae Jeon 2023-06-15 719 if (err)
40b268d384a222 fs/smb/server/vfs.c Namjae Jeon 2023-06-15 720 goto out2;
40b268d384a222 fs/smb/server/vfs.c Namjae Jeon 2023-06-15 721
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 722 trap = lock_rename_child(old_child, new_path.dentry);
a8b0026847b8c4 fs/smb/server/vfs.c Al Viro 2023-11-20 723 if (IS_ERR(trap)) {
a8b0026847b8c4 fs/smb/server/vfs.c Al Viro 2023-11-20 724 err = PTR_ERR(trap);
a8b0026847b8c4 fs/smb/server/vfs.c Al Viro 2023-11-20 725 goto out_drop_write;
a8b0026847b8c4 fs/smb/server/vfs.c Al Viro 2023-11-20 726 }
4b637fc1890260 fs/cifsd/vfs.c Namjae Jeon 2021-06-18 727
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 728 old_parent = dget(old_child->d_parent);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 729 if (d_unhashed(old_child)) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 730 err = -EINVAL;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 731 goto out3;
c36fca8630dda0 fs/cifsd/vfs.c Namjae Jeon 2021-03-30 732 }
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 733
4274a9dc6aeb9f fs/smb/server/vfs.c Namjae Jeon 2023-11-20 734 parent_fp = ksmbd_lookup_fd_inode(old_child->d_parent);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 735 if (parent_fp) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 736 if (parent_fp->daccess & FILE_DELETE_LE) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 737 pr_err("parent dir is opened with delete access\n");
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 738 err = -ESHARE;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 739 ksmbd_fd_put(work, parent_fp);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 740 goto out3;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 741 }
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 742 ksmbd_fd_put(work, parent_fp);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 743 }
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 744
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 745 new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 746 lookup_flags | LOOKUP_RENAME_TARGET);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 747 if (IS_ERR(new_dentry)) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 748 err = PTR_ERR(new_dentry);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 749 goto out3;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 750 }
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 751
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 752 if (d_is_symlink(new_dentry)) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 753 err = -EACCES;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 754 goto out4;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 755 }
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 756
4973b04d3ea577 fs/smb/server/vfs.c Marios Makassikis 2024-04-15 757 /*
4973b04d3ea577 fs/smb/server/vfs.c Marios Makassikis 2024-04-15 758 * explicitly handle file overwrite case, for compatibility with
4973b04d3ea577 fs/smb/server/vfs.c Marios Makassikis 2024-04-15 759 * filesystems that may not support rename flags (e.g: fuse)
4973b04d3ea577 fs/smb/server/vfs.c Marios Makassikis 2024-04-15 760 */
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 761 if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 762 err = -EEXIST;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 763 goto out4;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 764 }
4973b04d3ea577 fs/smb/server/vfs.c Marios Makassikis 2024-04-15 765 flags &= ~(RENAME_NOREPLACE);
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 766
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 767 if (old_child == trap) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 768 err = -EINVAL;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 769 goto out4;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 770 }
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 771
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 772 if (new_dentry == trap) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 773 err = -ENOTEMPTY;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 774 goto out4;
265fd1991c1db8 fs/ksmbd/vfs.c Hyunchul Lee 2021-09-25 775 }
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 776
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 777 rd.old_mnt_idmap = mnt_idmap(old_path->mnt),
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 778 rd.old_dir = d_inode(old_parent),
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 779 rd.old_dentry = old_child,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 780 rd.new_mnt_idmap = mnt_idmap(new_path.mnt),
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 781 rd.new_dir = new_path.dentry->d_inode,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 782 rd.new_dentry = new_dentry,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 783 rd.flags = flags,
48b47f0caaa8a9 fs/smb/server/vfs.c Namjae Jeon 2023-05-12 @784 rd.delegated_inode = NULL,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 785 err = vfs_rename(&rd);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 786 if (err)
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 787 ksmbd_debug(VFS, "vfs_rename failed err %d\n", err);
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 788
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 789 out4:
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 790 dput(new_dentry);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 791 out3:
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 792 dput(old_parent);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 793 unlock_rename(old_parent, new_path.dentry);
a8b0026847b8c4 fs/smb/server/vfs.c Al Viro 2023-11-20 794 out_drop_write:
40b268d384a222 fs/smb/server/vfs.c Namjae Jeon 2023-06-15 795 mnt_drop_write(old_path->mnt);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 796 out2:
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 797 path_put(&new_path);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 798
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 799 if (retry_estale(err, lookup_flags)) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 800 lookup_flags |= LOOKUP_REVAL;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 801 goto retry;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 802 }
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 803 out1:
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 804 putname(to);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 805 revert_fsids:
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 806 ksmbd_revert_fsids(work);
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 807 return err;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 808 }
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 809
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations
2024-09-16 4:34 [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations NeilBrown
2024-09-18 20:40 ` kernel test robot
@ 2024-09-18 22:24 ` kernel test robot
2024-09-25 8:56 ` Christian Brauner
` (2 subsequent siblings)
4 siblings, 0 replies; 10+ messages in thread
From: kernel test robot @ 2024-09-18 22:24 UTC (permalink / raw)
To: NeilBrown; +Cc: llvm, oe-kbuild-all
Hi NeilBrown,
[This is a private test report for your RFC patch.]
kernel test robot noticed the following build errors:
[auto build test ERROR on 98f7e32f20d28ec452afb208f9cffc08448a2652]
url: https://github.com/intel-lab-lkp/linux/commits/NeilBrown/VFS-disable-new-delegations-during-delegation-breaking-operations/20240916-123616
base: 98f7e32f20d28ec452afb208f9cffc08448a2652
patch link: https://lore.kernel.org/r/172646129988.17050.4729474250083101679%40noble.neil.brown.name
patch subject: [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations
config: x86_64-allyesconfig (https://download.01.org/0day-ci/archive/20240919/202409190651.U8XgA17d-lkp@intel.com/config)
compiler: clang version 18.1.8 (https://github.com/llvm/llvm-project 3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240919/202409190651.U8XgA17d-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202409190651.U8XgA17d-lkp@intel.com/
All errors (new ones prefixed by >>):
>> fs/smb/server/vfs.c:784:5: error: no member named 'delegated_inode' in 'struct renamedata'
784 | rd.delegated_inode = NULL,
| ~~ ^
1 error generated.
vim +784 fs/smb/server/vfs.c
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 682
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 683 int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 684 char *newname, int flags)
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 685 {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 686 struct dentry *old_parent, *new_dentry, *trap;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 687 struct dentry *old_child = old_path->dentry;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 688 struct path new_path;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 689 struct qstr new_last;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 690 struct renamedata rd;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 691 struct filename *to;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 692 struct ksmbd_share_config *share_conf = work->tcon->share_conf;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 693 struct ksmbd_file *parent_fp;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 694 int new_type;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 695 int err, lookup_flags = LOOKUP_NO_SYMLINKS;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 696
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 697 if (ksmbd_override_fsids(work))
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 698 return -ENOMEM;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 699
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 700 to = getname_kernel(newname);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 701 if (IS_ERR(to)) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 702 err = PTR_ERR(to);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 703 goto revert_fsids;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 704 }
4b637fc1890260 fs/cifsd/vfs.c Namjae Jeon 2021-06-18 705
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 706 retry:
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 707 err = vfs_path_parent_lookup(to, lookup_flags | LOOKUP_BENEATH,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 708 &new_path, &new_last, &new_type,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 709 &share_conf->vfs_path);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 710 if (err)
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 711 goto out1;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 712
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 713 if (old_path->mnt != new_path.mnt) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 714 err = -EXDEV;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 715 goto out2;
4b637fc1890260 fs/cifsd/vfs.c Namjae Jeon 2021-06-18 716 }
4b637fc1890260 fs/cifsd/vfs.c Namjae Jeon 2021-06-18 717
40b268d384a222 fs/smb/server/vfs.c Namjae Jeon 2023-06-15 718 err = mnt_want_write(old_path->mnt);
40b268d384a222 fs/smb/server/vfs.c Namjae Jeon 2023-06-15 719 if (err)
40b268d384a222 fs/smb/server/vfs.c Namjae Jeon 2023-06-15 720 goto out2;
40b268d384a222 fs/smb/server/vfs.c Namjae Jeon 2023-06-15 721
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 722 trap = lock_rename_child(old_child, new_path.dentry);
a8b0026847b8c4 fs/smb/server/vfs.c Al Viro 2023-11-20 723 if (IS_ERR(trap)) {
a8b0026847b8c4 fs/smb/server/vfs.c Al Viro 2023-11-20 724 err = PTR_ERR(trap);
a8b0026847b8c4 fs/smb/server/vfs.c Al Viro 2023-11-20 725 goto out_drop_write;
a8b0026847b8c4 fs/smb/server/vfs.c Al Viro 2023-11-20 726 }
4b637fc1890260 fs/cifsd/vfs.c Namjae Jeon 2021-06-18 727
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 728 old_parent = dget(old_child->d_parent);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 729 if (d_unhashed(old_child)) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 730 err = -EINVAL;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 731 goto out3;
c36fca8630dda0 fs/cifsd/vfs.c Namjae Jeon 2021-03-30 732 }
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 733
4274a9dc6aeb9f fs/smb/server/vfs.c Namjae Jeon 2023-11-20 734 parent_fp = ksmbd_lookup_fd_inode(old_child->d_parent);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 735 if (parent_fp) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 736 if (parent_fp->daccess & FILE_DELETE_LE) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 737 pr_err("parent dir is opened with delete access\n");
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 738 err = -ESHARE;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 739 ksmbd_fd_put(work, parent_fp);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 740 goto out3;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 741 }
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 742 ksmbd_fd_put(work, parent_fp);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 743 }
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 744
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 745 new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 746 lookup_flags | LOOKUP_RENAME_TARGET);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 747 if (IS_ERR(new_dentry)) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 748 err = PTR_ERR(new_dentry);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 749 goto out3;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 750 }
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 751
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 752 if (d_is_symlink(new_dentry)) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 753 err = -EACCES;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 754 goto out4;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 755 }
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 756
4973b04d3ea577 fs/smb/server/vfs.c Marios Makassikis 2024-04-15 757 /*
4973b04d3ea577 fs/smb/server/vfs.c Marios Makassikis 2024-04-15 758 * explicitly handle file overwrite case, for compatibility with
4973b04d3ea577 fs/smb/server/vfs.c Marios Makassikis 2024-04-15 759 * filesystems that may not support rename flags (e.g: fuse)
4973b04d3ea577 fs/smb/server/vfs.c Marios Makassikis 2024-04-15 760 */
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 761 if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 762 err = -EEXIST;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 763 goto out4;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 764 }
4973b04d3ea577 fs/smb/server/vfs.c Marios Makassikis 2024-04-15 765 flags &= ~(RENAME_NOREPLACE);
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 766
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 767 if (old_child == trap) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 768 err = -EINVAL;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 769 goto out4;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 770 }
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 771
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 772 if (new_dentry == trap) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 773 err = -ENOTEMPTY;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 774 goto out4;
265fd1991c1db8 fs/ksmbd/vfs.c Hyunchul Lee 2021-09-25 775 }
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 776
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 777 rd.old_mnt_idmap = mnt_idmap(old_path->mnt),
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 778 rd.old_dir = d_inode(old_parent),
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 779 rd.old_dentry = old_child,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 780 rd.new_mnt_idmap = mnt_idmap(new_path.mnt),
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 781 rd.new_dir = new_path.dentry->d_inode,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 782 rd.new_dentry = new_dentry,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 783 rd.flags = flags,
48b47f0caaa8a9 fs/smb/server/vfs.c Namjae Jeon 2023-05-12 @784 rd.delegated_inode = NULL,
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 785 err = vfs_rename(&rd);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 786 if (err)
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 787 ksmbd_debug(VFS, "vfs_rename failed err %d\n", err);
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 788
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 789 out4:
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 790 dput(new_dentry);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 791 out3:
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 792 dput(old_parent);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 793 unlock_rename(old_parent, new_path.dentry);
a8b0026847b8c4 fs/smb/server/vfs.c Al Viro 2023-11-20 794 out_drop_write:
40b268d384a222 fs/smb/server/vfs.c Namjae Jeon 2023-06-15 795 mnt_drop_write(old_path->mnt);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 796 out2:
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 797 path_put(&new_path);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 798
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 799 if (retry_estale(err, lookup_flags)) {
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 800 lookup_flags |= LOOKUP_REVAL;
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 801 goto retry;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 802 }
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 803 out1:
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 804 putname(to);
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 805 revert_fsids:
74d7970febf7e9 fs/ksmbd/vfs.c Namjae Jeon 2023-04-21 806 ksmbd_revert_fsids(work);
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 807 return err;
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 808 }
f44158485826c0 fs/cifsd/vfs.c Namjae Jeon 2021-03-16 809
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations
2024-09-16 4:34 [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations NeilBrown
2024-09-18 20:40 ` kernel test robot
2024-09-18 22:24 ` kernel test robot
@ 2024-09-25 8:56 ` Christian Brauner
2024-09-25 22:19 ` Al Viro
2024-09-30 14:04 ` Jeff Layton
4 siblings, 0 replies; 10+ messages in thread
From: Christian Brauner @ 2024-09-25 8:56 UTC (permalink / raw)
To: NeilBrown
Cc: Jeff Layton, Alexander Viro, Jan Kara, linux-fsdevel, Chuck Lever,
Olga Kornievskaia, linux-nfs
On Mon, Sep 16, 2024 at 02:34:59PM GMT, NeilBrown wrote:
>
> Various operations such as rename and unlink must break any delegations
> before they proceed.
> do_dentry_open() and vfs_truncate(), which use break_lease(), increment
> i_writecount and/or i_readcount which blocks delegations until the
> counter is decremented, but the various callers of try_break_deleg() do
> not impose any such barrier. They hold the inode lock while performing
> the operation which blocks delegations, but must drop it while waiting
> for a delegation to be broken, which leaves an opportunity for a new
> delegation to be added.
>
> nfsd - the only current user of delegations - records any files on which
> it is called to break a delegation in a manner which blocks further
> delegations for 30-60 seconds. This is normally sufficient. However
> there is talk of reducing the timeout and it would be best if operations
> that needed delegations to be blocked used something more definitive
> than a timer.
>
> This patch adds that definitive blocking by adding a counter to struct
> file_lock_context of the number of concurrent operations which require
> delegations to be blocked. check_conflicting_open() checks that counter
> when a delegation is requested and denies the delegation if the counter
> is elevated.
>
> try_break_deleg() now increments that counter when it records the inode
> as a 'delegated_inode'.
>
> break_deleg_wait() now leaves the inode pointer in *delegated_inode when
> it signals that the operation should be retried, and then clears it -
> decrementing the new counter - when the operation has completed, whether
> successfully or not. To achieve this we now pass the current error
> status in to break_deleg_wait().
>
> vfs_rename() now uses two delegated_inode pointers, one for the
> source and one for the destination in the case of replacement. This is
> needed as it may be necessary to block further delegations to both
> inodes while the rename completes.
I'm not intimiately familiar with delegations but the reasoning seems
sound to me and I don't spot anything obvious in the code. I will defer
to Jeff for a decision.
Is there any potential for deadlocks due to ordering issues when calling
__break_lease() on source and target?
>
> The net result is that we no longer depend on the delay that nfsd
> imposes on new delegation in order for various functions that break
> delegations to be sure that new delegations won't be added while they wait
> with the inode unlocked. This gives more freedom to nfsd to make more
> subtle choices about when and for how long to block delegations when
> there is no active contention.
>
> try_break_deleg() is possibly now large enough that it shouldn't be
> inline.
>
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
I looked a bit for broader documentation on delegations/leases and it
seems we don't have any. It would be nice if we could add something
to Documentation/filesystems/.
> fs/locks.c | 12 ++++++++++--
> fs/namei.c | 32 ++++++++++++++++++++------------
> fs/open.c | 8 ++++----
> fs/posix_acl.c | 8 ++++----
> fs/utimes.c | 4 ++--
> fs/xattr.c | 8 ++++----
> include/linux/filelock.h | 31 ++++++++++++++++++++++++-------
> include/linux/fs.h | 3 ++-
> 8 files changed, 70 insertions(+), 36 deletions(-)
>
> diff --git a/fs/locks.c b/fs/locks.c
> index e45cad40f8b6..171628094daa 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -191,6 +191,7 @@ locks_get_lock_context(struct inode *inode, int type)
> INIT_LIST_HEAD(&ctx->flc_flock);
> INIT_LIST_HEAD(&ctx->flc_posix);
> INIT_LIST_HEAD(&ctx->flc_lease);
> + atomic_set(&ctx->flc_deleg_blockers, 0);
>
> /*
> * Assign the pointer if it's not already assigned. If it is, then
> @@ -255,6 +256,7 @@ locks_free_lock_context(struct inode *inode)
> struct file_lock_context *ctx = locks_inode_context(inode);
>
> if (unlikely(ctx)) {
> + WARN_ON(atomic_read(&ctx->flc_deleg_blockers) != 0);
> locks_check_ctx_lists(inode);
> kmem_cache_free(flctx_cache, ctx);
> }
> @@ -1743,9 +1745,15 @@ check_conflicting_open(struct file *filp, const int arg, int flags)
>
> if (flags & FL_LAYOUT)
> return 0;
> - if (flags & FL_DELEG)
> - /* We leave these checks to the caller */
> + if (flags & FL_DELEG) {
> + struct file_lock_context *ctx = locks_inode_context(inode);
> +
> + if (ctx && atomic_read(&ctx->flc_deleg_blockers) > 0)
> + return -EAGAIN;
> +
> + /* We leave the remaining checks to the caller */
> return 0;
> + }
>
> if (arg == F_RDLCK)
> return inode_is_open_for_write(inode) ? -EAGAIN : 0;
> diff --git a/fs/namei.c b/fs/namei.c
> index 5512cb10fa89..3054da90276b 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -4493,8 +4493,8 @@ int do_unlinkat(int dfd, struct filename *name)
> iput(inode); /* truncate the inode here */
> inode = NULL;
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
> mnt_drop_write(path.mnt);
> @@ -4764,8 +4764,8 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
> out_dput:
> done_path_create(&new_path, new_dentry);
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error) {
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK) {
> path_put(&old_path);
> goto retry;
> }
> @@ -4848,7 +4848,8 @@ int vfs_rename(struct renamedata *rd)
> struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir;
> struct dentry *old_dentry = rd->old_dentry;
> struct dentry *new_dentry = rd->new_dentry;
> - struct inode **delegated_inode = rd->delegated_inode;
> + struct inode **delegated_inode_old = rd->delegated_inode_old;
> + struct inode **delegated_inode_new = rd->delegated_inode_new;
> unsigned int flags = rd->flags;
> bool is_dir = d_is_dir(old_dentry);
> struct inode *source = old_dentry->d_inode;
> @@ -4954,12 +4955,12 @@ int vfs_rename(struct renamedata *rd)
> goto out;
> }
> if (!is_dir) {
> - error = try_break_deleg(source, delegated_inode);
> + error = try_break_deleg(source, delegated_inode_old);
> if (error)
> goto out;
> }
> if (target && !new_is_dir) {
> - error = try_break_deleg(target, delegated_inode);
> + error = try_break_deleg(target, delegated_inode_new);
> if (error)
> goto out;
> }
> @@ -5011,7 +5012,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> struct path old_path, new_path;
> struct qstr old_last, new_last;
> int old_type, new_type;
> - struct inode *delegated_inode = NULL;
> + struct inode *delegated_inode_old = NULL;
> + struct inode *delegated_inode_new = NULL;
> unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
> bool should_retry = false;
> int error = -EINVAL;
> @@ -5118,7 +5120,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> rd.new_dir = new_path.dentry->d_inode;
> rd.new_dentry = new_dentry;
> rd.new_mnt_idmap = mnt_idmap(new_path.mnt);
> - rd.delegated_inode = &delegated_inode;
> + rd.delegated_inode_old = &delegated_inode_old;
> + rd.delegated_inode_new = &delegated_inode_new;
> rd.flags = flags;
> error = vfs_rename(&rd);
> exit5:
> @@ -5128,9 +5131,14 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> exit3:
> unlock_rename(new_path.dentry, old_path.dentry);
> exit_lock_rename:
> - if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + if (delegated_inode_old) {
> + error = break_deleg_wait(&delegated_inode_old, error);
> + if (error == -EWOULDBLOCK)
> + goto retry_deleg;
> + }
> + if (delegated_inode_new) {
> + error = break_deleg_wait(&delegated_inode_new, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
> mnt_drop_write(old_path.mnt);
> diff --git a/fs/open.c b/fs/open.c
> index 22adbef7ecc2..6b6d20a68dd8 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -656,8 +656,8 @@ int chmod_common(const struct path *path, umode_t mode)
> out_unlock:
> inode_unlock(inode);
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
> mnt_drop_write(path->mnt);
> @@ -795,8 +795,8 @@ int chown_common(const struct path *path, uid_t user, gid_t group)
> &delegated_inode);
> inode_unlock(inode);
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
> return error;
> diff --git a/fs/posix_acl.c b/fs/posix_acl.c
> index 3f87297dbfdb..5eb3635d1067 100644
> --- a/fs/posix_acl.c
> +++ b/fs/posix_acl.c
> @@ -1143,8 +1143,8 @@ int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
> inode_unlock(inode);
>
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
>
> @@ -1251,8 +1251,8 @@ int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry,
> inode_unlock(inode);
>
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
>
> diff --git a/fs/utimes.c b/fs/utimes.c
> index 3701b3946f88..21b7605551dc 100644
> --- a/fs/utimes.c
> +++ b/fs/utimes.c
> @@ -67,8 +67,8 @@ int vfs_utimes(const struct path *path, struct timespec64 *times)
> &delegated_inode);
> inode_unlock(inode);
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
>
> diff --git a/fs/xattr.c b/fs/xattr.c
> index 7672ce5486c5..63e0b067dab9 100644
> --- a/fs/xattr.c
> +++ b/fs/xattr.c
> @@ -323,8 +323,8 @@ vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
> inode_unlock(inode);
>
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
> if (value != orig_value)
> @@ -577,8 +577,8 @@ vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
> inode_unlock(inode);
>
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
>
> diff --git a/include/linux/filelock.h b/include/linux/filelock.h
> index daee999d05f3..66470ba9658c 100644
> --- a/include/linux/filelock.h
> +++ b/include/linux/filelock.h
> @@ -144,6 +144,7 @@ struct file_lock_context {
> struct list_head flc_flock;
> struct list_head flc_posix;
> struct list_head flc_lease;
> + atomic_t flc_deleg_blockers;
> };
>
> #ifdef CONFIG_FILE_LOCKING
> @@ -450,21 +451,37 @@ static inline int try_break_deleg(struct inode *inode, struct inode **delegated_
> {
> int ret;
>
> + if (delegated_inode && *delegated_inode) {
> + if (*delegated_inode == inode)
> + /* Don't need to count this */
> + return break_deleg(inode, O_WRONLY|O_NONBLOCK);
> +
> + /* inode changed, forget the old one */
> + atomic_dec(&(*delegated_inode)->i_flctx->flc_deleg_blockers);
> + iput(*delegated_inode);
> + *delegated_inode = NULL;
> + }
> ret = break_deleg(inode, O_WRONLY|O_NONBLOCK);
> if (ret == -EWOULDBLOCK && delegated_inode) {
> *delegated_inode = inode;
> + atomic_inc(&(*delegated_inode)->i_flctx->flc_deleg_blockers);
> ihold(inode);
> }
> return ret;
> }
>
> -static inline int break_deleg_wait(struct inode **delegated_inode)
> +static inline int break_deleg_wait(struct inode **delegated_inode, int ret)
> {
> - int ret;
> -
> - ret = break_deleg(*delegated_inode, O_WRONLY);
> - iput(*delegated_inode);
> - *delegated_inode = NULL;
> + if (ret == -EWOULDBLOCK) {
> + ret = break_deleg(*delegated_inode, O_WRONLY);
> + if (ret == 0)
> + ret = -EWOULDBLOCK;
> + }
> + if (ret != -EWOULDBLOCK) {
> + atomic_dec(&(*delegated_inode)->i_flctx->flc_deleg_blockers);
> + iput(*delegated_inode);
> + *delegated_inode = NULL;
> + }
> return ret;
> }
>
> @@ -494,7 +511,7 @@ static inline int try_break_deleg(struct inode *inode, struct inode **delegated_
> return 0;
> }
>
> -static inline int break_deleg_wait(struct inode **delegated_inode)
> +static inline int break_deleg_wait(struct inode **delegated_inode, int ret)
> {
> BUG();
> return 0;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 6ca11e241a24..50957d9e1c2b 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1902,7 +1902,8 @@ struct renamedata {
> struct mnt_idmap *new_mnt_idmap;
> struct inode *new_dir;
> struct dentry *new_dentry;
> - struct inode **delegated_inode;
> + struct inode **delegated_inode_old;
> + struct inode **delegated_inode_new;
> unsigned int flags;
> } __randomize_layout;
>
>
> base-commit: 98f7e32f20d28ec452afb208f9cffc08448a2652
> --
> 2.46.0
>
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations
2024-09-16 4:34 [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations NeilBrown
` (2 preceding siblings ...)
2024-09-25 8:56 ` Christian Brauner
@ 2024-09-25 22:19 ` Al Viro
2024-09-25 22:42 ` NeilBrown
2024-09-30 14:04 ` Jeff Layton
4 siblings, 1 reply; 10+ messages in thread
From: Al Viro @ 2024-09-25 22:19 UTC (permalink / raw)
To: NeilBrown
Cc: Jeff Layton, Christian Brauner, Jan Kara, linux-fsdevel,
Chuck Lever, Olga Kornievskaia, linux-nfs
On Mon, Sep 16, 2024 at 02:34:59PM +1000, NeilBrown wrote:
> @@ -5011,7 +5012,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> struct path old_path, new_path;
> struct qstr old_last, new_last;
> int old_type, new_type;
> - struct inode *delegated_inode = NULL;
> + struct inode *delegated_inode_old = NULL;
> + struct inode *delegated_inode_new = NULL;
> unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
> bool should_retry = false;
> int error = -EINVAL;
> @@ -5118,7 +5120,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> rd.new_dir = new_path.dentry->d_inode;
> rd.new_dentry = new_dentry;
> rd.new_mnt_idmap = mnt_idmap(new_path.mnt);
> - rd.delegated_inode = &delegated_inode;
> + rd.delegated_inode_old = &delegated_inode_old;
> + rd.delegated_inode_new = &delegated_inode_new;
> rd.flags = flags;
> error = vfs_rename(&rd);
> exit5:
> @@ -5128,9 +5131,14 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> exit3:
> unlock_rename(new_path.dentry, old_path.dentry);
> exit_lock_rename:
> - if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + if (delegated_inode_old) {
> + error = break_deleg_wait(&delegated_inode_old, error);
> + if (error == -EWOULDBLOCK)
> + goto retry_deleg;
Won't that goto leak a reference to delegated_inode_new?
> + }
> + if (delegated_inode_new) {
> + error = break_deleg_wait(&delegated_inode_new, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
> mnt_drop_write(old_path.mnt);
> -static inline int break_deleg_wait(struct inode **delegated_inode)
> +static inline int break_deleg_wait(struct inode **delegated_inode, int ret)
> {
> - int ret;
> -
> - ret = break_deleg(*delegated_inode, O_WRONLY);
> - iput(*delegated_inode);
> - *delegated_inode = NULL;
> + if (ret == -EWOULDBLOCK) {
> + ret = break_deleg(*delegated_inode, O_WRONLY);
> + if (ret == 0)
> + ret = -EWOULDBLOCK;
> + }
> + if (ret != -EWOULDBLOCK) {
> + atomic_dec(&(*delegated_inode)->i_flctx->flc_deleg_blockers);
> + iput(*delegated_inode);
> + *delegated_inode = NULL;
> + }
> return ret;
> }
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations
2024-09-25 22:19 ` Al Viro
@ 2024-09-25 22:42 ` NeilBrown
2024-09-25 23:06 ` Al Viro
0 siblings, 1 reply; 10+ messages in thread
From: NeilBrown @ 2024-09-25 22:42 UTC (permalink / raw)
To: Al Viro
Cc: Jeff Layton, Christian Brauner, Jan Kara, linux-fsdevel,
Chuck Lever, Olga Kornievskaia, linux-nfs
On Thu, 26 Sep 2024, Al Viro wrote:
> On Mon, Sep 16, 2024 at 02:34:59PM +1000, NeilBrown wrote:
>
> > @@ -5011,7 +5012,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> > struct path old_path, new_path;
> > struct qstr old_last, new_last;
> > int old_type, new_type;
> > - struct inode *delegated_inode = NULL;
> > + struct inode *delegated_inode_old = NULL;
> > + struct inode *delegated_inode_new = NULL;
> > unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
> > bool should_retry = false;
> > int error = -EINVAL;
> > @@ -5118,7 +5120,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> > rd.new_dir = new_path.dentry->d_inode;
> > rd.new_dentry = new_dentry;
> > rd.new_mnt_idmap = mnt_idmap(new_path.mnt);
> > - rd.delegated_inode = &delegated_inode;
> > + rd.delegated_inode_old = &delegated_inode_old;
> > + rd.delegated_inode_new = &delegated_inode_new;
> > rd.flags = flags;
> > error = vfs_rename(&rd);
> > exit5:
> > @@ -5128,9 +5131,14 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> > exit3:
> > unlock_rename(new_path.dentry, old_path.dentry);
> > exit_lock_rename:
> > - if (delegated_inode) {
> > - error = break_deleg_wait(&delegated_inode);
> > - if (!error)
> > + if (delegated_inode_old) {
> > + error = break_deleg_wait(&delegated_inode_old, error);
> > + if (error == -EWOULDBLOCK)
> > + goto retry_deleg;
>
> Won't that goto leak a reference to delegated_inode_new?
I don't think so.
The old delegated_inode_new will be carried in to vfs_rename() and
passed to try_break_deleg() which will notice that it is not-NULL and
will "do the right thing".
Both _old and _new are initialised to zero at the start of
do_renameat2(), Both are passed to break_deleg_wait() on the last time
through the retry_deleg loop which will drop the references - or will
preserve the reference if it isn't the last time - and both are only set
by try_break_deleg() which is careful to check if a prior value exists.
So I think there are no leaks.
Thanks,
NeilBrown
>
> > + }
> > + if (delegated_inode_new) {
> > + error = break_deleg_wait(&delegated_inode_new, error);
> > + if (error == -EWOULDBLOCK)
> > goto retry_deleg;
> > }
> > mnt_drop_write(old_path.mnt);
>
> > -static inline int break_deleg_wait(struct inode **delegated_inode)
> > +static inline int break_deleg_wait(struct inode **delegated_inode, int ret)
> > {
> > - int ret;
> > -
> > - ret = break_deleg(*delegated_inode, O_WRONLY);
> > - iput(*delegated_inode);
> > - *delegated_inode = NULL;
> > + if (ret == -EWOULDBLOCK) {
> > + ret = break_deleg(*delegated_inode, O_WRONLY);
> > + if (ret == 0)
> > + ret = -EWOULDBLOCK;
> > + }
> > + if (ret != -EWOULDBLOCK) {
> > + atomic_dec(&(*delegated_inode)->i_flctx->flc_deleg_blockers);
> > + iput(*delegated_inode);
> > + *delegated_inode = NULL;
> > + }
> > return ret;
> > }
>
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations
2024-09-25 22:42 ` NeilBrown
@ 2024-09-25 23:06 ` Al Viro
2024-09-25 23:46 ` NeilBrown
0 siblings, 1 reply; 10+ messages in thread
From: Al Viro @ 2024-09-25 23:06 UTC (permalink / raw)
To: NeilBrown
Cc: Jeff Layton, Christian Brauner, Jan Kara, linux-fsdevel,
Chuck Lever, Olga Kornievskaia, linux-nfs
On Thu, Sep 26, 2024 at 08:42:06AM +1000, NeilBrown wrote:
> I don't think so.
> The old delegated_inode_new will be carried in to vfs_rename() and
> passed to try_break_deleg() which will notice that it is not-NULL and
> will "do the right thing".
>
> Both _old and _new are initialised to zero at the start of
> do_renameat2(), Both are passed to break_deleg_wait() on the last time
> through the retry_deleg loop which will drop the references - or will
> preserve the reference if it isn't the last time - and both are only set
> by try_break_deleg() which is careful to check if a prior value exists.
> So I think there are no leaks.
Yecchhhh... What happens if break_deleg() in there returns e.g. -ENOMEM
when try_break_deleg() finds a matching inode?
I'm not even saying it won't work, but it's way too brittle for my taste ;-/
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations
2024-09-25 23:06 ` Al Viro
@ 2024-09-25 23:46 ` NeilBrown
0 siblings, 0 replies; 10+ messages in thread
From: NeilBrown @ 2024-09-25 23:46 UTC (permalink / raw)
To: Al Viro
Cc: Jeff Layton, Christian Brauner, Jan Kara, linux-fsdevel,
Chuck Lever, Olga Kornievskaia, linux-nfs
On Thu, 26 Sep 2024, Al Viro wrote:
> On Thu, Sep 26, 2024 at 08:42:06AM +1000, NeilBrown wrote:
>
> > I don't think so.
> > The old delegated_inode_new will be carried in to vfs_rename() and
> > passed to try_break_deleg() which will notice that it is not-NULL and
> > will "do the right thing".
> >
> > Both _old and _new are initialised to zero at the start of
> > do_renameat2(), Both are passed to break_deleg_wait() on the last time
> > through the retry_deleg loop which will drop the references - or will
> > preserve the reference if it isn't the last time - and both are only set
> > by try_break_deleg() which is careful to check if a prior value exists.
> > So I think there are no leaks.
>
> Yecchhhh... What happens if break_deleg() in there returns e.g. -ENOMEM
> when try_break_deleg() finds a matching inode?
I don't see that that changes the calculus at all.
>
> I'm not even saying it won't work, but it's way too brittle for my taste ;-/
>
I accept that the interactions are not immediately obvious and that is a
problem. Maybe that could be fixed with documentation/code-comments.
I'm certainly open to suggestions for restructuring the code to make it
more obvious but as yet I cannot see a good alternative. The separation
of responsibility between do_renameat2 and vfs_rename seems about right
as between do_linkat and vfs_link etc. We want to keep the delegation
blocked all around the loop to the next retry. Doing that through
delegated_inode (or _new and _old versions) seems to work....
What particularly seems brittle to you? Maybe if I have one detail to
focus on.
Thanks,
NeilBrown
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations
2024-09-16 4:34 [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations NeilBrown
` (3 preceding siblings ...)
2024-09-25 22:19 ` Al Viro
@ 2024-09-30 14:04 ` Jeff Layton
2024-09-30 14:15 ` Chuck Lever III
4 siblings, 1 reply; 10+ messages in thread
From: Jeff Layton @ 2024-09-30 14:04 UTC (permalink / raw)
To: NeilBrown, Alexander Viro, Christian Brauner
Cc: Jan Kara, linux-fsdevel, Chuck Lever, Olga Kornievskaia,
linux-nfs
On Mon, 2024-09-16 at 14:34 +1000, NeilBrown wrote:
> Various operations such as rename and unlink must break any delegations
> before they proceed.
> do_dentry_open() and vfs_truncate(), which use break_lease(), increment
> i_writecount and/or i_readcount which blocks delegations until the
> counter is decremented, but the various callers of try_break_deleg() do
> not impose any such barrier. They hold the inode lock while performing
> the operation which blocks delegations, but must drop it while waiting
> for a delegation to be broken, which leaves an opportunity for a new
> delegation to be added.
>
> nfsd - the only current user of delegations - records any files on which
> it is called to break a delegation in a manner which blocks further
> delegations for 30-60 seconds. This is normally sufficient. However
> there is talk of reducing the timeout and it would be best if operations
> that needed delegations to be blocked used something more definitive
> than a timer.
>
> This patch adds that definitive blocking by adding a counter to struct
> file_lock_context of the number of concurrent operations which require
> delegations to be blocked. check_conflicting_open() checks that counter
> when a delegation is requested and denies the delegation if the counter
> is elevated.
>
> try_break_deleg() now increments that counter when it records the inode
> as a 'delegated_inode'.
>
> break_deleg_wait() now leaves the inode pointer in *delegated_inode when
> it signals that the operation should be retried, and then clears it -
> decrementing the new counter - when the operation has completed, whether
> successfully or not. To achieve this we now pass the current error
> status in to break_deleg_wait().
>
> vfs_rename() now uses two delegated_inode pointers, one for the
> source and one for the destination in the case of replacement. This is
> needed as it may be necessary to block further delegations to both
> inodes while the rename completes.
>
> The net result is that we no longer depend on the delay that nfsd
> imposes on new delegation in order for various functions that break
> delegations to be sure that new delegations won't be added while they wait
> with the inode unlocked. This gives more freedom to nfsd to make more
> subtle choices about when and for how long to block delegations when
> there is no active contention.
>
> try_break_deleg() is possibly now large enough that it shouldn't be
> inline.
>
I like this approach. Moving the blocking of new delegations into the
VFS layer makes sense, and we can do better, more targeted blocking of
new leases this way.
I wonder -- do we still need the bloom filter if we do this? We could
keep track of the time of the last recall in i_flctx as well, and then
avoid handing them out until some time has elapsed.
try_break_deleg() (and break_deleg_wait(), for that matter) were
already a bit large for inlines, so moving them to regular functions
sounds like a good idea. Maybe we can even have inline fastpath
wrappers around them that check for i_flctx == NULL and avoid the jmp
in that case?
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
> fs/locks.c | 12 ++++++++++--
> fs/namei.c | 32 ++++++++++++++++++++------------
> fs/open.c | 8 ++++----
> fs/posix_acl.c | 8 ++++----
> fs/utimes.c | 4 ++--
> fs/xattr.c | 8 ++++----
> include/linux/filelock.h | 31 ++++++++++++++++++++++++-------
> include/linux/fs.h | 3 ++-
> 8 files changed, 70 insertions(+), 36 deletions(-)
>
> diff --git a/fs/locks.c b/fs/locks.c
> index e45cad40f8b6..171628094daa 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -191,6 +191,7 @@ locks_get_lock_context(struct inode *inode, int type)
> INIT_LIST_HEAD(&ctx->flc_flock);
> INIT_LIST_HEAD(&ctx->flc_posix);
> INIT_LIST_HEAD(&ctx->flc_lease);
> + atomic_set(&ctx->flc_deleg_blockers, 0);
>
> /*
> * Assign the pointer if it's not already assigned. If it is, then
> @@ -255,6 +256,7 @@ locks_free_lock_context(struct inode *inode)
> struct file_lock_context *ctx = locks_inode_context(inode);
>
> if (unlikely(ctx)) {
> + WARN_ON(atomic_read(&ctx->flc_deleg_blockers) != 0);
> locks_check_ctx_lists(inode);
> kmem_cache_free(flctx_cache, ctx);
> }
> @@ -1743,9 +1745,15 @@ check_conflicting_open(struct file *filp, const int arg, int flags)
>
> if (flags & FL_LAYOUT)
> return 0;
> - if (flags & FL_DELEG)
> - /* We leave these checks to the caller */
> + if (flags & FL_DELEG) {
> + struct file_lock_context *ctx = locks_inode_context(inode);
> +
> + if (ctx && atomic_read(&ctx->flc_deleg_blockers) > 0)
> + return -EAGAIN;
> +
> + /* We leave the remaining checks to the caller */
> return 0;
> + }
>
> if (arg == F_RDLCK)
> return inode_is_open_for_write(inode) ? -EAGAIN : 0;
> diff --git a/fs/namei.c b/fs/namei.c
> index 5512cb10fa89..3054da90276b 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -4493,8 +4493,8 @@ int do_unlinkat(int dfd, struct filename *name)
> iput(inode); /* truncate the inode here */
> inode = NULL;
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
> mnt_drop_write(path.mnt);
> @@ -4764,8 +4764,8 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
> out_dput:
> done_path_create(&new_path, new_dentry);
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error) {
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK) {
> path_put(&old_path);
> goto retry;
> }
> @@ -4848,7 +4848,8 @@ int vfs_rename(struct renamedata *rd)
> struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir;
> struct dentry *old_dentry = rd->old_dentry;
> struct dentry *new_dentry = rd->new_dentry;
> - struct inode **delegated_inode = rd->delegated_inode;
> + struct inode **delegated_inode_old = rd->delegated_inode_old;
> + struct inode **delegated_inode_new = rd->delegated_inode_new;
> unsigned int flags = rd->flags;
> bool is_dir = d_is_dir(old_dentry);
> struct inode *source = old_dentry->d_inode;
> @@ -4954,12 +4955,12 @@ int vfs_rename(struct renamedata *rd)
> goto out;
> }
> if (!is_dir) {
> - error = try_break_deleg(source, delegated_inode);
> + error = try_break_deleg(source, delegated_inode_old);
> if (error)
> goto out;
> }
> if (target && !new_is_dir) {
> - error = try_break_deleg(target, delegated_inode);
> + error = try_break_deleg(target, delegated_inode_new);
> if (error)
> goto out;
> }
> @@ -5011,7 +5012,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> struct path old_path, new_path;
> struct qstr old_last, new_last;
> int old_type, new_type;
> - struct inode *delegated_inode = NULL;
> + struct inode *delegated_inode_old = NULL;
> + struct inode *delegated_inode_new = NULL;
> unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
> bool should_retry = false;
> int error = -EINVAL;
> @@ -5118,7 +5120,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> rd.new_dir = new_path.dentry->d_inode;
> rd.new_dentry = new_dentry;
> rd.new_mnt_idmap = mnt_idmap(new_path.mnt);
> - rd.delegated_inode = &delegated_inode;
> + rd.delegated_inode_old = &delegated_inode_old;
> + rd.delegated_inode_new = &delegated_inode_new;
> rd.flags = flags;
> error = vfs_rename(&rd);
> exit5:
> @@ -5128,9 +5131,14 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
> exit3:
> unlock_rename(new_path.dentry, old_path.dentry);
> exit_lock_rename:
> - if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + if (delegated_inode_old) {
> + error = break_deleg_wait(&delegated_inode_old, error);
> + if (error == -EWOULDBLOCK)
> + goto retry_deleg;
> + }
> + if (delegated_inode_new) {
> + error = break_deleg_wait(&delegated_inode_new, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
> mnt_drop_write(old_path.mnt);
> diff --git a/fs/open.c b/fs/open.c
> index 22adbef7ecc2..6b6d20a68dd8 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -656,8 +656,8 @@ int chmod_common(const struct path *path, umode_t mode)
> out_unlock:
> inode_unlock(inode);
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
> mnt_drop_write(path->mnt);
> @@ -795,8 +795,8 @@ int chown_common(const struct path *path, uid_t user, gid_t group)
> &delegated_inode);
> inode_unlock(inode);
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
> return error;
> diff --git a/fs/posix_acl.c b/fs/posix_acl.c
> index 3f87297dbfdb..5eb3635d1067 100644
> --- a/fs/posix_acl.c
> +++ b/fs/posix_acl.c
> @@ -1143,8 +1143,8 @@ int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
> inode_unlock(inode);
>
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
>
> @@ -1251,8 +1251,8 @@ int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry,
> inode_unlock(inode);
>
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
>
> diff --git a/fs/utimes.c b/fs/utimes.c
> index 3701b3946f88..21b7605551dc 100644
> --- a/fs/utimes.c
> +++ b/fs/utimes.c
> @@ -67,8 +67,8 @@ int vfs_utimes(const struct path *path, struct timespec64 *times)
> &delegated_inode);
> inode_unlock(inode);
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
>
> diff --git a/fs/xattr.c b/fs/xattr.c
> index 7672ce5486c5..63e0b067dab9 100644
> --- a/fs/xattr.c
> +++ b/fs/xattr.c
> @@ -323,8 +323,8 @@ vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
> inode_unlock(inode);
>
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
> if (value != orig_value)
> @@ -577,8 +577,8 @@ vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
> inode_unlock(inode);
>
> if (delegated_inode) {
> - error = break_deleg_wait(&delegated_inode);
> - if (!error)
> + error = break_deleg_wait(&delegated_inode, error);
> + if (error == -EWOULDBLOCK)
> goto retry_deleg;
> }
>
> diff --git a/include/linux/filelock.h b/include/linux/filelock.h
> index daee999d05f3..66470ba9658c 100644
> --- a/include/linux/filelock.h
> +++ b/include/linux/filelock.h
> @@ -144,6 +144,7 @@ struct file_lock_context {
> struct list_head flc_flock;
> struct list_head flc_posix;
> struct list_head flc_lease;
> + atomic_t flc_deleg_blockers;
> };
>
> #ifdef CONFIG_FILE_LOCKING
> @@ -450,21 +451,37 @@ static inline int try_break_deleg(struct inode *inode, struct inode **delegated_
> {
> int ret;
>
> + if (delegated_inode && *delegated_inode) {
> + if (*delegated_inode == inode)
> + /* Don't need to count this */
> + return break_deleg(inode, O_WRONLY|O_NONBLOCK);
> +
> + /* inode changed, forget the old one */
> + atomic_dec(&(*delegated_inode)->i_flctx->flc_deleg_blockers);
> + iput(*delegated_inode);
> + *delegated_inode = NULL;
> + }
> ret = break_deleg(inode, O_WRONLY|O_NONBLOCK);
> if (ret == -EWOULDBLOCK && delegated_inode) {
> *delegated_inode = inode;
> + atomic_inc(&(*delegated_inode)->i_flctx->flc_deleg_blockers);
> ihold(inode);
> }
> return ret;
> }
>
> -static inline int break_deleg_wait(struct inode **delegated_inode)
> +static inline int break_deleg_wait(struct inode **delegated_inode, int ret)
> {
> - int ret;
> -
> - ret = break_deleg(*delegated_inode, O_WRONLY);
> - iput(*delegated_inode);
> - *delegated_inode = NULL;
> + if (ret == -EWOULDBLOCK) {
> + ret = break_deleg(*delegated_inode, O_WRONLY);
> + if (ret == 0)
> + ret = -EWOULDBLOCK;
> + }
> + if (ret != -EWOULDBLOCK) {
> + atomic_dec(&(*delegated_inode)->i_flctx->flc_deleg_blockers);
> + iput(*delegated_inode);
> + *delegated_inode = NULL;
> + }
> return ret;
> }
>
> @@ -494,7 +511,7 @@ static inline int try_break_deleg(struct inode *inode, struct inode **delegated_
> return 0;
> }
>
> -static inline int break_deleg_wait(struct inode **delegated_inode)
> +static inline int break_deleg_wait(struct inode **delegated_inode, int ret)
> {
> BUG();
> return 0;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 6ca11e241a24..50957d9e1c2b 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1902,7 +1902,8 @@ struct renamedata {
> struct mnt_idmap *new_mnt_idmap;
> struct inode *new_dir;
> struct dentry *new_dentry;
> - struct inode **delegated_inode;
> + struct inode **delegated_inode_old;
> + struct inode **delegated_inode_new;
> unsigned int flags;
> } __randomize_layout;
>
>
> base-commit: 98f7e32f20d28ec452afb208f9cffc08448a2652
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH - RFC] VFS: disable new delegations during delegation-breaking operations
2024-09-30 14:04 ` Jeff Layton
@ 2024-09-30 14:15 ` Chuck Lever III
0 siblings, 0 replies; 10+ messages in thread
From: Chuck Lever III @ 2024-09-30 14:15 UTC (permalink / raw)
To: Neil Brown
Cc: Jeff Layton, Al Viro, Christian Brauner, Jan Kara, Linux FS Devel,
Olga Kornievskaia, Linux NFS Mailing List
> On Sep 30, 2024, at 10:04 AM, Jeff Layton <jlayton@kernel.org> wrote:
>
> On Mon, 2024-09-16 at 14:34 +1000, NeilBrown wrote:
>> Various operations such as rename and unlink must break any delegations
>> before they proceed.
>> do_dentry_open() and vfs_truncate(), which use break_lease(), increment
>> i_writecount and/or i_readcount which blocks delegations until the
>> counter is decremented, but the various callers of try_break_deleg() do
>> not impose any such barrier. They hold the inode lock while performing
>> the operation which blocks delegations, but must drop it while waiting
>> for a delegation to be broken, which leaves an opportunity for a new
>> delegation to be added.
>>
>> nfsd - the only current user of delegations - records any files on which
>> it is called to break a delegation in a manner which blocks further
>> delegations for 30-60 seconds. This is normally sufficient. However
>> there is talk of reducing the timeout and it would be best if operations
>> that needed delegations to be blocked used something more definitive
>> than a timer.
>>
>> This patch adds that definitive blocking by adding a counter to struct
>> file_lock_context of the number of concurrent operations which require
>> delegations to be blocked. check_conflicting_open() checks that counter
>> when a delegation is requested and denies the delegation if the counter
>> is elevated.
>>
>> try_break_deleg() now increments that counter when it records the inode
>> as a 'delegated_inode'.
>>
>> break_deleg_wait() now leaves the inode pointer in *delegated_inode when
>> it signals that the operation should be retried, and then clears it -
>> decrementing the new counter - when the operation has completed, whether
>> successfully or not. To achieve this we now pass the current error
>> status in to break_deleg_wait().
>>
>> vfs_rename() now uses two delegated_inode pointers, one for the
>> source and one for the destination in the case of replacement. This is
>> needed as it may be necessary to block further delegations to both
>> inodes while the rename completes.
>>
>> The net result is that we no longer depend on the delay that nfsd
>> imposes on new delegation in order for various functions that break
>> delegations to be sure that new delegations won't be added while they wait
>> with the inode unlocked. This gives more freedom to nfsd to make more
>> subtle choices about when and for how long to block delegations when
>> there is no active contention.
>>
>> try_break_deleg() is possibly now large enough that it shouldn't be
>> inline.
>>
>
> I like this approach. Moving the blocking of new delegations into the
> VFS layer makes sense, and we can do better, more targeted blocking of
> new leases this way.
I have only a little terminology quibble: "blocking" suggests
to my brain that this causes a process to sleep until some
resource is available.
That's not actually what is going on; this is a mechanism to
prevent handing out delegations in some cases. The OPEN is
allowed to proceed without hindrance.
Cork, retard, squelch, stifle... I like squelch, but it's a
little uncommon.
My two cents US.
> I wonder -- do we still need the bloom filter if we do this? We could
> keep track of the time of the last recall in i_flctx as well, and then
> avoid handing them out until some time has elapsed.
>
> try_break_deleg() (and break_deleg_wait(), for that matter) were
> already a bit large for inlines, so moving them to regular functions
> sounds like a good idea. Maybe we can even have inline fastpath
> wrappers around them that check for i_flctx == NULL and avoid the jmp
> in that case?
>
>> Signed-off-by: NeilBrown <neilb@suse.de>
>> ---
>> fs/locks.c | 12 ++++++++++--
>> fs/namei.c | 32 ++++++++++++++++++++------------
>> fs/open.c | 8 ++++----
>> fs/posix_acl.c | 8 ++++----
>> fs/utimes.c | 4 ++--
>> fs/xattr.c | 8 ++++----
>> include/linux/filelock.h | 31 ++++++++++++++++++++++++-------
>> include/linux/fs.h | 3 ++-
>> 8 files changed, 70 insertions(+), 36 deletions(-)
>>
>> diff --git a/fs/locks.c b/fs/locks.c
>> index e45cad40f8b6..171628094daa 100644
>> --- a/fs/locks.c
>> +++ b/fs/locks.c
>> @@ -191,6 +191,7 @@ locks_get_lock_context(struct inode *inode, int type)
>> INIT_LIST_HEAD(&ctx->flc_flock);
>> INIT_LIST_HEAD(&ctx->flc_posix);
>> INIT_LIST_HEAD(&ctx->flc_lease);
>> + atomic_set(&ctx->flc_deleg_blockers, 0);
>>
>> /*
>> * Assign the pointer if it's not already assigned. If it is, then
>> @@ -255,6 +256,7 @@ locks_free_lock_context(struct inode *inode)
>> struct file_lock_context *ctx = locks_inode_context(inode);
>>
>> if (unlikely(ctx)) {
>> + WARN_ON(atomic_read(&ctx->flc_deleg_blockers) != 0);
>> locks_check_ctx_lists(inode);
>> kmem_cache_free(flctx_cache, ctx);
>> }
>> @@ -1743,9 +1745,15 @@ check_conflicting_open(struct file *filp, const int arg, int flags)
>>
>> if (flags & FL_LAYOUT)
>> return 0;
>> - if (flags & FL_DELEG)
>> - /* We leave these checks to the caller */
>> + if (flags & FL_DELEG) {
>> + struct file_lock_context *ctx = locks_inode_context(inode);
>> +
>> + if (ctx && atomic_read(&ctx->flc_deleg_blockers) > 0)
>> + return -EAGAIN;
>> +
>> + /* We leave the remaining checks to the caller */
>> return 0;
>> + }
>>
>> if (arg == F_RDLCK)
>> return inode_is_open_for_write(inode) ? -EAGAIN : 0;
>> diff --git a/fs/namei.c b/fs/namei.c
>> index 5512cb10fa89..3054da90276b 100644
>> --- a/fs/namei.c
>> +++ b/fs/namei.c
>> @@ -4493,8 +4493,8 @@ int do_unlinkat(int dfd, struct filename *name)
>> iput(inode); /* truncate the inode here */
>> inode = NULL;
>> if (delegated_inode) {
>> - error = break_deleg_wait(&delegated_inode);
>> - if (!error)
>> + error = break_deleg_wait(&delegated_inode, error);
>> + if (error == -EWOULDBLOCK)
>> goto retry_deleg;
>> }
>> mnt_drop_write(path.mnt);
>> @@ -4764,8 +4764,8 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
>> out_dput:
>> done_path_create(&new_path, new_dentry);
>> if (delegated_inode) {
>> - error = break_deleg_wait(&delegated_inode);
>> - if (!error) {
>> + error = break_deleg_wait(&delegated_inode, error);
>> + if (error == -EWOULDBLOCK) {
>> path_put(&old_path);
>> goto retry;
>> }
>> @@ -4848,7 +4848,8 @@ int vfs_rename(struct renamedata *rd)
>> struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir;
>> struct dentry *old_dentry = rd->old_dentry;
>> struct dentry *new_dentry = rd->new_dentry;
>> - struct inode **delegated_inode = rd->delegated_inode;
>> + struct inode **delegated_inode_old = rd->delegated_inode_old;
>> + struct inode **delegated_inode_new = rd->delegated_inode_new;
>> unsigned int flags = rd->flags;
>> bool is_dir = d_is_dir(old_dentry);
>> struct inode *source = old_dentry->d_inode;
>> @@ -4954,12 +4955,12 @@ int vfs_rename(struct renamedata *rd)
>> goto out;
>> }
>> if (!is_dir) {
>> - error = try_break_deleg(source, delegated_inode);
>> + error = try_break_deleg(source, delegated_inode_old);
>> if (error)
>> goto out;
>> }
>> if (target && !new_is_dir) {
>> - error = try_break_deleg(target, delegated_inode);
>> + error = try_break_deleg(target, delegated_inode_new);
>> if (error)
>> goto out;
>> }
>> @@ -5011,7 +5012,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
>> struct path old_path, new_path;
>> struct qstr old_last, new_last;
>> int old_type, new_type;
>> - struct inode *delegated_inode = NULL;
>> + struct inode *delegated_inode_old = NULL;
>> + struct inode *delegated_inode_new = NULL;
>> unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
>> bool should_retry = false;
>> int error = -EINVAL;
>> @@ -5118,7 +5120,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
>> rd.new_dir = new_path.dentry->d_inode;
>> rd.new_dentry = new_dentry;
>> rd.new_mnt_idmap = mnt_idmap(new_path.mnt);
>> - rd.delegated_inode = &delegated_inode;
>> + rd.delegated_inode_old = &delegated_inode_old;
>> + rd.delegated_inode_new = &delegated_inode_new;
>> rd.flags = flags;
>> error = vfs_rename(&rd);
>> exit5:
>> @@ -5128,9 +5131,14 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
>> exit3:
>> unlock_rename(new_path.dentry, old_path.dentry);
>> exit_lock_rename:
>> - if (delegated_inode) {
>> - error = break_deleg_wait(&delegated_inode);
>> - if (!error)
>> + if (delegated_inode_old) {
>> + error = break_deleg_wait(&delegated_inode_old, error);
>> + if (error == -EWOULDBLOCK)
>> + goto retry_deleg;
>> + }
>> + if (delegated_inode_new) {
>> + error = break_deleg_wait(&delegated_inode_new, error);
>> + if (error == -EWOULDBLOCK)
>> goto retry_deleg;
>> }
>> mnt_drop_write(old_path.mnt);
>> diff --git a/fs/open.c b/fs/open.c
>> index 22adbef7ecc2..6b6d20a68dd8 100644
>> --- a/fs/open.c
>> +++ b/fs/open.c
>> @@ -656,8 +656,8 @@ int chmod_common(const struct path *path, umode_t mode)
>> out_unlock:
>> inode_unlock(inode);
>> if (delegated_inode) {
>> - error = break_deleg_wait(&delegated_inode);
>> - if (!error)
>> + error = break_deleg_wait(&delegated_inode, error);
>> + if (error == -EWOULDBLOCK)
>> goto retry_deleg;
>> }
>> mnt_drop_write(path->mnt);
>> @@ -795,8 +795,8 @@ int chown_common(const struct path *path, uid_t user, gid_t group)
>> &delegated_inode);
>> inode_unlock(inode);
>> if (delegated_inode) {
>> - error = break_deleg_wait(&delegated_inode);
>> - if (!error)
>> + error = break_deleg_wait(&delegated_inode, error);
>> + if (error == -EWOULDBLOCK)
>> goto retry_deleg;
>> }
>> return error;
>> diff --git a/fs/posix_acl.c b/fs/posix_acl.c
>> index 3f87297dbfdb..5eb3635d1067 100644
>> --- a/fs/posix_acl.c
>> +++ b/fs/posix_acl.c
>> @@ -1143,8 +1143,8 @@ int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
>> inode_unlock(inode);
>>
>> if (delegated_inode) {
>> - error = break_deleg_wait(&delegated_inode);
>> - if (!error)
>> + error = break_deleg_wait(&delegated_inode, error);
>> + if (error == -EWOULDBLOCK)
>> goto retry_deleg;
>> }
>>
>> @@ -1251,8 +1251,8 @@ int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry,
>> inode_unlock(inode);
>>
>> if (delegated_inode) {
>> - error = break_deleg_wait(&delegated_inode);
>> - if (!error)
>> + error = break_deleg_wait(&delegated_inode, error);
>> + if (error == -EWOULDBLOCK)
>> goto retry_deleg;
>> }
>>
>> diff --git a/fs/utimes.c b/fs/utimes.c
>> index 3701b3946f88..21b7605551dc 100644
>> --- a/fs/utimes.c
>> +++ b/fs/utimes.c
>> @@ -67,8 +67,8 @@ int vfs_utimes(const struct path *path, struct timespec64 *times)
>> &delegated_inode);
>> inode_unlock(inode);
>> if (delegated_inode) {
>> - error = break_deleg_wait(&delegated_inode);
>> - if (!error)
>> + error = break_deleg_wait(&delegated_inode, error);
>> + if (error == -EWOULDBLOCK)
>> goto retry_deleg;
>> }
>>
>> diff --git a/fs/xattr.c b/fs/xattr.c
>> index 7672ce5486c5..63e0b067dab9 100644
>> --- a/fs/xattr.c
>> +++ b/fs/xattr.c
>> @@ -323,8 +323,8 @@ vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
>> inode_unlock(inode);
>>
>> if (delegated_inode) {
>> - error = break_deleg_wait(&delegated_inode);
>> - if (!error)
>> + error = break_deleg_wait(&delegated_inode, error);
>> + if (error == -EWOULDBLOCK)
>> goto retry_deleg;
>> }
>> if (value != orig_value)
>> @@ -577,8 +577,8 @@ vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
>> inode_unlock(inode);
>>
>> if (delegated_inode) {
>> - error = break_deleg_wait(&delegated_inode);
>> - if (!error)
>> + error = break_deleg_wait(&delegated_inode, error);
>> + if (error == -EWOULDBLOCK)
>> goto retry_deleg;
>> }
>>
>> diff --git a/include/linux/filelock.h b/include/linux/filelock.h
>> index daee999d05f3..66470ba9658c 100644
>> --- a/include/linux/filelock.h
>> +++ b/include/linux/filelock.h
>> @@ -144,6 +144,7 @@ struct file_lock_context {
>> struct list_head flc_flock;
>> struct list_head flc_posix;
>> struct list_head flc_lease;
>> + atomic_t flc_deleg_blockers;
>> };
>>
>> #ifdef CONFIG_FILE_LOCKING
>> @@ -450,21 +451,37 @@ static inline int try_break_deleg(struct inode *inode, struct inode **delegated_
>> {
>> int ret;
>>
>> + if (delegated_inode && *delegated_inode) {
>> + if (*delegated_inode == inode)
>> + /* Don't need to count this */
>> + return break_deleg(inode, O_WRONLY|O_NONBLOCK);
>> +
>> + /* inode changed, forget the old one */
>> + atomic_dec(&(*delegated_inode)->i_flctx->flc_deleg_blockers);
>> + iput(*delegated_inode);
>> + *delegated_inode = NULL;
>> + }
>> ret = break_deleg(inode, O_WRONLY|O_NONBLOCK);
>> if (ret == -EWOULDBLOCK && delegated_inode) {
>> *delegated_inode = inode;
>> + atomic_inc(&(*delegated_inode)->i_flctx->flc_deleg_blockers);
>> ihold(inode);
>> }
>> return ret;
>> }
>>
>> -static inline int break_deleg_wait(struct inode **delegated_inode)
>> +static inline int break_deleg_wait(struct inode **delegated_inode, int ret)
>> {
>> - int ret;
>> -
>> - ret = break_deleg(*delegated_inode, O_WRONLY);
>> - iput(*delegated_inode);
>> - *delegated_inode = NULL;
>> + if (ret == -EWOULDBLOCK) {
>> + ret = break_deleg(*delegated_inode, O_WRONLY);
>> + if (ret == 0)
>> + ret = -EWOULDBLOCK;
>> + }
>> + if (ret != -EWOULDBLOCK) {
>> + atomic_dec(&(*delegated_inode)->i_flctx->flc_deleg_blockers);
>> + iput(*delegated_inode);
>> + *delegated_inode = NULL;
>> + }
>> return ret;
>> }
>>
>> @@ -494,7 +511,7 @@ static inline int try_break_deleg(struct inode *inode, struct inode **delegated_
>> return 0;
>> }
>>
>> -static inline int break_deleg_wait(struct inode **delegated_inode)
>> +static inline int break_deleg_wait(struct inode **delegated_inode, int ret)
>> {
>> BUG();
>> return 0;
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index 6ca11e241a24..50957d9e1c2b 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -1902,7 +1902,8 @@ struct renamedata {
>> struct mnt_idmap *new_mnt_idmap;
>> struct inode *new_dir;
>> struct dentry *new_dentry;
>> - struct inode **delegated_inode;
>> + struct inode **delegated_inode_old;
>> + struct inode **delegated_inode_new;
>> unsigned int flags;
>> } __randomize_layout;
>>
>>
>> base-commit: 98f7e32f20d28ec452afb208f9cffc08448a2652
>
> --
> Jeff Layton <jlayton@kernel.org>
--
Chuck Lever
^ permalink raw reply [flat|nested] 10+ messages in thread