linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: Christian Brauner <brauner@kernel.org>
Cc: linux-fsdevel@vger.kernel.org,
	"Seth Forshee (Digital Ocean)" <sforshee@kernel.org>,
	Christoph Hellwig <hch@lst.de>, Al Viro <viro@zeniv.linux.org.uk>
Subject: Re: [PATCH 23/25] fs: port fs{g,u}id helpers to mnt_idmap
Date: Mon, 17 Jun 2024 11:44:00 -0700	[thread overview]
Message-ID: <20240617184400.GA103057@frogsfrogsfrogs> (raw)
In-Reply-To: <20240617-weitblick-gefertigt-4a41f37119fa@brauner>

On Mon, Jun 17, 2024 at 09:41:21AM +0200, Christian Brauner wrote:
> On Fri, Jun 14, 2024 at 04:15:47PM GMT, Darrick J. Wong wrote:
> > On Fri, Jan 13, 2023 at 12:49:31PM +0100, Christian Brauner wrote:
> > > Convert to struct mnt_idmap.
> > > 
> > > Last cycle we merged the necessary infrastructure in
> > > 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts").
> > > This is just the conversion to struct mnt_idmap.
> > > 
> > > Currently we still pass around the plain namespace that was attached to a
> > > mount. This is in general pretty convenient but it makes it easy to
> > > conflate namespaces that are relevant on the filesystem with namespaces
> > > that are relevent on the mount level. Especially for non-vfs developers
> > > without detailed knowledge in this area this can be a potential source for
> > > bugs.
> > > 
> > > Once the conversion to struct mnt_idmap is done all helpers down to the
> > > really low-level helpers will take a struct mnt_idmap argument instead of
> > > two namespace arguments. This way it becomes impossible to conflate the two
> > > eliminating the possibility of any bugs. All of the vfs and all filesystems
> > > only operate on struct mnt_idmap.
> > > 
> > > Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
> > > ---
> > >  fs/ext4/ialloc.c              |  3 +--
> > >  fs/inode.c                    |  6 ++----
> > >  fs/xfs/xfs_inode.c            | 13 +++++--------
> > >  fs/xfs/xfs_symlink.c          |  5 ++---
> > >  include/linux/fs.h            | 21 ++++++++++-----------
> > >  include/linux/mnt_idmapping.h | 18 ++++++++++--------
> > >  6 files changed, 30 insertions(+), 36 deletions(-)
> > > 
> > > diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
> > > index 1024b0c02431..157663031f8c 100644
> > > --- a/fs/ext4/ialloc.c
> > > +++ b/fs/ext4/ialloc.c
> > > @@ -943,7 +943,6 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
> > >  	ext4_group_t flex_group;
> > >  	struct ext4_group_info *grp = NULL;
> > >  	bool encrypt = false;
> > > -	struct user_namespace *mnt_userns = mnt_idmap_owner(idmap);
> > >  
> > >  	/* Cannot create files in a deleted directory */
> > >  	if (!dir || !dir->i_nlink)
> > > @@ -973,7 +972,7 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
> > >  		i_gid_write(inode, owner[1]);
> > >  	} else if (test_opt(sb, GRPID)) {
> > >  		inode->i_mode = mode;
> > > -		inode_fsuid_set(inode, mnt_userns);
> > > +		inode_fsuid_set(inode, idmap);
> > >  		inode->i_gid = dir->i_gid;
> > >  	} else
> > >  		inode_init_owner(idmap, inode, dir, mode);
> > > diff --git a/fs/inode.c b/fs/inode.c
> > > index 1aec92141fab..1b05e0e4b5c8 100644
> > > --- a/fs/inode.c
> > > +++ b/fs/inode.c
> > > @@ -2293,9 +2293,7 @@ EXPORT_SYMBOL(init_special_inode);
> > >  void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
> > >  		      const struct inode *dir, umode_t mode)
> > >  {
> > > -	struct user_namespace *mnt_userns = mnt_idmap_owner(idmap);
> > > -
> > > -	inode_fsuid_set(inode, mnt_userns);
> > > +	inode_fsuid_set(inode, idmap);
> > >  	if (dir && dir->i_mode & S_ISGID) {
> > >  		inode->i_gid = dir->i_gid;
> > >  
> > > @@ -2303,7 +2301,7 @@ void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
> > >  		if (S_ISDIR(mode))
> > >  			mode |= S_ISGID;
> > >  	} else
> > > -		inode_fsgid_set(inode, mnt_userns);
> > > +		inode_fsgid_set(inode, idmap);
> > >  	inode->i_mode = mode;
> > >  }
> > >  EXPORT_SYMBOL(inode_init_owner);
> > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > > index 59fb064e2df3..7f1d715faab5 100644
> > > --- a/fs/xfs/xfs_inode.c
> > > +++ b/fs/xfs/xfs_inode.c
> > > @@ -788,7 +788,6 @@ xfs_init_new_inode(
> > >  	bool			init_xattrs,
> > >  	struct xfs_inode	**ipp)
> > >  {
> > > -	struct user_namespace	*mnt_userns = mnt_idmap_owner(idmap);
> > >  	struct inode		*dir = pip ? VFS_I(pip) : NULL;
> > >  	struct xfs_mount	*mp = tp->t_mountp;
> > >  	struct xfs_inode	*ip;
> > > @@ -824,7 +823,7 @@ xfs_init_new_inode(
> > >  	ip->i_projid = prid;
> > >  
> > >  	if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
> > > -		inode_fsuid_set(inode, mnt_userns);
> > > +		inode_fsuid_set(inode, idmap);
> > >  		inode->i_gid = dir->i_gid;
> > >  		inode->i_mode = mode;
> > >  	} else {
> > > @@ -955,7 +954,6 @@ xfs_create(
> > >  	bool			init_xattrs,
> > >  	xfs_inode_t		**ipp)
> > >  {
> > > -	struct user_namespace	*mnt_userns = mnt_idmap_owner(idmap);
> > >  	int			is_dir = S_ISDIR(mode);
> > >  	struct xfs_mount	*mp = dp->i_mount;
> > >  	struct xfs_inode	*ip = NULL;
> > > @@ -980,8 +978,8 @@ xfs_create(
> > >  	/*
> > >  	 * Make sure that we have allocated dquot(s) on disk.
> > >  	 */
> > > -	error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
> > > -			mapped_fsgid(mnt_userns, &init_user_ns), prid,
> > > +	error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns),
> > > +			mapped_fsgid(idmap, &init_user_ns), prid,
> > >  			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
> > >  			&udqp, &gdqp, &pdqp);
> > >  	if (error)
> > > @@ -1109,7 +1107,6 @@ xfs_create_tmpfile(
> > >  	umode_t			mode,
> > >  	struct xfs_inode	**ipp)
> > >  {
> > > -	struct user_namespace	*mnt_userns = mnt_idmap_owner(idmap);
> > >  	struct xfs_mount	*mp = dp->i_mount;
> > >  	struct xfs_inode	*ip = NULL;
> > >  	struct xfs_trans	*tp = NULL;
> > > @@ -1130,8 +1127,8 @@ xfs_create_tmpfile(
> > >  	/*
> > >  	 * Make sure that we have allocated dquot(s) on disk.
> > >  	 */
> > > -	error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
> > > -			mapped_fsgid(mnt_userns, &init_user_ns), prid,
> > > +	error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns),
> > > +			mapped_fsgid(idmap, &init_user_ns), prid,
> > >  			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
> > >  			&udqp, &gdqp, &pdqp);
> > >  	if (error)
> > > diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
> > > index 24cf0a16bf35..85e433df6a3f 100644
> > > --- a/fs/xfs/xfs_symlink.c
> > > +++ b/fs/xfs/xfs_symlink.c
> > > @@ -151,7 +151,6 @@ xfs_symlink(
> > >  	umode_t			mode,
> > >  	struct xfs_inode	**ipp)
> > >  {
> > > -	struct user_namespace	*mnt_userns = mnt_idmap_owner(idmap);
> > >  	struct xfs_mount	*mp = dp->i_mount;
> > >  	struct xfs_trans	*tp = NULL;
> > >  	struct xfs_inode	*ip = NULL;
> > > @@ -194,8 +193,8 @@ xfs_symlink(
> > >  	/*
> > >  	 * Make sure that we have allocated dquot(s) on disk.
> > >  	 */
> > > -	error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
> > > -			mapped_fsgid(mnt_userns, &init_user_ns), prid,
> > > +	error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns),
> > > +			mapped_fsgid(idmap, &init_user_ns), prid,
> > 
> > I know this is pretty late to be asking questions about this patch, but
> > after reviewing this code, I am curious about something --
> 
> That's perfectly fine!
> 
> > 
> > Here we try to allocate dquots prior to creating a file.  The uid used
> > here is:
> > 
> > 	mapped_fsuid(mnt_userns, &init_user_ns)
> > 
> > But later in inode_fsuid_set, we set the new inode's actual uid to:
> > 
> > 	mapped_fsuid(mnt_userns, i_user_ns(inode));
> > 
> > What happens if i_user_ns(inode) != &init_user_ns?  Can that change the
> 
> Excellent observation and question.
> 
> > return value of mapped_fsuid with the result that quota accounting
> > charges the wrong uid if the user namespaces are different?
> > Unfortunately I'm not familiar enough with how these things work to know
> > if that's even a sensible question.
> 
> So a few preliminary remarks:
> 
> (1) i_user_ns() retries the user namespace of the superblock. For
>     example:
> 
>     mount a tmpfs filesystem in the initial mount namespace owned by
>     the initial user namespace:
> 
>             mount -t tmpfs tmpfs /opt
> 
>     In this case
> 
>             init_user_ns == i_user_ns(inode)
> 
> 
>     But if we create a new mount namespace owned by an unprivilged user
>     namespace:
> 
>             unshare --mount --user --map-root
> 
>     and mount tmpfs in there:
> 
>             mount -t tmpfs tmpfs /mnt
> 
>     then
> 
>             init_user_ns != i_user_ns(inode)
> 
> (2) Filesystems must be explicitly marked as being mountable inside of a
>     user namespace aka by an unprivileged user. That only includes
>     pseudo filesystems (and I really want a VFS assertion that it
>     stays that way).
> 
> Since xfs isn't in (2) the scenario described in (1) cannot happen.

Ah, ok.  Thanks for the explanation.  I probably should update xfs to
keep these things consistent, but afaict it doesn't make any difference
with the current codebase.

> For all non-userns mountable filesystems like xfs I generally chose to
> pass init_user_ns explicitly everywhere it's required. But general
> helpers like i_user_ns() need to work for both and that's why it always
> uses i_user_ns().
> 
> You could technically put a WARN_ON_ONCE() in inode_fsuid_set() that
> basically does:
> 
> @@ -1492,6 +1492,7 @@ static inline void i_gid_update(struct mnt_idmap *idmap,
>  static inline void inode_fsuid_set(struct inode *inode,
>                                    struct mnt_idmap *idmap)
>  {
> +       WARN_ON_ONCE(i_user_ns(inode) != &init_user_ns && inode->i_sb->s_type->fs_flags & FS_USERNS_MOUNT);
>         inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode));
>  }
> 
> But I didn't think it was worth it and if we wanted to use it then I'd rather
> at an inode flag so we don't have to do this double deref thing.

Upon further investigation, it turns out that xfs_qm_vop_create_dqattach
already checks the inode ids vs. the dquot ids:

	if (udqp && XFS_IS_UQUOTA_ON(mp)) {
		ASSERT(ip->i_udquot == NULL);
		ASSERT(i_uid_read(VFS_I(ip)) == udqp->q_id);

		ip->i_udquot = xfs_qm_dqhold(udqp);
	}

If there's ever a discrepancy we'll catch this on CONFIG_XFS_DEBUG=y
systems, provided that the people doing QA also turn on debug mode.

Thanks for the insight!

--D

> > 
> > In my development tree I have a few assertions sprinked to see if we
> > ever detect a discrepancy in the uid that is set.  fstests doesn't
> > trigger it, nor does my usual workflow, so I really don't know.
> > 
> > --D
> > 
> > >  			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
> > >  			&udqp, &gdqp, &pdqp);
> > >  	if (error)
> > > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > > index 173c5274a63a..54a95ed68322 100644
> > > --- a/include/linux/fs.h
> > > +++ b/include/linux/fs.h
> > > @@ -1744,29 +1744,29 @@ static inline void i_gid_update(struct mnt_idmap *idmap,
> > >  /**
> > >   * inode_fsuid_set - initialize inode's i_uid field with callers fsuid
> > >   * @inode: inode to initialize
> > > - * @mnt_userns: user namespace of the mount the inode was found from
> > > + * @idmap: idmap of the mount the inode was found from
> > >   *
> > >   * Initialize the i_uid field of @inode. If the inode was found/created via
> > > - * an idmapped mount map the caller's fsuid according to @mnt_users.
> > > + * an idmapped mount map the caller's fsuid according to @idmap.
> > >   */
> > >  static inline void inode_fsuid_set(struct inode *inode,
> > > -				   struct user_namespace *mnt_userns)
> > > +				   struct mnt_idmap *idmap)
> > >  {
> > > -	inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode));
> > > +	inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode));
> > >  }
> > >  
> > >  /**
> > >   * inode_fsgid_set - initialize inode's i_gid field with callers fsgid
> > >   * @inode: inode to initialize
> > > - * @mnt_userns: user namespace of the mount the inode was found from
> > > + * @idmap: idmap of the mount the inode was found from
> > >   *
> > >   * Initialize the i_gid field of @inode. If the inode was found/created via
> > > - * an idmapped mount map the caller's fsgid according to @mnt_users.
> > > + * an idmapped mount map the caller's fsgid according to @idmap.
> > >   */
> > >  static inline void inode_fsgid_set(struct inode *inode,
> > > -				   struct user_namespace *mnt_userns)
> > > +				   struct mnt_idmap *idmap)
> > >  {
> > > -	inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode));
> > > +	inode->i_gid = mapped_fsgid(idmap, i_user_ns(inode));
> > >  }
> > >  
> > >  /**
> > > @@ -1784,14 +1784,13 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
> > >  					struct mnt_idmap *idmap)
> > >  {
> > >  	struct user_namespace *fs_userns = sb->s_user_ns;
> > > -	struct user_namespace *mnt_userns = mnt_idmap_owner(idmap);
> > >  	kuid_t kuid;
> > >  	kgid_t kgid;
> > >  
> > > -	kuid = mapped_fsuid(mnt_userns, fs_userns);
> > > +	kuid = mapped_fsuid(idmap, fs_userns);
> > >  	if (!uid_valid(kuid))
> > >  		return false;
> > > -	kgid = mapped_fsgid(mnt_userns, fs_userns);
> > > +	kgid = mapped_fsgid(idmap, fs_userns);
> > >  	if (!gid_valid(kgid))
> > >  		return false;
> > >  	return kuid_has_mapping(fs_userns, kuid) &&
> > > diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
> > > index 0ccca33a7a6d..d63e7c84a389 100644
> > > --- a/include/linux/mnt_idmapping.h
> > > +++ b/include/linux/mnt_idmapping.h
> > > @@ -375,8 +375,8 @@ static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid)
> > >  }
> > >  
> > >  /**
> > > - * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns
> > > - * @mnt_userns: the mount's idmapping
> > > + * mapped_fsuid - return caller's fsuid mapped according to an idmapping
> > > + * @idmap: the mount's idmapping
> > >   * @fs_userns: the filesystem's idmapping
> > >   *
> > >   * Use this helper to initialize a new vfs or filesystem object based on
> > > @@ -385,18 +385,19 @@ static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid)
> > >   * O_CREAT. Other examples include the allocation of quotas for a specific
> > >   * user.
> > >   *
> > > - * Return: the caller's current fsuid mapped up according to @mnt_userns.
> > > + * Return: the caller's current fsuid mapped up according to @idmap.
> > >   */
> > > -static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns,
> > > +static inline kuid_t mapped_fsuid(struct mnt_idmap *idmap,
> > >  				  struct user_namespace *fs_userns)
> > >  {
> > > +	struct user_namespace *mnt_userns = mnt_idmap_owner(idmap);
> > >  	return from_vfsuid(mnt_userns, fs_userns,
> > >  			   VFSUIDT_INIT(current_fsuid()));
> > >  }
> > >  
> > >  /**
> > > - * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns
> > > - * @mnt_userns: the mount's idmapping
> > > + * mapped_fsgid - return caller's fsgid mapped according to an idmapping
> > > + * @idmap: the mount's idmapping
> > >   * @fs_userns: the filesystem's idmapping
> > >   *
> > >   * Use this helper to initialize a new vfs or filesystem object based on
> > > @@ -405,11 +406,12 @@ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns,
> > >   * O_CREAT. Other examples include the allocation of quotas for a specific
> > >   * user.
> > >   *
> > > - * Return: the caller's current fsgid mapped up according to @mnt_userns.
> > > + * Return: the caller's current fsgid mapped up according to @idmap.
> > >   */
> > > -static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns,
> > > +static inline kgid_t mapped_fsgid(struct mnt_idmap *idmap,
> > >  				  struct user_namespace *fs_userns)
> > >  {
> > > +	struct user_namespace *mnt_userns = mnt_idmap_owner(idmap);
> > >  	return from_vfsgid(mnt_userns, fs_userns,
> > >  			   VFSGIDT_INIT(current_fsgid()));
> > >  }
> > > 
> > > -- 
> > > 2.34.1
> > > 

  reply	other threads:[~2024-06-17 18:44 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-01-13 11:49 [PATCH 00/25] fs: finish conversion to mnt_idmap Christian Brauner
2023-01-13 11:49 ` [PATCH 01/25] f2fs: project ids aren't idmapped Christian Brauner
2023-01-16  7:02   ` Christoph Hellwig
2023-01-13 11:49 ` [PATCH 02/25] fs: port vfs_*() helpers to struct mnt_idmap Christian Brauner
2023-01-13 11:49 ` [PATCH 03/25] fs: port ->setattr() to pass mnt_idmap Christian Brauner
2023-01-16  7:05   ` Christoph Hellwig
2023-01-13 11:49 ` [PATCH 04/25] fs: port ->getattr() " Christian Brauner
2023-01-13 11:49 ` [PATCH 05/25] fs: port ->create() " Christian Brauner
2023-01-13 11:49 ` [PATCH 06/25] fs: port ->symlink() " Christian Brauner
2023-01-13 11:49 ` [PATCH 07/25] fs: port ->mkdir() " Christian Brauner
2023-01-13 11:49 ` [PATCH 08/25] fs: port ->mknod() " Christian Brauner
2023-01-13 11:49 ` [PATCH 09/25] fs: port ->rename() " Christian Brauner
2023-01-13 11:49 ` [PATCH 10/25] fs: port ->tmpfile() " Christian Brauner
2023-01-13 11:49 ` [PATCH 11/25] fs: port ->get_acl() " Christian Brauner
2023-01-13 11:49 ` [PATCH 12/25] fs: port ->set_acl() " Christian Brauner
2023-01-13 11:49 ` [PATCH 13/25] fs: port ->fileattr_set() " Christian Brauner
2023-01-13 11:49 ` [PATCH 14/25] fs: port ->permission() " Christian Brauner
2023-01-13 11:49 ` [PATCH 15/25] fs: port xattr to mnt_idmap Christian Brauner
2023-01-13 11:49 ` [PATCH 16/25] fs: port acl " Christian Brauner
2023-01-13 11:49 ` [PATCH 17/25] fs: port inode_init_owner() " Christian Brauner
2023-01-13 11:49 ` [PATCH 18/25] fs: port inode_owner_or_capable() " Christian Brauner
2023-01-13 11:49 ` [PATCH 19/25] fs: port privilege checking helpers " Christian Brauner
2023-01-13 11:49 ` [PATCH 20/25] quota: port " Christian Brauner
2023-01-13 11:49 ` [PATCH 21/25] fs: port i_{g,u}id_{needs_}update() " Christian Brauner
2023-01-13 11:49 ` [PATCH 22/25] fs: port i_{g,u}id_into_vfs{g,u}id() " Christian Brauner
2023-01-13 11:49 ` [PATCH 23/25] fs: port fs{g,u}id helpers " Christian Brauner
2024-06-14 23:15   ` Darrick J. Wong
2024-06-17  7:41     ` Christian Brauner
2024-06-17 18:44       ` Darrick J. Wong [this message]
2023-01-13 11:49 ` [PATCH 24/25] fs: port vfs{g,u}id " Christian Brauner
2023-01-13 11:49 ` [PATCH 25/25] fs: move mnt_idmap Christian Brauner
2023-01-16  7:09   ` Christoph Hellwig
2023-01-16  7:09 ` [PATCH 00/25] fs: finish conversion to mnt_idmap Christoph Hellwig
2023-01-17 22:31 ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240617184400.GA103057@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=brauner@kernel.org \
    --cc=hch@lst.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=sforshee@kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).