linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Josef Bacik <josef@toxicpanda.com>
To: Christian Brauner <brauner@kernel.org>
Cc: linux-fsdevel@vger.kernel.org, Seth Forshee <sforshee@kernel.org>
Subject: Re: [PATCH 3/4] mnt_idmapping: decouple from namespaces
Date: Wed, 22 Nov 2023 09:26:57 -0500	[thread overview]
Message-ID: <20231122142657.GF1733890@perftesting> (raw)
In-Reply-To: <20231122-vfs-mnt_idmap-v1-3-dae4abdde5bd@kernel.org>

On Wed, Nov 22, 2023 at 01:44:39PM +0100, Christian Brauner wrote:
> There's no reason we need to couple mnt idmapping to namespaces in the
> way we currently do. Copy the idmapping when an idmapped mount is
> created and don't take any reference on the namespace at all.
> 
> We also can't easily refcount struct uid_gid_map because it needs to
> stay the size of a cacheline otherwise we risk performance regressions
> (Ignoring for a second that right now struct uid_gid_map isn't actually
>  64 byte but 72 but that's a fix for another patch series.).
> 
> Signed-off-by: Christian Brauner <brauner@kernel.org>
> ---
>  fs/mnt_idmapping.c      | 106 +++++++++++++++++++++++++++++++++++++++++-------
>  include/linux/uidgid.h  |  13 ++++++
>  kernel/user_namespace.c |   4 +-
>  3 files changed, 106 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
> index 35d78cb3c38a..64c5205e2b5e 100644
> --- a/fs/mnt_idmapping.c
> +++ b/fs/mnt_idmapping.c
> @@ -9,8 +9,16 @@
>  
>  #include "internal.h"
>  
> +/*
> + * Outside of this file vfs{g,u}id_t are always created from k{g,u}id_t,
> + * never from raw values. These are just internal helpers.
> + */
> +#define VFSUIDT_INIT_RAW(val) (vfsuid_t){ val }
> +#define VFSGIDT_INIT_RAW(val) (vfsgid_t){ val }
> +
>  struct mnt_idmap {
> -	struct user_namespace *owner;
> +	struct uid_gid_map uid_map;
> +	struct uid_gid_map gid_map;
>  	refcount_t count;
>  };
>  
> @@ -20,7 +28,6 @@ struct mnt_idmap {
>   * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...].
>   */
>  struct mnt_idmap nop_mnt_idmap = {
> -	.owner	= &init_user_ns,
>  	.count	= REFCOUNT_INIT(1),
>  };
>  EXPORT_SYMBOL_GPL(nop_mnt_idmap);
> @@ -65,7 +72,6 @@ vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
>  		     kuid_t kuid)
>  {
>  	uid_t uid;
> -	struct user_namespace *mnt_userns = idmap->owner;
>  
>  	if (idmap == &nop_mnt_idmap)
>  		return VFSUIDT_INIT(kuid);
> @@ -75,7 +81,7 @@ vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
>  		uid = from_kuid(fs_userns, kuid);
>  	if (uid == (uid_t)-1)
>  		return INVALID_VFSUID;
> -	return VFSUIDT_INIT(make_kuid(mnt_userns, uid));
> +	return VFSUIDT_INIT_RAW(map_id_down(&idmap->uid_map, uid));
>  }
>  EXPORT_SYMBOL_GPL(make_vfsuid);
>  
> @@ -103,7 +109,6 @@ vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
>  		     struct user_namespace *fs_userns, kgid_t kgid)
>  {
>  	gid_t gid;
> -	struct user_namespace *mnt_userns = idmap->owner;
>  
>  	if (idmap == &nop_mnt_idmap)
>  		return VFSGIDT_INIT(kgid);
> @@ -113,7 +118,7 @@ vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
>  		gid = from_kgid(fs_userns, kgid);
>  	if (gid == (gid_t)-1)
>  		return INVALID_VFSGID;
> -	return VFSGIDT_INIT(make_kgid(mnt_userns, gid));
> +	return VFSGIDT_INIT_RAW(map_id_down(&idmap->gid_map, gid));
>  }
>  EXPORT_SYMBOL_GPL(make_vfsgid);
>  
> @@ -132,11 +137,10 @@ kuid_t from_vfsuid(struct mnt_idmap *idmap,
>  		   struct user_namespace *fs_userns, vfsuid_t vfsuid)
>  {
>  	uid_t uid;
> -	struct user_namespace *mnt_userns = idmap->owner;
>  
>  	if (idmap == &nop_mnt_idmap)
>  		return AS_KUIDT(vfsuid);
> -	uid = from_kuid(mnt_userns, AS_KUIDT(vfsuid));
> +	uid = map_id_up(&idmap->uid_map, __vfsuid_val(vfsuid));
>  	if (uid == (uid_t)-1)
>  		return INVALID_UID;
>  	if (initial_idmapping(fs_userns))
> @@ -160,11 +164,10 @@ kgid_t from_vfsgid(struct mnt_idmap *idmap,
>  		   struct user_namespace *fs_userns, vfsgid_t vfsgid)
>  {
>  	gid_t gid;
> -	struct user_namespace *mnt_userns = idmap->owner;
>  
>  	if (idmap == &nop_mnt_idmap)
>  		return AS_KGIDT(vfsgid);
> -	gid = from_kgid(mnt_userns, AS_KGIDT(vfsgid));
> +	gid = map_id_up(&idmap->gid_map, __vfsgid_val(vfsgid));
>  	if (gid == (gid_t)-1)
>  		return INVALID_GID;
>  	if (initial_idmapping(fs_userns))
> @@ -195,16 +198,91 @@ int vfsgid_in_group_p(vfsgid_t vfsgid)
>  #endif
>  EXPORT_SYMBOL_GPL(vfsgid_in_group_p);
>  
> +static int copy_mnt_idmap(struct uid_gid_map *map_from,
> +			  struct uid_gid_map *map_to)
> +{
> +	struct uid_gid_extent *forward, *reverse;
> +	u32 nr_extents = READ_ONCE(map_from->nr_extents);
> +	/* Pairs with smp_wmb() when writing the idmapping. */
> +	smp_rmb();
> +
> +	/*
> +	 * Don't blindly copy @map_to into @map_from if nr_extents is
> +	 * smaller or equal to UID_GID_MAP_MAX_BASE_EXTENTS. Since we
> +	 * read @nr_extents someone could have written an idmapping and
> +	 * then we might end up with inconsistent data. So just don't do
> +	 * anything at all.
> +	 */
> +	if (nr_extents == 0)
> +		return 0;
> +
> +	/*
> +	 * Here we know that nr_extents is greater than zero which means
> +	 * a map has been written. Since idmappings can't be changed
> +	 * once they have been written we know that we can safely copy
> +	 * from @map_to into @map_from.
> +	 */
> +
> +	if (nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
> +		*map_to = *map_from;
> +		return 0;
> +	}
> +
> +	forward = kmemdup(map_from->forward,
> +			  nr_extents * sizeof(struct uid_gid_extent),
> +			  GFP_KERNEL_ACCOUNT);
> +	if (!forward)
> +		return -ENOMEM;
> +
> +	reverse = kmemdup(map_from->reverse,
> +			  nr_extents * sizeof(struct uid_gid_extent),
> +			  GFP_KERNEL_ACCOUNT);
> +	if (!reverse) {
> +		kfree(forward);
> +		return -ENOMEM;
> +	}
> +
> +	/*
> +	 * The idmapping isn't exposed anywhere so we don't need to care
> +	 * about ordering between extent pointers and @nr_extents
> +	 * initialization.
> +	 */
> +	map_to->forward = forward;
> +	map_to->reverse = reverse;
> +	map_to->nr_extents = nr_extents;
> +	return 0;
> +}
> +
> +static void free_mnt_idmap(struct mnt_idmap *idmap)
> +{
> +	if (idmap->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
> +		kfree(idmap->uid_map.forward);
> +		kfree(idmap->uid_map.reverse);
> +	}
> +	if (idmap->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
> +		kfree(idmap->gid_map.forward);
> +		kfree(idmap->gid_map.reverse);
> +	}
> +	kfree(idmap);
> +}
> +
>  struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns)
>  {
>  	struct mnt_idmap *idmap;
> +	int ret;
>  
>  	idmap = kzalloc(sizeof(struct mnt_idmap), GFP_KERNEL_ACCOUNT);
>  	if (!idmap)
>  		return ERR_PTR(-ENOMEM);
>  
> -	idmap->owner = get_user_ns(mnt_userns);
>  	refcount_set(&idmap->count, 1);
> +	ret = copy_mnt_idmap(&mnt_userns->uid_map, &idmap->uid_map);
> +	if (!ret)
> +		ret = copy_mnt_idmap(&mnt_userns->gid_map, &idmap->gid_map);
> +	if (ret) {
> +		free_mnt_idmap(idmap);
> +		idmap = ERR_PTR(ret);
> +	}
>  	return idmap;
>  }
>  
> @@ -234,9 +312,7 @@ EXPORT_SYMBOL_GPL(mnt_idmap_get);
>   */
>  void mnt_idmap_put(struct mnt_idmap *idmap)
>  {
> -	if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count)) {
> -		put_user_ns(idmap->owner);
> -		kfree(idmap);
> -	}
> +	if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count))
> +		free_mnt_idmap(idmap);
>  }
>  EXPORT_SYMBOL_GPL(mnt_idmap_put);
> diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h
> index b0542cd11aeb..7806e93b907d 100644
> --- a/include/linux/uidgid.h
> +++ b/include/linux/uidgid.h
> @@ -17,6 +17,7 @@
>  
>  struct user_namespace;
>  extern struct user_namespace init_user_ns;
> +struct uid_gid_map;
>  
>  typedef struct {
>  	uid_t val;
> @@ -138,6 +139,9 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
>  	return from_kgid(ns, gid) != (gid_t) -1;
>  }
>  
> +u32 map_id_down(struct uid_gid_map *map, u32 id);
> +u32 map_id_up(struct uid_gid_map *map, u32 id);
> +
>  #else
>  
>  static inline kuid_t make_kuid(struct user_namespace *from, uid_t uid)
> @@ -186,6 +190,15 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
>  	return gid_valid(gid);
>  }
>  
> +static inline u32 map_id_down(struct uid_gid_map *map, u32 id)
> +{
> +	return id;
> +}
> +
> +static inline u32 map_id_up(struct uid_gid_map *map, u32 id);

You accidentally put a ; here, and then fix it up in the next patch, it needs to
be fixed here.  Thanks,

Josef

  reply	other threads:[~2023-11-22 14:26 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-22 12:44 [PATCH 0/4] mnt_idmapping: decouple from namespaces Christian Brauner
2023-11-22 12:44 ` [PATCH 1/4] mnt_idmapping: remove check_fsmapping() Christian Brauner
2023-11-22 12:44 ` [PATCH 2/4] mnt_idmapping: remove nop check Christian Brauner
2023-11-22 12:44 ` [PATCH 3/4] mnt_idmapping: decouple from namespaces Christian Brauner
2023-11-22 14:26   ` Josef Bacik [this message]
2023-11-22 14:34     ` Christian Brauner
2023-11-22 15:14       ` Josef Bacik
2023-11-22 12:44 ` [PATCH 4/4] fs: reformat idmapped mounts entry Christian Brauner
2023-11-24  7:52 ` [PATCH 0/4] mnt_idmapping: decouple from namespaces Christian Brauner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231122142657.GF1733890@perftesting \
    --to=josef@toxicpanda.com \
    --cc=brauner@kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=sforshee@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).