All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Serge E. Hallyn" <serue@us.ibm.com>
To: Benjamin Thery <benjamin.thery@bull.net>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Greg Kroah-Hartman <gregkh@suse.de>,
	Eric Biederman <ebiederm@xmission.com>,
	Serge Hallyn <serue@us.ibm.com>,
	linux-kernel@vger.kernel.org, Tejun Heo <htejun@gmail.com>,
	Al Viro <viro@ftp.linux.org.uk>,
	Daniel Lezcano <dlezcano@fr.ibm.com>
Subject: Re: [PATCH 10/10] sysfs: user namespaces: fix bug with clone(CLONE_NEWUSER) with fairsched
Date: Mon, 2 Jun 2008 14:24:59 -0500	[thread overview]
Message-ID: <20080602192459.GA18509@us.ibm.com> (raw)
In-Reply-To: <20080602134439.953880460@theryb.frec.bull.fr>

Quoting Benjamin Thery (benjamin.thery@bull.net):
> Mark the /sys/kernel/uids directory to be tagged so that processes in
> different user namespaces can remount /sys and see their own uid
> listings.
> 
> Without this patch, having CONFIG_FAIR_SCHED=y makes user namespaces
> unusable, because when you 
>   clone(CLONE_NEWUSER)
> it will auto-create the root userid and try to create
> /sys/kernel/uids/0.  Since that already exists from the parent user
> namespace, the create fails, and the clone misleadingly ends up
> returning -ENOMEM.
> 
> This patch fixes the issue by allowing each user namespace to remount
> /sys, and having /sys filter the /sys/kernel/uid/ entries by user
> namespace.
> 
> Signed-off-by: Serge Hallyn <serue@us.ibm.com>
> Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>

Thanks for picking this up, Benjamin.

Eric, please look this one over.  I think I removed everything that
shouldn't be here from the last version, and at this point only
do what I need to to access the user_ns where we need it, and tag
the appropriate /sys/ files.

thanks,
-serge

> ---
>  fs/sysfs/mount.c               |   24 ++++++++++++++++++++++++
>  include/linux/sched.h          |    1 +
>  include/linux/sysfs.h          |    9 +++++++++
>  include/linux/user_namespace.h |    1 +
>  kernel/user.c                  |   21 +++++++++++++++++++++
>  kernel/user_namespace.c        |    3 ++-
>  6 files changed, 58 insertions(+), 1 deletion(-)
> 
> Index: linux-mm/fs/sysfs/mount.c
> ===================================================================
> --- linux-mm.orig/fs/sysfs/mount.c
> +++ linux-mm/fs/sysfs/mount.c
> @@ -81,6 +81,7 @@ static int sysfs_fill_super(struct super
>  	sb->s_root = root;
>  	sb->s_fs_info = info;
>  	info->tag.net_ns = hold_net(current->nsproxy->net_ns);
> +	info->tag.user_ns = current->nsproxy->user_ns;
>  	return 0;
> 
>  out_err:
> @@ -100,6 +101,8 @@ static int sysfs_test_super(struct super
> 
>  	if (task->nsproxy->net_ns != info->tag.net_ns)
>  		found = 0;
> +	if (task->nsproxy->user_ns != info->tag.user_ns)
> +		found = 0;
> 
>  	return found;
>  }
> @@ -214,6 +217,27 @@ static struct pernet_operations sysfs_ne
>  };
>  #endif
> 
> +#ifdef CONFIG_USER_NS
> +void sysfs_userns_exit(struct user_namespace *user_ns)
> +{
> +	/* Allow the net namespace to go away while sysfs is still mounted. */
> +	struct super_block *sb;
> +	printk(KERN_NOTICE "sysfs: user namespace exiting\n");
> +	mutex_lock(&sysfs_rename_mutex);
> +	sysfs_grab_supers();
> +	mutex_lock(&sysfs_mutex);
> +	list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
> +		struct sysfs_super_info *info = sysfs_info(sb);
> +		if (info->tag.user_ns != user_ns)
> +			continue;
> +		info->tag.user_ns = NULL;
> +	}
> +	mutex_unlock(&sysfs_mutex);
> +	sysfs_release_supers();
> +	mutex_unlock(&sysfs_rename_mutex);
> +}
> +#endif
> +
>  int __init sysfs_init(void)
>  {
>  	int err = -ENOMEM;
> Index: linux-mm/include/linux/sched.h
> ===================================================================
> --- linux-mm.orig/include/linux/sched.h
> +++ linux-mm/include/linux/sched.h
> @@ -600,6 +600,7 @@ struct user_struct {
>  	/* Hash table maintenance information */
>  	struct hlist_node uidhash_node;
>  	uid_t uid;
> +	struct user_namespace *user_ns;
> 
>  #ifdef CONFIG_USER_SCHED
>  	struct task_group *tg;
> Index: linux-mm/include/linux/sysfs.h
> ===================================================================
> --- linux-mm.orig/include/linux/sysfs.h
> +++ linux-mm/include/linux/sysfs.h
> @@ -20,6 +20,7 @@
>  struct kobject;
>  struct module;
>  struct net;
> +struct user_namespace;
> 
>  /* FIXME
>   * The *owner field is no longer used, but leave around
> @@ -81,6 +82,7 @@ struct sysfs_ops {
> 
>  struct sysfs_tag_info {
>  	struct net *net_ns;
> +	struct user_namespace *user_ns;
>  };
> 
>  struct sysfs_tagged_dir_operations {
> @@ -138,6 +140,9 @@ int sysfs_enable_tagging(struct kobject 
> 
>  extern int __must_check sysfs_init(void);
> 
> +struct user_namespace;
> +void sysfs_userns_exit(struct user_namespace *user_ns);
> +
>  #else /* CONFIG_SYSFS */
> 
>  static inline int sysfs_schedule_callback(struct kobject *kobj,
> @@ -254,6 +259,10 @@ static inline int __must_check sysfs_ini
>  	return 0;
>  }
> 
> +static inline void sysfs_userns_exit(struct user_namespace *user_ns)
> +{
> +}
> +
>  static inline void sysfs_printk_last_file(void)
>  {
>  }
> Index: linux-mm/include/linux/user_namespace.h
> ===================================================================
> --- linux-mm.orig/include/linux/user_namespace.h
> +++ linux-mm/include/linux/user_namespace.h
> @@ -12,6 +12,7 @@
>  struct user_namespace {
>  	struct kref		kref;
>  	struct hlist_head	uidhash_table[UIDHASH_SZ];
> +	struct kset		*kset;
>  	struct user_struct	*root_user;
>  };
> 
> Index: linux-mm/kernel/user.c
> ===================================================================
> --- linux-mm.orig/kernel/user.c
> +++ linux-mm/kernel/user.c
> @@ -53,6 +53,7 @@ struct user_struct root_user = {
>  	.files		= ATOMIC_INIT(0),
>  	.sigpending	= ATOMIC_INIT(0),
>  	.locked_shm     = 0,
> +	.user_ns	= &init_user_ns,
>  #ifdef CONFIG_USER_SCHED
>  	.tg		= &init_task_group,
>  #endif
> @@ -236,6 +237,23 @@ static void uids_release(struct kobject 
>  	return;
>  }
> 
> +static const void *userns_sb_tag(struct sysfs_tag_info *info)
> +{
> +	return info->user_ns;
> +}
> +
> +static const void *userns_kobject_tag(struct kobject *kobj)
> +{
> +	struct user_struct *up;
> +	up = container_of(kobj, struct user_struct, kobj);
> +	return up->user_ns;
> +}
> +
> +static struct sysfs_tagged_dir_operations userns_tagged_dir_operations = {
> +	.sb_tag = userns_sb_tag,
> +	.kobject_tag = userns_kobject_tag,
> +};
> +
>  static struct kobj_type uids_ktype = {
>  	.sysfs_ops = &kobj_sysfs_ops,
>  	.default_attrs = uids_attributes,
> @@ -272,6 +290,8 @@ int __init uids_sysfs_init(void)
>  	if (!uids_kset)
>  		return -ENOMEM;
> 
> +	sysfs_enable_tagging(&uids_kset->kobj, &userns_tagged_dir_operations);
> +
>  	return uids_user_create(&root_user);
>  }
> 
> @@ -404,6 +424,7 @@ struct user_struct *alloc_uid(struct use
>  			goto out_unlock;
> 
>  		new->uid = uid;
> +		new->user_ns = ns;
>  		atomic_set(&new->__count, 1);
> 
>  		if (sched_create_user(new) < 0)
> Index: linux-mm/kernel/user_namespace.c
> ===================================================================
> --- linux-mm.orig/kernel/user_namespace.c
> +++ linux-mm/kernel/user_namespace.c
> @@ -22,7 +22,7 @@ static struct user_namespace *clone_user
>  	struct user_struct *new_user;
>  	int n;
> 
> -	ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
> +	ns = kzalloc(sizeof(struct user_namespace), GFP_KERNEL);
>  	if (!ns)
>  		return ERR_PTR(-ENOMEM);
> 
> @@ -71,6 +71,7 @@ void free_user_ns(struct kref *kref)
>  	struct user_namespace *ns;
> 
>  	ns = container_of(kref, struct user_namespace, kref);
> +	sysfs_userns_exit(ns);
>  	release_uids(ns);
>  	kfree(ns);
>  }
> 
> -- 

  reply	other threads:[~2008-06-02 19:25 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-06-02 13:44 [PATCH 00/10] sysfs tagged directories V4 Benjamin Thery
2008-06-02 13:44 ` [PATCH 01/10] sysfs: Support for preventing unmounts Benjamin Thery
2008-06-02 13:44 ` [PATCH 02/10] sysfs: sysfs_get_dentry add a sb parameter Benjamin Thery
2008-06-02 13:45 ` [PATCH 03/10] sysfs: Implement __sysfs_get_dentry Benjamin Thery
2008-06-02 13:45 ` [PATCH 04/10] sysfs: Rename Support multiple superblocks Benjamin Thery
2008-06-02 13:45 ` [PATCH 05/10] sysfs: sysfs_chmod_file handle " Benjamin Thery
2008-06-02 13:45 ` [PATCH 06/10] sysfs: Implement sysfs tagged directory support Benjamin Thery
2008-06-02 13:45 ` [PATCH 07/10] sysfs: Implement sysfs_delete_link and sysfs_rename_link Benjamin Thery
2008-06-02 13:45 ` [PATCH 08/10] driver core: Implement tagged directory support for device classes Benjamin Thery
2008-06-02 13:46 ` [PATCH 09/10] netns: Enable tagging for net_class directories in sysfs Benjamin Thery
2008-06-03  4:08   ` Greg KH
2008-06-03 12:16     ` Serge E. Hallyn
2008-06-03 15:24     ` Benjamin Thery
2008-06-03 16:35       ` Greg KH
2008-06-03 19:10         ` Benjamin Thery 
2008-06-02 13:46 ` [PATCH 10/10] sysfs: user namespaces: fix bug with clone(CLONE_NEWUSER) with fairsched Benjamin Thery
2008-06-02 19:24   ` Serge E. Hallyn [this message]
2008-06-02 21:17 ` [PATCH 00/10] sysfs tagged directories V4 Dan Smith

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080602192459.GA18509@us.ibm.com \
    --to=serue@us.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=benjamin.thery@bull.net \
    --cc=dlezcano@fr.ibm.com \
    --cc=ebiederm@xmission.com \
    --cc=gregkh@suse.de \
    --cc=htejun@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=viro@ftp.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.