* Re: [PATCH 03/11] pidns: Capture the user namespace and filter ns_last_pid
@ 2012-11-19 12:27 Zhao Hongjiang
[not found] ` <50AA259A.5030007-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Zhao Hongjiang @ 2012-11-19 12:27 UTC (permalink / raw)
To: Eric W. Biederman; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
On 2012/11/17 0:35, Eric W. Biederman wrote:
> From: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
>
> - Capture the the user namespace that creates the pid namespace
> - Use that user namespace to test if it is ok to write to
> /proc/sys/kernel/ns_last_pid.
>
> Acked-by: Serge Hallyn <serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
> ---
> include/linux/pid_namespace.h | 8 +++++---
> kernel/nsproxy.c | 2 +-
> kernel/pid.c | 1 +
> kernel/pid_namespace.c | 16 +++++++++++-----
> 4 files changed, 18 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
> index 65e3e87..c89c9cf 100644
> --- a/include/linux/pid_namespace.h
> +++ b/include/linux/pid_namespace.h
> @@ -31,6 +31,7 @@ struct pid_namespace {
> #ifdef CONFIG_BSD_PROCESS_ACCT
> struct bsd_acct_struct *bacct;
> #endif
> + struct user_namespace *user_ns;
> kgid_t pid_gid;
> int hide_pid;
> int reboot; /* group exit code if this pidns was rebooted */
> @@ -46,7 +47,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
> return ns;
> }
>
> -extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
> +extern struct pid_namespace *copy_pid_ns(unsigned long flags,
> + struct user_namespace *user_ns, struct pid_namespace *ns);
> extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
> extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
> extern void put_pid_ns(struct pid_namespace *ns);
> @@ -59,8 +61,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
> return ns;
> }
>
> -static inline struct pid_namespace *
> -copy_pid_ns(unsigned long flags, struct pid_namespace *ns)
> +static inline struct pid_namespace *copy_pid_ns(unsigned long flags,
> + struct user_namespace *user_ns, struct pid_namespace *ns)
> {
> if (flags & CLONE_NEWPID)
> ns = ERR_PTR(-EINVAL);
> diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
> index b576f7f..5fe88e1 100644
> --- a/kernel/nsproxy.c
> +++ b/kernel/nsproxy.c
> @@ -84,7 +84,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
> goto out_ipc;
> }
>
> - new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk));
> + new_nsp->pid_ns = copy_pid_ns(flags, task_cred_xxx(tsk, user_ns), task_active_pid_ns(tsk));
> if (IS_ERR(new_nsp->pid_ns)) {
> err = PTR_ERR(new_nsp->pid_ns);
> goto out_pid;
> diff --git a/kernel/pid.c b/kernel/pid.c
> index aebd4f5..2a624f1 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -78,6 +78,7 @@ struct pid_namespace init_pid_ns = {
> .last_pid = 0,
> .level = 0,
> .child_reaper = &init_task,
> + .user_ns = &init_user_ns,
> };
> EXPORT_SYMBOL_GPL(init_pid_ns);
>
> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
> index 7b07cc0..7580aa0 100644
> --- a/kernel/pid_namespace.c
> +++ b/kernel/pid_namespace.c
> @@ -10,6 +10,7 @@
>
> #include <linux/pid.h>
> #include <linux/pid_namespace.h>
> +#include <linux/user_namespace.h>
> #include <linux/syscalls.h>
> #include <linux/err.h>
> #include <linux/acct.h>
> @@ -74,7 +75,8 @@ err_alloc:
> /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
> #define MAX_PID_NS_LEVEL 32
>
> -static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns)
> +static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
> + struct pid_namespace *parent_pid_ns)
> {
> struct pid_namespace *ns;
> unsigned int level = parent_pid_ns->level + 1;
> @@ -102,6 +104,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
> kref_init(&ns->kref);
> ns->level = level;
> ns->parent = get_pid_ns(parent_pid_ns);
> + ns->user_ns = get_user_ns(user_ns);
Hi Eric,
I noticed that, the increment refcount of user_ns has never released when the pid_ns's refcount is
down to zero. And i have sent out a patch to solve this on Nov 2th.
Am i misunderstand something?
Hongjiang
>
> set_bit(0, ns->pidmap[0].page);
> atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
> @@ -117,6 +120,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
>
> out_put_parent_pid_ns:
> put_pid_ns(parent_pid_ns);
> + put_user_ns(user_ns);
> out_free_map:
> kfree(ns->pidmap[0].page);
> out_free:
> @@ -134,13 +138,14 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
> kmem_cache_free(pid_ns_cachep, ns);
> }
>
> -struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
> +struct pid_namespace *copy_pid_ns(unsigned long flags,
> + struct user_namespace *user_ns, struct pid_namespace *old_ns)
> {
> if (!(flags & CLONE_NEWPID))
> return get_pid_ns(old_ns);
> if (flags & (CLONE_THREAD|CLONE_PARENT))
> return ERR_PTR(-EINVAL);
> - return create_pid_namespace(old_ns);
> + return create_pid_namespace(user_ns, old_ns);
> }
>
> static void free_pid_ns(struct kref *kref)
> @@ -239,9 +244,10 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
> static int pid_ns_ctl_handler(struct ctl_table *table, int write,
> void __user *buffer, size_t *lenp, loff_t *ppos)
> {
> + struct pid_namespace *pid_ns = task_active_pid_ns(current);
> struct ctl_table tmp = *table;
>
> - if (write && !capable(CAP_SYS_ADMIN))
> + if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN))
> return -EPERM;
>
> /*
> @@ -250,7 +256,7 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write,
> * it should synchronize its usage with external means.
> */
>
> - tmp.data = ¤t->nsproxy->pid_ns->last_pid;
> + tmp.data = &pid_ns->last_pid;
> return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
> }
>
>
.
^ permalink raw reply [flat|nested] 6+ messages in thread[parent not found: <50AA259A.5030007-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>]
* Re: [PATCH 03/11] pidns: Capture the user namespace and filter ns_last_pid [not found] ` <50AA259A.5030007-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> @ 2012-11-19 12:41 ` Eric W. Biederman 0 siblings, 0 replies; 6+ messages in thread From: Eric W. Biederman @ 2012-11-19 12:41 UTC (permalink / raw) To: Zhao Hongjiang; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA Zhao Hongjiang <zhaohongjiang37-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes: > Hi Eric, > > I noticed that, the increment refcount of user_ns has never released when the pid_ns's refcount is > down to zero. And i have sent out a patch to solve this on Nov 2th. > > Am i misunderstand something? No I am. I have now added your patch to my tree and I hope to post and updated patchset shortly. Oops. Eric ^ permalink raw reply [flat|nested] 6+ messages in thread
* [REVIEW][PATCH 0/11] pid namespace cleanups and enhancements
@ 2012-11-16 16:32 Eric W. Biederman
2012-11-16 16:35 ` [PATCH 01/11] procfs: Use the proc generic infrastructure for proc/self Eric W. Biederman
0 siblings, 1 reply; 6+ messages in thread
From: Eric W. Biederman @ 2012-11-16 16:32 UTC (permalink / raw)
To: Linux Containers
Cc: Andrew Morton, linux-kernel-u79uwXL29TY76Z2rM5mHXA, Oleg Nesterov
This patchset is my pile of pid namespace patches that I have been
sitting on for entirely too long. I have been running and testing these
changes for a while but if anyone sees any problems please let me know.
Feature wise this patchset adds unshare and setns support for the pid
namespace.
Cleanup wise this patchset adds an explicit count of how many pids are
hashed in a pid namespace and uses that count to trigger the unmounting
of the internal kernel mount of proc. The current scheme is buggy and
entirely too clever to continue living.
Some proc bits that were added to support the pid namespace initially
are removed, as they are no no longer necessary.
These patches are also available at:
git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git pidns-v73
Since some of this work is closely allied with the user namespace bits I
have pending I intend to merge these changes through my user namespace
tree.
Eric W. Biederman (11):
procfs: Use the proc generic infrastructure for proc/self.
procfs: Don't cache a pid in the root inode.
pidns: Capture the user namespace and filter ns_last_pid
pidns: Use task_active_pid_ns where appropriate
pidns: Make the pidns proc mount/umount logic obvious.
pidns: Don't allow new processes in a dead pid namespace.
pidns: Wait in zap_pid_ns_processes until pid_ns->nr_hashed == 1
pidns: Deny strange cases when creating pid namespaces.
pidns: Add setns support
pidns: Consolidate initialzation of special init task state
pidns: Support unsharing the pid namespace.
arch/powerpc/platforms/cell/spufs/sched.c | 2 +-
arch/um/drivers/mconsole_kern.c | 2 +-
drivers/staging/android/binder.c | 3 +-
fs/hppfs/hppfs.c | 2 +-
fs/proc/Makefile | 1 +
fs/proc/base.c | 169 +----------------------------
fs/proc/internal.h | 1 +
fs/proc/namespaces.c | 3 +
fs/proc/root.c | 16 +---
fs/proc/self.c | 59 ++++++++++
include/linux/pid_namespace.h | 10 ++-
include/linux/proc_fs.h | 1 +
init/main.c | 1 -
kernel/cgroup.c | 2 +-
kernel/events/core.c | 2 +-
kernel/exit.c | 12 --
kernel/fork.c | 42 +++++---
kernel/nsproxy.c | 4 +-
kernel/pid.c | 46 +++++++--
kernel/pid_namespace.c | 99 +++++++++++++----
kernel/signal.c | 2 +-
kernel/sysctl_binary.c | 2 +-
22 files changed, 231 insertions(+), 250 deletions(-)
^ permalink raw reply [flat|nested] 6+ messages in thread* [PATCH 01/11] procfs: Use the proc generic infrastructure for proc/self. @ 2012-11-16 16:35 ` Eric W. Biederman [not found] ` <1353083750-3621-1-git-send-email-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> 0 siblings, 1 reply; 6+ messages in thread From: Eric W. Biederman @ 2012-11-16 16:35 UTC (permalink / raw) To: Linux Containers Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andrew Morton, Oleg Nesterov, Eric W. Biederman From: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> I had visions at one point of splitting proc into two filesystems. If that had happened proc/self being the the part of proc that actually deals with pids would have been a nice cleanup. As it is proc/self requires a lot of unnecessary infrastructure for a single file. The only user visible change is that a mounted /proc for a pid namespace that is dead now shows a broken proc symlink, instead of being completely invisible. I don't think anyone will notice or care. Signed-off-by: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> --- fs/proc/Makefile | 1 + fs/proc/base.c | 154 +--------------------------------------------------- fs/proc/internal.h | 1 + fs/proc/root.c | 1 + fs/proc/self.c | 59 ++++++++++++++++++++ 5 files changed, 64 insertions(+), 152 deletions(-) create mode 100644 fs/proc/self.c diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 99349ef..981b056 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -21,6 +21,7 @@ proc-y += uptime.o proc-y += version.o proc-y += softirqs.o proc-y += namespaces.o +proc-y += self.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o diff --git a/fs/proc/base.c b/fs/proc/base.c index 144a967..cbe454e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2237,146 +2237,6 @@ static const struct file_operations proc_coredump_filter_operations = { }; #endif -/* - * /proc/self: - */ -static int proc_self_readlink(struct dentry *dentry, char __user *buffer, - int buflen) -{ - struct pid_namespace *ns = dentry->d_sb->s_fs_info; - pid_t tgid = task_tgid_nr_ns(current, ns); - char tmp[PROC_NUMBUF]; - if (!tgid) - return -ENOENT; - sprintf(tmp, "%d", tgid); - return vfs_readlink(dentry,buffer,buflen,tmp); -} - -static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct pid_namespace *ns = dentry->d_sb->s_fs_info; - pid_t tgid = task_tgid_nr_ns(current, ns); - char *name = ERR_PTR(-ENOENT); - if (tgid) { - /* 11 for max length of signed int in decimal + NULL term */ - name = kmalloc(12, GFP_KERNEL); - if (!name) - name = ERR_PTR(-ENOMEM); - else - sprintf(name, "%d", tgid); - } - nd_set_link(nd, name); - return NULL; -} - -static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) -{ - char *s = nd_get_link(nd); - if (!IS_ERR(s)) - kfree(s); -} - -static const struct inode_operations proc_self_inode_operations = { - .readlink = proc_self_readlink, - .follow_link = proc_self_follow_link, - .put_link = proc_self_put_link, -}; - -/* - * proc base - * - * These are the directory entries in the root directory of /proc - * that properly belong to the /proc filesystem, as they describe - * describe something that is process related. - */ -static const struct pid_entry proc_base_stuff[] = { - NOD("self", S_IFLNK|S_IRWXUGO, - &proc_self_inode_operations, NULL, {}), -}; - -static struct dentry *proc_base_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) -{ - const struct pid_entry *p = ptr; - struct inode *inode; - struct proc_inode *ei; - struct dentry *error; - - /* Allocate the inode */ - error = ERR_PTR(-ENOMEM); - inode = new_inode(dir->i_sb); - if (!inode) - goto out; - - /* Initialize the inode */ - ei = PROC_I(inode); - inode->i_ino = get_next_ino(); - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - - /* - * grab the reference to the task. - */ - ei->pid = get_task_pid(task, PIDTYPE_PID); - if (!ei->pid) - goto out_iput; - - inode->i_mode = p->mode; - if (S_ISDIR(inode->i_mode)) - set_nlink(inode, 2); - if (S_ISLNK(inode->i_mode)) - inode->i_size = 64; - if (p->iop) - inode->i_op = p->iop; - if (p->fop) - inode->i_fop = p->fop; - ei->op = p->op; - d_add(dentry, inode); - error = NULL; -out: - return error; -out_iput: - iput(inode); - goto out; -} - -static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) -{ - struct dentry *error; - struct task_struct *task = get_proc_task(dir); - const struct pid_entry *p, *last; - - error = ERR_PTR(-ENOENT); - - if (!task) - goto out_no_task; - - /* Lookup the directory entry */ - last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1]; - for (p = proc_base_stuff; p <= last; p++) { - if (p->len != dentry->d_name.len) - continue; - if (!memcmp(dentry->d_name.name, p->name, p->len)) - break; - } - if (p > last) - goto out; - - error = proc_base_instantiate(dir, dentry, task, p); - -out: - put_task_struct(task); -out_no_task: - return error; -} - -static int proc_base_fill_cache(struct file *filp, void *dirent, - filldir_t filldir, struct task_struct *task, const struct pid_entry *p) -{ - return proc_fill_cache(filp, dirent, filldir, p->name, p->len, - proc_base_instantiate, task, p); -} - #ifdef CONFIG_TASK_IO_ACCOUNTING static int do_io_accounting(struct task_struct *task, char *buffer, int whole) { @@ -2767,15 +2627,11 @@ out: struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { - struct dentry *result; + struct dentry *result = NULL; struct task_struct *task; unsigned tgid; struct pid_namespace *ns; - result = proc_base_lookup(dir, dentry); - if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) - goto out; - tgid = name_to_int(dentry); if (tgid == ~0U) goto out; @@ -2838,7 +2694,7 @@ retry: return iter; } -#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) +#define TGID_OFFSET (FIRST_PROCESS_ENTRY) static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, struct tgid_iter iter) @@ -2872,12 +2728,6 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) if (!reaper) goto out_no_task; - for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { - const struct pid_entry *p = &proc_base_stuff[nr]; - if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) - goto out; - } - ns = filp->f_dentry->d_sb->s_fs_info; iter.task = NULL; iter.tgid = filp->f_pos - TGID_OFFSET; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 43973b0..252544c 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -15,6 +15,7 @@ struct ctl_table_header; struct mempolicy; extern struct proc_dir_entry proc_root; +extern void proc_self_init(void); #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); extern void sysctl_head_put(struct ctl_table_header *head); diff --git a/fs/proc/root.c b/fs/proc/root.c index 9889a92..5da9849 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -169,6 +169,7 @@ void __init proc_root_init(void) return; } + proc_self_init(); proc_symlink("mounts", NULL, "self/mounts"); proc_net_init(); diff --git a/fs/proc/self.c b/fs/proc/self.c new file mode 100644 index 0000000..aa5cc3b --- /dev/null +++ b/fs/proc/self.c @@ -0,0 +1,59 @@ +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <linux/namei.h> + +/* + * /proc/self: + */ +static int proc_self_readlink(struct dentry *dentry, char __user *buffer, + int buflen) +{ + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); + char tmp[PROC_NUMBUF]; + if (!tgid) + return -ENOENT; + sprintf(tmp, "%d", tgid); + return vfs_readlink(dentry,buffer,buflen,tmp); +} + +static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); + char *name = ERR_PTR(-ENOENT); + if (tgid) { + /* 11 for max length of signed int in decimal + NULL term */ + name = kmalloc(12, GFP_KERNEL); + if (!name) + name = ERR_PTR(-ENOMEM); + else + sprintf(name, "%d", tgid); + } + nd_set_link(nd, name); + return NULL; +} + +static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, + void *cookie) +{ + char *s = nd_get_link(nd); + if (!IS_ERR(s)) + kfree(s); +} + +static const struct inode_operations proc_self_inode_operations = { + .readlink = proc_self_readlink, + .follow_link = proc_self_follow_link, + .put_link = proc_self_put_link, +}; + +void __init proc_self_init(void) +{ + struct proc_dir_entry *proc_self_symlink; + mode_t mode; + + mode = S_IFLNK | S_IRWXUGO; + proc_self_symlink = proc_create("self", mode, NULL, NULL ); + proc_self_symlink->proc_iops = &proc_self_inode_operations; +} -- 1.7.5.4 ^ permalink raw reply related [flat|nested] 6+ messages in thread
[parent not found: <1353083750-3621-1-git-send-email-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>]
* [PATCH 03/11] pidns: Capture the user namespace and filter ns_last_pid 2012-11-16 16:35 ` [PATCH 01/11] procfs: Use the proc generic infrastructure for proc/self Eric W. Biederman @ 2012-11-16 16:35 ` Eric W. Biederman 0 siblings, 0 replies; 6+ messages in thread From: Eric W. Biederman @ 2012-11-16 16:35 UTC (permalink / raw) To: Linux Containers Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andrew Morton, Oleg Nesterov, Eric W. Biederman From: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> - Capture the the user namespace that creates the pid namespace - Use that user namespace to test if it is ok to write to /proc/sys/kernel/ns_last_pid. Acked-by: Serge Hallyn <serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> --- include/linux/pid_namespace.h | 8 +++++--- kernel/nsproxy.c | 2 +- kernel/pid.c | 1 + kernel/pid_namespace.c | 16 +++++++++++----- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 65e3e87..c89c9cf 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -31,6 +31,7 @@ struct pid_namespace { #ifdef CONFIG_BSD_PROCESS_ACCT struct bsd_acct_struct *bacct; #endif + struct user_namespace *user_ns; kgid_t pid_gid; int hide_pid; int reboot; /* group exit code if this pidns was rebooted */ @@ -46,7 +47,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) return ns; } -extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); +extern struct pid_namespace *copy_pid_ns(unsigned long flags, + struct user_namespace *user_ns, struct pid_namespace *ns); extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd); extern void put_pid_ns(struct pid_namespace *ns); @@ -59,8 +61,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) return ns; } -static inline struct pid_namespace * -copy_pid_ns(unsigned long flags, struct pid_namespace *ns) +static inline struct pid_namespace *copy_pid_ns(unsigned long flags, + struct user_namespace *user_ns, struct pid_namespace *ns) { if (flags & CLONE_NEWPID) ns = ERR_PTR(-EINVAL); diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index b576f7f..5fe88e1 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -84,7 +84,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_ipc; } - new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk)); + new_nsp->pid_ns = copy_pid_ns(flags, task_cred_xxx(tsk, user_ns), task_active_pid_ns(tsk)); if (IS_ERR(new_nsp->pid_ns)) { err = PTR_ERR(new_nsp->pid_ns); goto out_pid; diff --git a/kernel/pid.c b/kernel/pid.c index aebd4f5..2a624f1 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -78,6 +78,7 @@ struct pid_namespace init_pid_ns = { .last_pid = 0, .level = 0, .child_reaper = &init_task, + .user_ns = &init_user_ns, }; EXPORT_SYMBOL_GPL(init_pid_ns); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 7b07cc0..7580aa0 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -10,6 +10,7 @@ #include <linux/pid.h> #include <linux/pid_namespace.h> +#include <linux/user_namespace.h> #include <linux/syscalls.h> #include <linux/err.h> #include <linux/acct.h> @@ -74,7 +75,8 @@ err_alloc: /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ #define MAX_PID_NS_LEVEL 32 -static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns) +static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns, + struct pid_namespace *parent_pid_ns) { struct pid_namespace *ns; unsigned int level = parent_pid_ns->level + 1; @@ -102,6 +104,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p kref_init(&ns->kref); ns->level = level; ns->parent = get_pid_ns(parent_pid_ns); + ns->user_ns = get_user_ns(user_ns); set_bit(0, ns->pidmap[0].page); atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); @@ -117,6 +120,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p out_put_parent_pid_ns: put_pid_ns(parent_pid_ns); + put_user_ns(user_ns); out_free_map: kfree(ns->pidmap[0].page); out_free: @@ -134,13 +138,14 @@ static void destroy_pid_namespace(struct pid_namespace *ns) kmem_cache_free(pid_ns_cachep, ns); } -struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) +struct pid_namespace *copy_pid_ns(unsigned long flags, + struct user_namespace *user_ns, struct pid_namespace *old_ns) { if (!(flags & CLONE_NEWPID)) return get_pid_ns(old_ns); if (flags & (CLONE_THREAD|CLONE_PARENT)) return ERR_PTR(-EINVAL); - return create_pid_namespace(old_ns); + return create_pid_namespace(user_ns, old_ns); } static void free_pid_ns(struct kref *kref) @@ -239,9 +244,10 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) static int pid_ns_ctl_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct pid_namespace *pid_ns = task_active_pid_ns(current); struct ctl_table tmp = *table; - if (write && !capable(CAP_SYS_ADMIN)) + if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; /* @@ -250,7 +256,7 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, * it should synchronize its usage with external means. */ - tmp.data = ¤t->nsproxy->pid_ns->last_pid; + tmp.data = &pid_ns->last_pid; return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); } -- 1.7.5.4 ^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 03/11] pidns: Capture the user namespace and filter ns_last_pid @ 2012-11-16 16:35 ` Eric W. Biederman 0 siblings, 0 replies; 6+ messages in thread From: Eric W. Biederman @ 2012-11-16 16:35 UTC (permalink / raw) To: Linux Containers Cc: linux-kernel, Oleg Nesterov, Serge Hallyn, Gao feng, Andrew Morton, Eric W. Biederman From: "Eric W. Biederman" <ebiederm@xmission.com> - Capture the the user namespace that creates the pid namespace - Use that user namespace to test if it is ok to write to /proc/sys/kernel/ns_last_pid. Acked-by: Serge Hallyn <serge.hallyn@canonical.com> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> --- include/linux/pid_namespace.h | 8 +++++--- kernel/nsproxy.c | 2 +- kernel/pid.c | 1 + kernel/pid_namespace.c | 16 +++++++++++----- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 65e3e87..c89c9cf 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -31,6 +31,7 @@ struct pid_namespace { #ifdef CONFIG_BSD_PROCESS_ACCT struct bsd_acct_struct *bacct; #endif + struct user_namespace *user_ns; kgid_t pid_gid; int hide_pid; int reboot; /* group exit code if this pidns was rebooted */ @@ -46,7 +47,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) return ns; } -extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); +extern struct pid_namespace *copy_pid_ns(unsigned long flags, + struct user_namespace *user_ns, struct pid_namespace *ns); extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd); extern void put_pid_ns(struct pid_namespace *ns); @@ -59,8 +61,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) return ns; } -static inline struct pid_namespace * -copy_pid_ns(unsigned long flags, struct pid_namespace *ns) +static inline struct pid_namespace *copy_pid_ns(unsigned long flags, + struct user_namespace *user_ns, struct pid_namespace *ns) { if (flags & CLONE_NEWPID) ns = ERR_PTR(-EINVAL); diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index b576f7f..5fe88e1 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -84,7 +84,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_ipc; } - new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk)); + new_nsp->pid_ns = copy_pid_ns(flags, task_cred_xxx(tsk, user_ns), task_active_pid_ns(tsk)); if (IS_ERR(new_nsp->pid_ns)) { err = PTR_ERR(new_nsp->pid_ns); goto out_pid; diff --git a/kernel/pid.c b/kernel/pid.c index aebd4f5..2a624f1 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -78,6 +78,7 @@ struct pid_namespace init_pid_ns = { .last_pid = 0, .level = 0, .child_reaper = &init_task, + .user_ns = &init_user_ns, }; EXPORT_SYMBOL_GPL(init_pid_ns); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 7b07cc0..7580aa0 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -10,6 +10,7 @@ #include <linux/pid.h> #include <linux/pid_namespace.h> +#include <linux/user_namespace.h> #include <linux/syscalls.h> #include <linux/err.h> #include <linux/acct.h> @@ -74,7 +75,8 @@ err_alloc: /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ #define MAX_PID_NS_LEVEL 32 -static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns) +static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns, + struct pid_namespace *parent_pid_ns) { struct pid_namespace *ns; unsigned int level = parent_pid_ns->level + 1; @@ -102,6 +104,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p kref_init(&ns->kref); ns->level = level; ns->parent = get_pid_ns(parent_pid_ns); + ns->user_ns = get_user_ns(user_ns); set_bit(0, ns->pidmap[0].page); atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); @@ -117,6 +120,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p out_put_parent_pid_ns: put_pid_ns(parent_pid_ns); + put_user_ns(user_ns); out_free_map: kfree(ns->pidmap[0].page); out_free: @@ -134,13 +138,14 @@ static void destroy_pid_namespace(struct pid_namespace *ns) kmem_cache_free(pid_ns_cachep, ns); } -struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) +struct pid_namespace *copy_pid_ns(unsigned long flags, + struct user_namespace *user_ns, struct pid_namespace *old_ns) { if (!(flags & CLONE_NEWPID)) return get_pid_ns(old_ns); if (flags & (CLONE_THREAD|CLONE_PARENT)) return ERR_PTR(-EINVAL); - return create_pid_namespace(old_ns); + return create_pid_namespace(user_ns, old_ns); } static void free_pid_ns(struct kref *kref) @@ -239,9 +244,10 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) static int pid_ns_ctl_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct pid_namespace *pid_ns = task_active_pid_ns(current); struct ctl_table tmp = *table; - if (write && !capable(CAP_SYS_ADMIN)) + if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; /* @@ -250,7 +256,7 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, * it should synchronize its usage with external means. */ - tmp.data = ¤t->nsproxy->pid_ns->last_pid; + tmp.data = &pid_ns->last_pid; return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); } -- 1.7.5.4 ^ permalink raw reply related [flat|nested] 6+ messages in thread
[parent not found: <1353083750-3621-3-git-send-email-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>]
* Re: [PATCH 03/11] pidns: Capture the user namespace and filter ns_last_pid 2012-11-16 16:35 ` Eric W. Biederman @ 2012-11-21 1:26 ` Gao feng -1 siblings, 0 replies; 6+ messages in thread From: Gao feng @ 2012-11-21 1:26 UTC (permalink / raw) To: Eric W. Biederman Cc: Linux Containers, linux-kernel-u79uwXL29TY76Z2rM5mHXA, Andrew Morton, Oleg Nesterov on 2012/11/17 00:35, Eric W. Biederman wrote: > From: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> > > - Capture the the user namespace that creates the pid namespace > - Use that user namespace to test if it is ok to write to > /proc/sys/kernel/ns_last_pid. > > Acked-by: Serge Hallyn <serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org> > Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> > --- Acked-by: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 03/11] pidns: Capture the user namespace and filter ns_last_pid @ 2012-11-21 1:26 ` Gao feng 0 siblings, 0 replies; 6+ messages in thread From: Gao feng @ 2012-11-21 1:26 UTC (permalink / raw) To: Eric W. Biederman Cc: Linux Containers, linux-kernel, Oleg Nesterov, Serge Hallyn, Andrew Morton on 2012/11/17 00:35, Eric W. Biederman wrote: > From: "Eric W. Biederman" <ebiederm@xmission.com> > > - Capture the the user namespace that creates the pid namespace > - Use that user namespace to test if it is ok to write to > /proc/sys/kernel/ns_last_pid. > > Acked-by: Serge Hallyn <serge.hallyn@canonical.com> > Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> > --- Acked-by: Gao feng <gaofeng@cn.fujitsu.com> ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2012-11-21 1:26 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-11-19 12:27 [PATCH 03/11] pidns: Capture the user namespace and filter ns_last_pid Zhao Hongjiang
[not found] ` <50AA259A.5030007-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2012-11-19 12:41 ` Eric W. Biederman
-- strict thread matches above, loose matches on Subject: below --
2012-11-16 16:32 [REVIEW][PATCH 0/11] pid namespace cleanups and enhancements Eric W. Biederman
2012-11-16 16:35 ` [PATCH 01/11] procfs: Use the proc generic infrastructure for proc/self Eric W. Biederman
[not found] ` <1353083750-3621-1-git-send-email-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2012-11-16 16:35 ` [PATCH 03/11] pidns: Capture the user namespace and filter ns_last_pid Eric W. Biederman
2012-11-16 16:35 ` Eric W. Biederman
[not found] ` <1353083750-3621-3-git-send-email-ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
2012-11-21 1:26 ` Gao feng
2012-11-21 1:26 ` Gao feng
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.