All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch -mm 0/5] mqueue namespace
@ 2007-10-02  8:46 Cedric Le Goater
  0 siblings, 0 replies; 28+ messages in thread
From: Cedric Le Goater @ 2007-10-02  8:46 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Hello,

Here's the mqueue namespace patchset. 

If you're OK with it, I'll rebase on the next -mm, which should contain
the new CONFIG_NAMESPACES, and send it to Andrew.

Thanks !

C.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found] <20071002084608.149781400@fr.ibm.com>
@ 2007-10-02  8:46 ` Cedric Le Goater
  2007-10-02  8:46 ` [patch -mm 2/5] mqueue namespace : add unshare support Cedric Le Goater
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 28+ messages in thread
From: Cedric Le Goater @ 2007-10-02  8:46 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA; +Cc: Cedric Le Goater

[-- Attachment #1: mq_namespace-add-mq_namespace.patch --]
[-- Type: text/plain, Size: 11181 bytes --]

From: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>

This patch adds a struct mq_namespace holding the common attributes 
of the mqueue namespace. 

The current code is modified to use the default mqueue namespace 
object 'init_mq_ns' and to prepare the ground for future dynamic 
objects.

Todo:
	- use CONFIG_NAMESPACES when next -mm is released

Signed-off-by: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
---
 include/linux/mq_namespace.h |   60 +++++++++++++++++++++++
 ipc/mqueue.c                 |  111 +++++++++++++++++++++++++++----------------
 2 files changed, 130 insertions(+), 41 deletions(-)

Index: 2.6.23-rc8-mm2/include/linux/mq_namespace.h
===================================================================
--- /dev/null
+++ 2.6.23-rc8-mm2/include/linux/mq_namespace.h
@@ -0,0 +1,60 @@
+#ifndef _LINUX_MQ_NAMESPACE_H
+#define _LINUX_MQ_NAMESPACE_H
+
+#include <linux/kref.h>
+
+struct vfsmount;
+
+struct mq_namespace {
+	struct kref	kref;
+	struct vfsmount *mnt;
+
+	unsigned int	queues_count;
+	unsigned int	queues_max;
+	unsigned int	msg_max;
+	unsigned int	msgsize_max;
+};
+
+extern struct mq_namespace init_mq_ns;
+
+#ifdef CONFIG_POSIX_MQUEUE
+
+#define INIT_MQ_NS(ns)		.ns		= &init_mq_ns,
+
+static inline struct mq_namespace *get_mq_ns(struct mq_namespace *ns)
+{
+	if (ns)
+		kref_get(&ns->kref);
+	return ns;
+}
+
+extern struct mq_namespace *copy_mq_ns(unsigned long flags,
+				struct mq_namespace *old_ns);
+extern void free_mq_ns(struct kref *kref);
+
+static inline void put_mq_ns(struct mq_namespace *ns)
+{
+	if (ns)
+		kref_put(&ns->kref, free_mq_ns);
+}
+
+#else
+
+#define INIT_MQ_NS(ns)
+
+static inline struct mq_namespace *get_mq_ns(struct mq_namespace *ns)
+{
+	return ns;
+}
+
+static inline struct mq_namespace *copy_mq_ns(unsigned long flags,
+					struct mq_namespace *old_ns)
+{
+	return old_ns;
+}
+
+static inline void put_mq_ns(struct mq_namespace *ns) { }
+
+#endif /* CONFIG_POSIX_MQUEUE */
+
+#endif /* _LINUX_MQ_NAMESPACE_H */
Index: 2.6.23-rc8-mm2/ipc/mqueue.c
===================================================================
--- 2.6.23-rc8-mm2.orig/ipc/mqueue.c
+++ 2.6.23-rc8-mm2/ipc/mqueue.c
@@ -31,6 +31,7 @@
 #include <linux/mutex.h>
 #include <linux/nsproxy.h>
 #include <linux/pid.h>
+#include <linux/mq_namespace.h>
 
 #include <net/sock.h>
 #include "util.h"
@@ -87,12 +88,18 @@ static void remove_notification(struct m
 
 static spinlock_t mq_lock;
 static struct kmem_cache *mqueue_inode_cachep;
-static struct vfsmount *mqueue_mnt;
 
-static unsigned int queues_count;
-static unsigned int queues_max 	= DFLT_QUEUESMAX;
-static unsigned int msg_max 	= DFLT_MSGMAX;
-static unsigned int msgsize_max = DFLT_MSGSIZEMAX;
+struct mq_namespace init_mq_ns = {
+	.kref = {
+		.refcount = ATOMIC_INIT(2),
+	},
+	.mnt		= NULL,
+	.queues_count	= 0,
+	.queues_max 	= DFLT_QUEUESMAX,
+	.msg_max 	= DFLT_MSGMAX,
+	.msgsize_max	= DFLT_MSGSIZEMAX,
+};
+
 
 static struct ctl_table_header * mq_sysctl_table;
 
@@ -101,6 +108,21 @@ static inline struct mqueue_inode_info *
 	return container_of(inode, struct mqueue_inode_info, vfs_inode);
 }
 
+struct mq_namespace *copy_mq_ns(unsigned long flags,
+				struct mq_namespace *old_ns)
+{
+	BUG_ON(!old_ns);
+	return get_mq_ns(old_ns);
+}
+
+void free_mq_ns(struct kref *kref)
+{
+	struct mq_namespace *mq_ns;
+
+	mq_ns = container_of(kref, struct mq_namespace, kref);
+	kfree(mq_ns);
+}
+
 static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
 							struct mq_attr *attr)
 {
@@ -235,6 +257,7 @@ static void mqueue_delete_inode(struct i
 	struct user_struct *user;
 	unsigned long mq_bytes;
 	int i;
+	struct mq_namespace *mq_ns = &init_mq_ns;
 
 	if (S_ISDIR(inode->i_mode)) {
 		clear_inode(inode);
@@ -255,7 +278,7 @@ static void mqueue_delete_inode(struct i
 	if (user) {
 		spin_lock(&mq_lock);
 		user->mq_bytes -= mq_bytes;
-		queues_count--;
+		mq_ns->queues_count--;
 		spin_unlock(&mq_lock);
 		free_uid(user);
 	}
@@ -267,20 +290,22 @@ static int mqueue_create(struct inode *d
 	struct inode *inode;
 	struct mq_attr *attr = dentry->d_fsdata;
 	int error;
+	struct mq_namespace *mq_ns = &init_mq_ns;
 
 	spin_lock(&mq_lock);
-	if (queues_count >= queues_max && !capable(CAP_SYS_RESOURCE)) {
+	if (mq_ns->queues_count >= mq_ns->queues_max &&
+		!capable(CAP_SYS_RESOURCE)) {
 		error = -ENOSPC;
 		goto out_lock;
 	}
-	queues_count++;
+	mq_ns->queues_count++;
 	spin_unlock(&mq_lock);
 
 	inode = mqueue_get_inode(dir->i_sb, mode, attr);
 	if (!inode) {
 		error = -ENOMEM;
 		spin_lock(&mq_lock);
-		queues_count--;
+		mq_ns->queues_count--;
 		goto out_lock;
 	}
 
@@ -571,7 +596,7 @@ static void remove_notification(struct m
 	info->notify_owner = NULL;
 }
 
-static int mq_attr_ok(struct mq_attr *attr)
+static int mq_attr_ok(struct mq_namespace *mq_ns, struct mq_attr *attr)
 {
 	if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0)
 		return 0;
@@ -579,8 +604,8 @@ static int mq_attr_ok(struct mq_attr *at
 		if (attr->mq_maxmsg > HARD_MSGMAX)
 			return 0;
 	} else {
-		if (attr->mq_maxmsg > msg_max ||
-				attr->mq_msgsize > msgsize_max)
+		if (attr->mq_maxmsg > mq_ns->msg_max ||
+				attr->mq_msgsize > mq_ns->msgsize_max)
 			return 0;
 	}
 	/* check for overflow */
@@ -596,8 +621,9 @@ static int mq_attr_ok(struct mq_attr *at
 /*
  * Invoked when creating a new queue via sys_mq_open
  */
-static struct file *do_create(struct dentry *dir, struct dentry *dentry,
-			int oflag, mode_t mode, struct mq_attr __user *u_attr)
+static struct file *do_create(struct mq_namespace *mq_ns, struct dentry *dir,
+			struct dentry *dentry, int oflag, mode_t mode,
+			struct mq_attr __user *u_attr)
 {
 	struct mq_attr attr;
 	int ret;
@@ -607,7 +633,7 @@ static struct file *do_create(struct den
 		if (copy_from_user(&attr, u_attr, sizeof(attr)))
 			goto out;
 		ret = -EINVAL;
-		if (!mq_attr_ok(&attr))
+		if (!mq_attr_ok(mq_ns, &attr))
 			goto out;
 		/* store for use during create */
 		dentry->d_fsdata = &attr;
@@ -619,33 +645,34 @@ static struct file *do_create(struct den
 	if (ret)
 		goto out;
 
-	return dentry_open(dentry, mqueue_mnt, oflag);
+	return dentry_open(dentry, mq_ns->mnt, oflag);
 
 out:
 	dput(dentry);
-	mntput(mqueue_mnt);
+	mntput(mq_ns->mnt);
 	return ERR_PTR(ret);
 }
 
 /* Opens existing queue */
-static struct file *do_open(struct dentry *dentry, int oflag)
+static struct file *do_open(struct mq_namespace *mq_ns, struct dentry *dentry,
+			int oflag)
 {
 static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
 					MAY_READ | MAY_WRITE };
 
 	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
 		dput(dentry);
-		mntput(mqueue_mnt);
+		mntput(mq_ns->mnt);
 		return ERR_PTR(-EINVAL);
 	}
 
 	if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL)) {
 		dput(dentry);
-		mntput(mqueue_mnt);
+		mntput(mq_ns->mnt);
 		return ERR_PTR(-EACCES);
 	}
 
-	return dentry_open(dentry, mqueue_mnt, oflag);
+	return dentry_open(dentry, mq_ns->mnt, oflag);
 }
 
 asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
@@ -655,6 +682,7 @@ asmlinkage long sys_mq_open(const char _
 	struct file *filp;
 	char *name;
 	int fd, error;
+	struct mq_namespace *mq_ns = &init_mq_ns;
 
 	error = audit_mq_open(oflag, mode, u_attr);
 	if (error != 0)
@@ -667,13 +695,13 @@ asmlinkage long sys_mq_open(const char _
 	if (fd < 0)
 		goto out_putname;
 
-	mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
-	dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
+	mutex_lock(&mq_ns->mnt->mnt_root->d_inode->i_mutex);
+	dentry = lookup_one_len(name, mq_ns->mnt->mnt_root, strlen(name));
 	if (IS_ERR(dentry)) {
 		error = PTR_ERR(dentry);
 		goto out_err;
 	}
-	mntget(mqueue_mnt);
+	mntget(mq_ns->mnt);
 
 	if (oflag & O_CREAT) {
 		if (dentry->d_inode) {	/* entry already exists */
@@ -681,12 +709,12 @@ asmlinkage long sys_mq_open(const char _
 			error = -EEXIST;
 			if (oflag & O_EXCL)
 				goto out;
-			filp = do_open(dentry, oflag);
+			filp = do_open(mq_ns, dentry, oflag);
 		} else {
-			error = mnt_want_write(mqueue_mnt);
+			error = mnt_want_write(mq_ns->mnt);
 			if (error)
 				goto out;
-			filp = do_create(mqueue_mnt->mnt_root, dentry,
+			filp = do_create(mq_ns, mq_ns->mnt->mnt_root, dentry,
 						oflag, mode, u_attr);
 		}
 	} else {
@@ -694,7 +722,7 @@ asmlinkage long sys_mq_open(const char _
 		if (!dentry->d_inode)
 			goto out;
 		audit_inode(name, dentry);
-		filp = do_open(dentry, oflag);
+		filp = do_open(mq_ns, dentry, oflag);
 	}
 
 	if (IS_ERR(filp)) {
@@ -708,13 +736,13 @@ asmlinkage long sys_mq_open(const char _
 
 out:
 	dput(dentry);
-	mntput(mqueue_mnt);
+	mntput(mq_ns->mnt);
 out_putfd:
 	put_unused_fd(fd);
 out_err:
 	fd = error;
 out_upsem:
-	mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
+	mutex_unlock(&mq_ns->mnt->mnt_root->d_inode->i_mutex);
 out_putname:
 	putname(name);
 	return fd;
@@ -726,14 +754,15 @@ asmlinkage long sys_mq_unlink(const char
 	char *name;
 	struct dentry *dentry;
 	struct inode *inode = NULL;
+	struct mq_namespace *mq_ns = &init_mq_ns;
 
 	name = getname(u_name);
 	if (IS_ERR(name))
 		return PTR_ERR(name);
 
-	mutex_lock_nested(&mqueue_mnt->mnt_root->d_inode->i_mutex,
+	mutex_lock_nested(&mq_ns->mnt->mnt_root->d_inode->i_mutex,
 			I_MUTEX_PARENT);
-	dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
+	dentry = lookup_one_len(name, mq_ns->mnt->mnt_root, strlen(name));
 	if (IS_ERR(dentry)) {
 		err = PTR_ERR(dentry);
 		goto out_unlock;
@@ -747,16 +776,16 @@ asmlinkage long sys_mq_unlink(const char
 	inode = dentry->d_inode;
 	if (inode)
 		atomic_inc(&inode->i_count);
-	err = mnt_want_write(mqueue_mnt);
+	err = mnt_want_write(mq_ns->mnt);
 	if (err)
 		goto out_err;
 	err = vfs_unlink(dentry->d_parent->d_inode, dentry);
-	mnt_drop_write(mqueue_mnt);
+	mnt_drop_write(mq_ns->mnt);
 out_err:
 	dput(dentry);
 
 out_unlock:
-	mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
+	mutex_unlock(&mq_ns->mnt->mnt_root->d_inode->i_mutex);
 	putname(name);
 	if (inode)
 		iput(inode);
@@ -1201,14 +1230,14 @@ static int msg_maxsize_limit_max = INT_M
 static ctl_table mq_sysctls[] = {
 	{
 		.procname	= "queues_max",
-		.data		= &queues_max,
+		.data		= &init_mq_ns.queues_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
 	{
 		.procname	= "msg_max",
-		.data		= &msg_max,
+		.data		= &init_mq_ns.msg_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
@@ -1217,7 +1246,7 @@ static ctl_table mq_sysctls[] = {
 	},
 	{
 		.procname	= "msgsize_max",
-		.data		= &msgsize_max,
+		.data		= &init_mq_ns.msgsize_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
@@ -1263,13 +1292,13 @@ static int __init init_mqueue_fs(void)
 	if (error)
 		goto out_sysctl;
 
-	if (IS_ERR(mqueue_mnt = kern_mount(&mqueue_fs_type))) {
-		error = PTR_ERR(mqueue_mnt);
+	init_mq_ns.mnt = kern_mount(&mqueue_fs_type);
+	if (IS_ERR(init_mq_ns.mnt)) {
+		error = PTR_ERR(init_mq_ns.mnt);
 		goto out_filesystem;
 	}
 
 	/* internal initialization - not common for vfs */
-	queues_count = 0;
 	spin_lock_init(&mq_lock);
 
 	return 0;

-- 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [patch -mm 2/5] mqueue namespace : add unshare support
       [not found] <20071002084608.149781400@fr.ibm.com>
  2007-10-02  8:46 ` [patch -mm 1/5] mqueue namespace : add struct mq_namespace Cedric Le Goater
@ 2007-10-02  8:46 ` Cedric Le Goater
  2007-10-02  8:46 ` [patch -mm 3/5] mqueue namespace : add get_sb_single_per_data() Cedric Le Goater
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 28+ messages in thread
From: Cedric Le Goater @ 2007-10-02  8:46 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA; +Cc: Cedric Le Goater

[-- Attachment #1: mq_namespace-add-mq_namespace-to-nsproxy.patch --]
[-- Type: text/plain, Size: 6462 bytes --]

From: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>

This patch includes the mqueue namespace in the nsproxy object. It
also adds support for unshare() and clone() with a new clone flag,
CLONE_NEWMQ (1 bit left in the clone flags !)

CLONE_NEWMQ must be used together with CLONE_NEWNS when cloning or unsharing.
This is to make sure that no user mounts of the internal mqueue fs
are left behind when the last task exits. 

It's totally harmless for the moment because the current code still 
uses the default mqueue namespace object 'init_mq_ns' 

Signed-off-by: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
---
 include/linux/init_task.h |    2 ++
 include/linux/nsproxy.h   |    2 ++
 include/linux/sched.h     |    1 +
 ipc/mqueue.c              |   29 ++++++++++++++++++++++++++++-
 kernel/fork.c             |   16 +++++++++++++++-
 kernel/nsproxy.c          |   15 +++++++++++++--
 6 files changed, 61 insertions(+), 4 deletions(-)

Index: 2.6.23-rc8-mm2/include/linux/init_task.h
===================================================================
--- 2.6.23-rc8-mm2.orig/include/linux/init_task.h
+++ 2.6.23-rc8-mm2/include/linux/init_task.h
@@ -10,6 +10,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
 #include <net/net_namespace.h>
+#include <linux/mq_namespace.h>
 
 #define INIT_FDTABLE \
 {							\
@@ -78,6 +79,7 @@ extern struct nsproxy init_nsproxy;
 	INIT_NET_NS(net_ns)                                             \
 	INIT_IPC_NS(ipc_ns)						\
 	.user_ns	= &init_user_ns,				\
+	INIT_MQ_NS(mq_ns)						\
 }
 
 #define INIT_SIGHAND(sighand) {						\
Index: 2.6.23-rc8-mm2/include/linux/sched.h
===================================================================
--- 2.6.23-rc8-mm2.orig/include/linux/sched.h
+++ 2.6.23-rc8-mm2/include/linux/sched.h
@@ -26,6 +26,7 @@
 #define CLONE_NEWIPC		0x08000000	/* New ipcs */
 #define CLONE_NEWUSER		0x10000000	/* New user namespace */
 #define CLONE_NEWPID		0x20000000	/* New pid namespace */
+#define CLONE_NEWMQ		0x40000000	/* New posix mqueue namespace */
 
 /*
  * Scheduling policies
Index: 2.6.23-rc8-mm2/kernel/nsproxy.c
===================================================================
--- 2.6.23-rc8-mm2.orig/kernel/nsproxy.c
+++ 2.6.23-rc8-mm2/kernel/nsproxy.c
@@ -85,8 +85,17 @@ static struct nsproxy *create_new_namesp
 		goto out_user;
 	}
 
+	new_nsp->mq_ns = copy_mq_ns(flags, tsk->nsproxy->mq_ns);
+	if (IS_ERR(new_nsp->mq_ns)) {
+		err = PTR_ERR(new_nsp->mq_ns);
+		goto out_mq;
+	}
+
 	return new_nsp;
 
+out_mq:
+	if (new_nsp->user_ns)
+		put_user_ns(new_nsp->user_ns);
 out_user:
 	if (new_nsp->pid_ns)
 		put_pid_ns(new_nsp->pid_ns);
@@ -120,7 +129,7 @@ int copy_namespaces(unsigned long flags,
 	get_nsproxy(old_ns);
 
 	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-					CLONE_NEWUSER | CLONE_NEWPID)))
+		       CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWMQ)))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN)) {
@@ -159,6 +168,8 @@ void free_nsproxy(struct nsproxy *ns)
 		put_pid_ns(ns->pid_ns);
 	if (ns->user_ns)
 		put_user_ns(ns->user_ns);
+	if (ns->mq_ns)
+		put_mq_ns(ns->mq_ns);
 	kmem_cache_free(nsproxy_cachep, ns);
 }
 
@@ -172,7 +183,7 @@ int unshare_nsproxy_namespaces(unsigned 
 	int err = 0;
 
 	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-			       CLONE_NEWUSER)))
+			       CLONE_NEWUSER | CLONE_NEWMQ)))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN))
Index: 2.6.23-rc8-mm2/kernel/fork.c
===================================================================
--- 2.6.23-rc8-mm2.orig/kernel/fork.c
+++ 2.6.23-rc8-mm2/kernel/fork.c
@@ -1002,6 +1002,13 @@ static struct task_struct *copy_process(
 	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
 		return ERR_PTR(-EINVAL);
 
+	/*
+	 * mqueue namespace cannot be unshared when the mount
+	 * namespace is not
+	 */
+	if ((clone_flags & CLONE_NEWMQ) && !(clone_flags & CLONE_NEWNS))
+		return ERR_PTR(-EINVAL);
+
 	retval = security_task_create(clone_flags);
 	if (retval)
 		goto fork_out;
@@ -1552,6 +1559,12 @@ static inline void check_unshare_flags(u
 		*flags_ptr |= CLONE_THREAD;
 
 	/*
+	 * If unsharing mqueue namespace, must also unshare mnt namespace.
+	 */
+	if (*flags_ptr & CLONE_NEWMQ)
+		*flags_ptr |= CLONE_NEWNS;
+
+	/*
 	 * If unsharing namespace, must also unshare filesystem information.
 	 */
 	if (*flags_ptr & CLONE_NEWNS)
@@ -1668,7 +1681,8 @@ asmlinkage long sys_unshare(unsigned lon
 	err = -EINVAL;
 	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER))
+				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER|
+				CLONE_NEWMQ))
 		goto bad_unshare_out;
 
 	if ((err = unshare_thread(unshare_flags)))
Index: 2.6.23-rc8-mm2/include/linux/nsproxy.h
===================================================================
--- 2.6.23-rc8-mm2.orig/include/linux/nsproxy.h
+++ 2.6.23-rc8-mm2/include/linux/nsproxy.h
@@ -8,6 +8,7 @@ struct mnt_namespace;
 struct uts_namespace;
 struct ipc_namespace;
 struct pid_namespace;
+struct mq_namespace;
 
 /*
  * A structure to contain pointers to all per-process
@@ -29,6 +30,7 @@ struct nsproxy {
 	struct pid_namespace *pid_ns;
 	struct user_namespace *user_ns;
 	struct net 	     *net_ns;
+	struct mq_namespace *mq_ns;
 };
 extern struct nsproxy init_nsproxy;
 
Index: 2.6.23-rc8-mm2/ipc/mqueue.c
===================================================================
--- 2.6.23-rc8-mm2.orig/ipc/mqueue.c
+++ 2.6.23-rc8-mm2/ipc/mqueue.c
@@ -108,11 +108,38 @@ static inline struct mqueue_inode_info *
 	return container_of(inode, struct mqueue_inode_info, vfs_inode);
 }
 
+static struct mq_namespace *clone_mq_ns(struct mq_namespace *old_ns)
+{
+	struct mq_namespace *mq_ns;
+
+	mq_ns = kmalloc(sizeof(struct mq_namespace), GFP_KERNEL);
+	if (!mq_ns)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&mq_ns->kref);
+	mq_ns->queues_count	= 0;
+	mq_ns->queues_max	= DFLT_QUEUESMAX;
+	mq_ns->msg_max		= DFLT_MSGMAX;
+	mq_ns->msgsize_max	= DFLT_MSGSIZEMAX;
+	mq_ns->mnt		= NULL;
+	return mq_ns;
+}
+
 struct mq_namespace *copy_mq_ns(unsigned long flags,
 				struct mq_namespace *old_ns)
 {
+	struct mq_namespace *mq_ns;
+
 	BUG_ON(!old_ns);
-	return get_mq_ns(old_ns);
+	get_mq_ns(old_ns);
+
+	if (!(flags & CLONE_NEWMQ))
+		return old_ns;
+
+	mq_ns = clone_mq_ns(old_ns);
+
+	put_mq_ns(old_ns);
+	return mq_ns;
 }
 
 void free_mq_ns(struct kref *kref)

-- 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [patch -mm 3/5] mqueue namespace : add get_sb_single_per_data()
       [not found] <20071002084608.149781400@fr.ibm.com>
  2007-10-02  8:46 ` [patch -mm 1/5] mqueue namespace : add struct mq_namespace Cedric Le Goater
  2007-10-02  8:46 ` [patch -mm 2/5] mqueue namespace : add unshare support Cedric Le Goater
@ 2007-10-02  8:46 ` Cedric Le Goater
  2007-10-02  8:46 ` [patch -mm 4/5] mqueue namespace : enable the namespace Cedric Le Goater
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 28+ messages in thread
From: Cedric Le Goater @ 2007-10-02  8:46 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA; +Cc: Cedric Le Goater

[-- Attachment #1: add-get_sb_single_per_data.patch --]
[-- Type: text/plain, Size: 2611 bytes --]

From: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>

This is a new helper routine very similar to get_sb_single() which 
uses the 'data' argument and the 's_fs_info' attribute of the 
super_block to create a single super_block per 'data' value.

If 'data' is a pointer to a namespace, it makes it easy to create
a single super_block per namespace. This is something we need for
the mqueue file system, and other single super_block internal
file systems.

Dave, can you review this one to make sure the helper routine 
belongs in fs/super.c?

Thanks

Signed-off-by: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>

---
 fs/super.c         |   32 ++++++++++++++++++++++++++++++++
 include/linux/fs.h |    4 ++++
 2 files changed, 36 insertions(+)

Index: 2.6.23-rc8-mm2/fs/super.c
===================================================================
--- 2.6.23-rc8-mm2.orig/fs/super.c
+++ 2.6.23-rc8-mm2/fs/super.c
@@ -853,6 +853,38 @@ int get_sb_single(struct file_system_typ
 
 EXPORT_SYMBOL(get_sb_single);
 
+static int compare_data(struct super_block *sb, void *data)
+{
+	return (void *)sb->s_fs_info == data;
+}
+
+int get_sb_single_per_data(struct file_system_type *fs_type,
+	int flags, void *data,
+	int (*fill_super)(struct super_block *, void *, int),
+	struct vfsmount *mnt)
+{
+	struct super_block *s;
+	int error;
+
+	s = sget(fs_type, compare_data, set_anon_super, data);
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+	if (!s->s_root) {
+		s->s_flags = flags;
+		s->s_fs_info = data;
+		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
+		if (error) {
+			up_write(&s->s_umount);
+			deactivate_super(s);
+			return error;
+		}
+		s->s_flags |= MS_ACTIVE;
+	}
+	do_remount_sb(s, flags, data, 0);
+	return simple_set_mnt(mnt, s);
+}
+EXPORT_SYMBOL(get_sb_single_per_data);
+
 struct vfsmount *
 vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
 {
Index: 2.6.23-rc8-mm2/include/linux/fs.h
===================================================================
--- 2.6.23-rc8-mm2.orig/include/linux/fs.h
+++ 2.6.23-rc8-mm2/include/linux/fs.h
@@ -1437,6 +1437,10 @@ extern int get_sb_single(struct file_sys
 	int flags, void *data,
 	int (*fill_super)(struct super_block *, void *, int),
 	struct vfsmount *mnt);
+extern int get_sb_single_per_data(struct file_system_type *fs_type,
+	int flags, void *data,
+	int (*fill_super)(struct super_block *, void *, int),
+	struct vfsmount *mnt);
 extern int get_sb_nodev(struct file_system_type *fs_type,
 	int flags, void *data,
 	int (*fill_super)(struct super_block *, void *, int),

-- 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [patch -mm 4/5] mqueue namespace : enable the namespace
       [not found] <20071002084608.149781400@fr.ibm.com>
                   ` (2 preceding siblings ...)
  2007-10-02  8:46 ` [patch -mm 3/5] mqueue namespace : add get_sb_single_per_data() Cedric Le Goater
@ 2007-10-02  8:46 ` Cedric Le Goater
  2007-10-02  8:46 ` [patch -mm 5/5] mqueue namespace : make sysctl work per namespace Cedric Le Goater
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 28+ messages in thread
From: Cedric Le Goater @ 2007-10-02  8:46 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA; +Cc: Cedric Le Goater

[-- Attachment #1: mq_namespace-use-mq_namespace.patch --]
[-- Type: text/plain, Size: 3383 bytes --]

From: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>

Move forward and start using the mqueue namespace.

The single super block mount of the file system is modified to allow 
one mount per namespace. This is achieved by storing the namespace 
in the super_block s_fs_info attribute. 

Signed-off-by: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
---
 ipc/mqueue.c |   26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

Index: 2.6.23-rc8-mm2/ipc/mqueue.c
===================================================================
--- 2.6.23-rc8-mm2.orig/ipc/mqueue.c
+++ 2.6.23-rc8-mm2/ipc/mqueue.c
@@ -108,6 +108,8 @@ static inline struct mqueue_inode_info *
 	return container_of(inode, struct mqueue_inode_info, vfs_inode);
 }
 
+static struct file_system_type mqueue_fs_type;
+
 static struct mq_namespace *clone_mq_ns(struct mq_namespace *old_ns)
 {
 	struct mq_namespace *mq_ns;
@@ -121,7 +123,12 @@ static struct mq_namespace *clone_mq_ns(
 	mq_ns->queues_max	= DFLT_QUEUESMAX;
 	mq_ns->msg_max		= DFLT_MSGMAX;
 	mq_ns->msgsize_max	= DFLT_MSGSIZEMAX;
-	mq_ns->mnt		= NULL;
+	mq_ns->mnt		= kern_mount_data(&mqueue_fs_type, mq_ns);
+	if (IS_ERR(mq_ns->mnt)) {
+		void *error = mq_ns->mnt;
+		kfree(mq_ns);
+		return error;
+	}
 	return mq_ns;
 }
 
@@ -147,6 +154,7 @@ void free_mq_ns(struct kref *kref)
 	struct mq_namespace *mq_ns;
 
 	mq_ns = container_of(kref, struct mq_namespace, kref);
+	mntput(mq_ns->mnt);
 	kfree(mq_ns);
 }
 
@@ -253,7 +261,11 @@ static int mqueue_get_sb(struct file_sys
 			 int flags, const char *dev_name,
 			 void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt);
+	if (!(flags & MS_KERNMOUNT))
+		data = current->nsproxy->mq_ns;
+
+	return get_sb_single_per_data(fs_type, flags, data, mqueue_fill_super,
+					mnt);
 }
 
 static void init_once(struct kmem_cache *cachep, void *foo)
@@ -284,7 +296,7 @@ static void mqueue_delete_inode(struct i
 	struct user_struct *user;
 	unsigned long mq_bytes;
 	int i;
-	struct mq_namespace *mq_ns = &init_mq_ns;
+	struct mq_namespace *mq_ns = inode->i_sb->s_fs_info;
 
 	if (S_ISDIR(inode->i_mode)) {
 		clear_inode(inode);
@@ -317,7 +329,7 @@ static int mqueue_create(struct inode *d
 	struct inode *inode;
 	struct mq_attr *attr = dentry->d_fsdata;
 	int error;
-	struct mq_namespace *mq_ns = &init_mq_ns;
+	struct mq_namespace *mq_ns = dir->i_sb->s_fs_info;
 
 	spin_lock(&mq_lock);
 	if (mq_ns->queues_count >= mq_ns->queues_max &&
@@ -709,7 +721,7 @@ asmlinkage long sys_mq_open(const char _
 	struct file *filp;
 	char *name;
 	int fd, error;
-	struct mq_namespace *mq_ns = &init_mq_ns;
+	struct mq_namespace *mq_ns = current->nsproxy->mq_ns;
 
 	error = audit_mq_open(oflag, mode, u_attr);
 	if (error != 0)
@@ -781,7 +793,7 @@ asmlinkage long sys_mq_unlink(const char
 	char *name;
 	struct dentry *dentry;
 	struct inode *inode = NULL;
-	struct mq_namespace *mq_ns = &init_mq_ns;
+	struct mq_namespace *mq_ns = current->nsproxy->mq_ns;
 
 	name = getname(u_name);
 	if (IS_ERR(name))
@@ -1319,7 +1331,7 @@ static int __init init_mqueue_fs(void)
 	if (error)
 		goto out_sysctl;
 
-	init_mq_ns.mnt = kern_mount(&mqueue_fs_type);
+	init_mq_ns.mnt = kern_mount_data(&mqueue_fs_type, &init_mq_ns);
 	if (IS_ERR(init_mq_ns.mnt)) {
 		error = PTR_ERR(init_mq_ns.mnt);
 		goto out_filesystem;

-- 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [patch -mm 5/5] mqueue namespace : make sysctl work per namespace
       [not found] <20071002084608.149781400@fr.ibm.com>
                   ` (3 preceding siblings ...)
  2007-10-02  8:46 ` [patch -mm 4/5] mqueue namespace : enable the namespace Cedric Le Goater
@ 2007-10-02  8:46 ` Cedric Le Goater
       [not found] ` <20071002084906.477406083@fr.ibm.com>
       [not found] ` <20071002084608.149781400-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
  6 siblings, 0 replies; 28+ messages in thread
From: Cedric Le Goater @ 2007-10-02  8:46 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA; +Cc: Cedric Le Goater

[-- Attachment #1: mq_namespace-fix-sysctl.patch --]
[-- Type: text/plain, Size: 2509 bytes --]

From: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>

Largely inspired from ipc/ipc_sysctl.c

Todo: 
      - check CONFIG_* options
      - move code to ipc/mqueue_sysctl.c ?

Signed-off-by: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
---
 ipc/mqueue.c |   40 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

Index: 2.6.23-rc8-mm2/ipc/mqueue.c
===================================================================
--- 2.6.23-rc8-mm2.orig/ipc/mqueue.c
+++ 2.6.23-rc8-mm2/ipc/mqueue.c
@@ -1260,6 +1260,40 @@ static struct file_system_type mqueue_fs
 	.kill_sb = kill_litter_super,
 };
 
+static void *get_mq(ctl_table *table)
+{
+	char *which = table->data;
+	struct mq_namespace *mq_ns = current->nsproxy->mq_ns;
+	which = (which - (char *)&init_mq_ns) + (char *)mq_ns;
+	return which;
+}
+
+#ifdef CONFIG_PROC_FS
+static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp,
+	void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table mq_table;
+	memcpy(&mq_table, table, sizeof(mq_table));
+	mq_table.data = get_mq(table);
+
+	return proc_dointvec(&mq_table, write, filp, buffer, lenp, ppos);
+}
+
+static int proc_mq_dointvec_minmax(ctl_table *table, int write,
+	struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table mq_table;
+	memcpy(&mq_table, table, sizeof(mq_table));
+	mq_table.data = get_mq(table);
+
+	return proc_dointvec_minmax(&mq_table, write, filp, buffer,
+					lenp, ppos);
+}
+#else
+#define proc_mq_dointvec_minmax NULL
+#define proc_mq_dointvec	NULL
+#endif
+
 static int msg_max_limit_min = DFLT_MSGMAX;
 static int msg_max_limit_max = HARD_MSGMAX;
 
@@ -1272,14 +1306,14 @@ static ctl_table mq_sysctls[] = {
 		.data		= &init_mq_ns.queues_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &proc_mq_dointvec,
 	},
 	{
 		.procname	= "msg_max",
 		.data		= &init_mq_ns.msg_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
+		.proc_handler	= &proc_mq_dointvec_minmax,
 		.extra1		= &msg_max_limit_min,
 		.extra2		= &msg_max_limit_max,
 	},
@@ -1288,7 +1322,7 @@ static ctl_table mq_sysctls[] = {
 		.data		= &init_mq_ns.msgsize_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
+		.proc_handler	= &proc_mq_dointvec_minmax,
 		.extra1		= &msg_maxsize_limit_min,
 		.extra2		= &msg_maxsize_limit_max,
 	},

-- 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]   ` <20071002084906.477406083-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
@ 2007-10-02  9:06     ` Kirill Korotaev
       [not found]       ` <47020A29.9060403-3ImXcnM4P+0@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Kirill Korotaev @ 2007-10-02  9:06 UTC (permalink / raw)
  To: Cedric Le Goater; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Cedric,

how safely does this interact with netlink in the network namespace?
I see mqueues can send netlink messages, have you checked how safe it is?

Thanks,
Kirill

Cedric Le Goater wrote:
> From: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
> 
> This patch adds a struct mq_namespace holding the common attributes 
> of the mqueue namespace. 
> 
> The current code is modified to use the default mqueue namespace 
> object 'init_mq_ns' and to prepare the ground for futur dynamic 
> objects.
> 
> Todo:
> 	- use CONFIG_NAMESPACE when next -mm is released
> 
> Signed-off-by: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
> ---
>  include/linux/mq_namespace.h |   60 +++++++++++++++++++++++
>  ipc/mqueue.c                 |  111 +++++++++++++++++++++++++++----------------
>  2 files changed, 130 insertions(+), 41 deletions(-)
> 
> Index: 2.6.23-rc8-mm2/include/linux/mq_namespace.h
> ===================================================================
> --- /dev/null
> +++ 2.6.23-rc8-mm2/include/linux/mq_namespace.h
> @@ -0,0 +1,60 @@
> +#ifndef _LINUX_MQ_NAMESPACE_H
> +#define _LINUX_MQ_NAMESPACE_H
> +
> +#include <linux/kref.h>
> +
> +struct vfsmount;
> +
> +struct mq_namespace {
> +	struct kref	kref;
> +	struct vfsmount *mnt;
> +
> +	unsigned int	queues_count;
> +	unsigned int	queues_max;
> +	unsigned int	msg_max;
> +	unsigned int	msgsize_max;
> +};
> +
> +extern struct mq_namespace init_mq_ns;
> +
> +#ifdef CONFIG_POSIX_MQUEUE
> +
> +#define INIT_MQ_NS(ns)		.ns		= &init_mq_ns,
> +
> +static inline struct mq_namespace *get_mq_ns(struct mq_namespace *ns)
> +{
> +	if (ns)
> +		kref_get(&ns->kref);
> +	return ns;
> +}
> +
> +extern struct mq_namespace *copy_mq_ns(unsigned long flags,
> +				struct mq_namespace *old_ns);
> +extern void free_mq_ns(struct kref *kref);
> +
> +static inline void put_mq_ns(struct mq_namespace *ns)
> +{
> +	if (ns)
> +		kref_put(&ns->kref, free_mq_ns);
> +}
> +
> +#else
> +
> +#define INIT_MQ_NS(ns)
> +
> +static inline struct mq_namespace *get_mq_ns(struct mq_namespace *ns)
> +{
> +	return ns;
> +}
> +
> +static inline struct mq_namespace *copy_mq_ns(unsigned long flags,
> +					struct mq_namespace *old_ns)
> +{
> +	return old_ns;
> +}
> +
> +static inline void put_mq_ns(struct mq_namespace *ns) { }
> +
> +#endif /* CONFIG_POSIX_MQUEUE */
> +
> +#endif /* _LINUX_MQ_NAMESPACE_H */
> Index: 2.6.23-rc8-mm2/ipc/mqueue.c
> ===================================================================
> --- 2.6.23-rc8-mm2.orig/ipc/mqueue.c
> +++ 2.6.23-rc8-mm2/ipc/mqueue.c
> @@ -31,6 +31,7 @@
>  #include <linux/mutex.h>
>  #include <linux/nsproxy.h>
>  #include <linux/pid.h>
> +#include <linux/mq_namespace.h>
>  
>  #include <net/sock.h>
>  #include "util.h"
> @@ -87,12 +88,18 @@ static void remove_notification(struct m
>  
>  static spinlock_t mq_lock;
>  static struct kmem_cache *mqueue_inode_cachep;
> -static struct vfsmount *mqueue_mnt;
>  
> -static unsigned int queues_count;
> -static unsigned int queues_max 	= DFLT_QUEUESMAX;
> -static unsigned int msg_max 	= DFLT_MSGMAX;
> -static unsigned int msgsize_max = DFLT_MSGSIZEMAX;
> +struct mq_namespace init_mq_ns = {
> +	.kref = {
> +		.refcount = ATOMIC_INIT(2),
> +	},
> +	.mnt		= NULL,
> +	.queues_count	= 0,
> +	.queues_max 	= DFLT_QUEUESMAX,
> +	.msg_max 	= DFLT_MSGMAX,
> +	.msgsize_max	= DFLT_MSGSIZEMAX,
> +};
> +
>  
>  static struct ctl_table_header * mq_sysctl_table;
>  
> @@ -101,6 +108,21 @@ static inline struct mqueue_inode_info *
>  	return container_of(inode, struct mqueue_inode_info, vfs_inode);
>  }
>  
> +struct mq_namespace *copy_mq_ns(unsigned long flags,
> +				struct mq_namespace *old_ns)
> +{
> +	BUG_ON(!old_ns);
> +	return get_mq_ns(old_ns);
> +}
> +
> +void free_mq_ns(struct kref *kref)
> +{
> +	struct mq_namespace *mq_ns;
> +
> +	mq_ns = container_of(kref, struct mq_namespace, kref);
> +	kfree(mq_ns);
> +}
> +
>  static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
>  							struct mq_attr *attr)
>  {
> @@ -235,6 +257,7 @@ static void mqueue_delete_inode(struct i
>  	struct user_struct *user;
>  	unsigned long mq_bytes;
>  	int i;
> +	struct mq_namespace *mq_ns = &init_mq_ns;
>  
>  	if (S_ISDIR(inode->i_mode)) {
>  		clear_inode(inode);
> @@ -255,7 +278,7 @@ static void mqueue_delete_inode(struct i
>  	if (user) {
>  		spin_lock(&mq_lock);
>  		user->mq_bytes -= mq_bytes;
> -		queues_count--;
> +		mq_ns->queues_count--;
>  		spin_unlock(&mq_lock);
>  		free_uid(user);
>  	}
> @@ -267,20 +290,22 @@ static int mqueue_create(struct inode *d
>  	struct inode *inode;
>  	struct mq_attr *attr = dentry->d_fsdata;
>  	int error;
> +	struct mq_namespace *mq_ns = &init_mq_ns;
>  
>  	spin_lock(&mq_lock);
> -	if (queues_count >= queues_max && !capable(CAP_SYS_RESOURCE)) {
> +	if (mq_ns->queues_count >= mq_ns->queues_max &&
> +		!capable(CAP_SYS_RESOURCE)) {
>  		error = -ENOSPC;
>  		goto out_lock;
>  	}
> -	queues_count++;
> +	mq_ns->queues_count++;
>  	spin_unlock(&mq_lock);
>  
>  	inode = mqueue_get_inode(dir->i_sb, mode, attr);
>  	if (!inode) {
>  		error = -ENOMEM;
>  		spin_lock(&mq_lock);
> -		queues_count--;
> +		mq_ns->queues_count--;
>  		goto out_lock;
>  	}
>  
> @@ -571,7 +596,7 @@ static void remove_notification(struct m
>  	info->notify_owner = NULL;
>  }
>  
> -static int mq_attr_ok(struct mq_attr *attr)
> +static int mq_attr_ok(struct mq_namespace *mq_ns, struct mq_attr *attr)
>  {
>  	if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0)
>  		return 0;
> @@ -579,8 +604,8 @@ static int mq_attr_ok(struct mq_attr *at
>  		if (attr->mq_maxmsg > HARD_MSGMAX)
>  			return 0;
>  	} else {
> -		if (attr->mq_maxmsg > msg_max ||
> -				attr->mq_msgsize > msgsize_max)
> +		if (attr->mq_maxmsg > mq_ns->msg_max ||
> +				attr->mq_msgsize > mq_ns->msgsize_max)
>  			return 0;
>  	}
>  	/* check for overflow */
> @@ -596,8 +621,9 @@ static int mq_attr_ok(struct mq_attr *at
>  /*
>   * Invoked when creating a new queue via sys_mq_open
>   */
> -static struct file *do_create(struct dentry *dir, struct dentry *dentry,
> -			int oflag, mode_t mode, struct mq_attr __user *u_attr)
> +static struct file *do_create(struct mq_namespace *mq_ns, struct dentry *dir,
> +			struct dentry *dentry, int oflag, mode_t mode,
> +			struct mq_attr __user *u_attr)
>  {
>  	struct mq_attr attr;
>  	int ret;
> @@ -607,7 +633,7 @@ static struct file *do_create(struct den
>  		if (copy_from_user(&attr, u_attr, sizeof(attr)))
>  			goto out;
>  		ret = -EINVAL;
> -		if (!mq_attr_ok(&attr))
> +		if (!mq_attr_ok(mq_ns, &attr))
>  			goto out;
>  		/* store for use during create */
>  		dentry->d_fsdata = &attr;
> @@ -619,33 +645,34 @@ static struct file *do_create(struct den
>  	if (ret)
>  		goto out;
>  
> -	return dentry_open(dentry, mqueue_mnt, oflag);
> +	return dentry_open(dentry, mq_ns->mnt, oflag);
>  
>  out:
>  	dput(dentry);
> -	mntput(mqueue_mnt);
> +	mntput(mq_ns->mnt);
>  	return ERR_PTR(ret);
>  }
>  
>  /* Opens existing queue */
> -static struct file *do_open(struct dentry *dentry, int oflag)
> +static struct file *do_open(struct mq_namespace *mq_ns, struct dentry *dentry,
> +			int oflag)
>  {
>  static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
>  					MAY_READ | MAY_WRITE };
>  
>  	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
>  		dput(dentry);
> -		mntput(mqueue_mnt);
> +		mntput(mq_ns->mnt);
>  		return ERR_PTR(-EINVAL);
>  	}
>  
>  	if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL)) {
>  		dput(dentry);
> -		mntput(mqueue_mnt);
> +		mntput(mq_ns->mnt);
>  		return ERR_PTR(-EACCES);
>  	}
>  
> -	return dentry_open(dentry, mqueue_mnt, oflag);
> +	return dentry_open(dentry, mq_ns->mnt, oflag);
>  }
>  
>  asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
> @@ -655,6 +682,7 @@ asmlinkage long sys_mq_open(const char _
>  	struct file *filp;
>  	char *name;
>  	int fd, error;
> +	struct mq_namespace *mq_ns = &init_mq_ns;
>  
>  	error = audit_mq_open(oflag, mode, u_attr);
>  	if (error != 0)
> @@ -667,13 +695,13 @@ asmlinkage long sys_mq_open(const char _
>  	if (fd < 0)
>  		goto out_putname;
>  
> -	mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
> -	dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
> +	mutex_lock(&mq_ns->mnt->mnt_root->d_inode->i_mutex);
> +	dentry = lookup_one_len(name, mq_ns->mnt->mnt_root, strlen(name));
>  	if (IS_ERR(dentry)) {
>  		error = PTR_ERR(dentry);
>  		goto out_err;
>  	}
> -	mntget(mqueue_mnt);
> +	mntget(mq_ns->mnt);
>  
>  	if (oflag & O_CREAT) {
>  		if (dentry->d_inode) {	/* entry already exists */
> @@ -681,12 +709,12 @@ asmlinkage long sys_mq_open(const char _
>  			error = -EEXIST;
>  			if (oflag & O_EXCL)
>  				goto out;
> -			filp = do_open(dentry, oflag);
> +			filp = do_open(mq_ns, dentry, oflag);
>  		} else {
> -			error = mnt_want_write(mqueue_mnt);
> +			error = mnt_want_write(mq_ns->mnt);
>  			if (error)
>  				goto out;
> -			filp = do_create(mqueue_mnt->mnt_root, dentry,
> +			filp = do_create(mq_ns, mq_ns->mnt->mnt_root, dentry,
>  						oflag, mode, u_attr);
>  		}
>  	} else {
> @@ -694,7 +722,7 @@ asmlinkage long sys_mq_open(const char _
>  		if (!dentry->d_inode)
>  			goto out;
>  		audit_inode(name, dentry);
> -		filp = do_open(dentry, oflag);
> +		filp = do_open(mq_ns, dentry, oflag);
>  	}
>  
>  	if (IS_ERR(filp)) {
> @@ -708,13 +736,13 @@ asmlinkage long sys_mq_open(const char _
>  
>  out:
>  	dput(dentry);
> -	mntput(mqueue_mnt);
> +	mntput(mq_ns->mnt);
>  out_putfd:
>  	put_unused_fd(fd);
>  out_err:
>  	fd = error;
>  out_upsem:
> -	mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
> +	mutex_unlock(&mq_ns->mnt->mnt_root->d_inode->i_mutex);
>  out_putname:
>  	putname(name);
>  	return fd;
> @@ -726,14 +754,15 @@ asmlinkage long sys_mq_unlink(const char
>  	char *name;
>  	struct dentry *dentry;
>  	struct inode *inode = NULL;
> +	struct mq_namespace *mq_ns = &init_mq_ns;
>  
>  	name = getname(u_name);
>  	if (IS_ERR(name))
>  		return PTR_ERR(name);
>  
> -	mutex_lock_nested(&mqueue_mnt->mnt_root->d_inode->i_mutex,
> +	mutex_lock_nested(&mq_ns->mnt->mnt_root->d_inode->i_mutex,
>  			I_MUTEX_PARENT);
> -	dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
> +	dentry = lookup_one_len(name, mq_ns->mnt->mnt_root, strlen(name));
>  	if (IS_ERR(dentry)) {
>  		err = PTR_ERR(dentry);
>  		goto out_unlock;
> @@ -747,16 +776,16 @@ asmlinkage long sys_mq_unlink(const char
>  	inode = dentry->d_inode;
>  	if (inode)
>  		atomic_inc(&inode->i_count);
> -	err = mnt_want_write(mqueue_mnt);
> +	err = mnt_want_write(mq_ns->mnt);
>  	if (err)
>  		goto out_err;
>  	err = vfs_unlink(dentry->d_parent->d_inode, dentry);
> -	mnt_drop_write(mqueue_mnt);
> +	mnt_drop_write(mq_ns->mnt);
>  out_err:
>  	dput(dentry);
>  
>  out_unlock:
> -	mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
> +	mutex_unlock(&mq_ns->mnt->mnt_root->d_inode->i_mutex);
>  	putname(name);
>  	if (inode)
>  		iput(inode);
> @@ -1201,14 +1230,14 @@ static int msg_maxsize_limit_max = INT_M
>  static ctl_table mq_sysctls[] = {
>  	{
>  		.procname	= "queues_max",
> -		.data		= &queues_max,
> +		.data		= &init_mq_ns.queues_max,
>  		.maxlen		= sizeof(int),
>  		.mode		= 0644,
>  		.proc_handler	= &proc_dointvec,
>  	},
>  	{
>  		.procname	= "msg_max",
> -		.data		= &msg_max,
> +		.data		= &init_mq_ns.msg_max,
>  		.maxlen		= sizeof(int),
>  		.mode		= 0644,
>  		.proc_handler	= &proc_dointvec_minmax,
> @@ -1217,7 +1246,7 @@ static ctl_table mq_sysctls[] = {
>  	},
>  	{
>  		.procname	= "msgsize_max",
> -		.data		= &msgsize_max,
> +		.data		= &init_mq_ns.msgsize_max,
>  		.maxlen		= sizeof(int),
>  		.mode		= 0644,
>  		.proc_handler	= &proc_dointvec_minmax,
> @@ -1263,13 +1292,13 @@ static int __init init_mqueue_fs(void)
>  	if (error)
>  		goto out_sysctl;
>  
> -	if (IS_ERR(mqueue_mnt = kern_mount(&mqueue_fs_type))) {
> -		error = PTR_ERR(mqueue_mnt);
> +	init_mq_ns.mnt = kern_mount(&mqueue_fs_type);
> +	if (IS_ERR(init_mq_ns.mnt)) {
> +		error = PTR_ERR(init_mq_ns.mnt);
>  		goto out_filesystem;
>  	}
>  
>  	/* internal initialization - not common for vfs */
> -	queues_count = 0;
>  	spin_lock_init(&mq_lock);
>  
>  	return 0;
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]       ` <47020A29.9060403-3ImXcnM4P+0@public.gmane.org>
@ 2007-10-02 10:13         ` Cedric Le Goater
       [not found]           ` <470219BC.3050702-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Cedric Le Goater @ 2007-10-02 10:13 UTC (permalink / raw)
  To: Kirill Korotaev
  Cc: Denis V. Lunev,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Eric W. Biederman

Hello Kirill,

Kirill Korotaev wrote:
> Cedric,
> 
> how safe does it intersect with netlinks from network namespace?
> I see mqueues can send netlink messages, have you checked how safe it is?

a ref is taken on the 'struct sock' in the mq_notify() syscall and the
skbuff which will be sent to notify the user is also allocated in the
mq_notify() syscall. So we should be in the same net namespace when we 
register the notification and when we notify. 

I hope the net guys can confirm or we will easily check in the next 
-lxc patchset which will merge this patchset with netns.
 
However, we have an issue with the signal notification in __do_notify():
we could kill a process in a different pid namespace.

C.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 0/5] mqueue namespace
       [not found] ` <20071002084608.149781400-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
@ 2007-10-02 10:50   ` Eric W. Biederman
       [not found]     ` <m1przxlrim.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
  2007-10-02 11:01   ` Eric W. Biederman
  1 sibling, 1 reply; 28+ messages in thread
From: Eric W. Biederman @ 2007-10-02 10:50 UTC (permalink / raw)
  To: Cedric Le Goater; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org> writes:

> Hello,
>
> Here's the mqueue namespace patchset. 
>
> If you're OK with it, I'll rebase on the next -mm which should contain
> the new CONFIG_NAMESPACES and send to andrew.

Well the CLONE flag is a conflict with the network namespace.

I was hoping someone had managed to make message queues work
through the filesystem API and then this would just be a matter
of multiple mounts of a filesystem.

However given the syscalls and the rest I guess this really does
count as a separate namespace :(

Eric

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]           ` <470219BC.3050702-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
@ 2007-10-02 10:59             ` Eric W. Biederman
       [not found]               ` <m1ir5plr3c.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Eric W. Biederman @ 2007-10-02 10:59 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: Denis V. Lunev,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org> writes:

> Hello Kirill,
>
> Kirill Korotaev wrote:
>> Cedric,
>> 
>> how safe does it intersect with netlinks from network namespace?
>> I see mqueues can send netlink messages, have you checked how safe it is?
>
> a ref is taken on the 'struct sock' in the mq_notify() syscall and the
> skbuff which will be send to notify the user is also allocated in the
> mq_notify() syscall. So we should be in the same net namespace when we 
> register the notification and when we notify. 
>
> I hope the net guys can confirm or we will easily check in the next 
> -lxc patchset which will merge this patchset with netns.
>  
> however, we have an issue with the signal notification in __do_notify()
> we could kill a process in a different pid namespace.

So I took a quick look at the code as it is (before this patchset),
and taking a reference to a socket and taking a reference to
a struct pid should do the right thing when we intersect with other
namespaces.  It certainly does not look like a fundamental issue.

In practice the patchset as written does conflict with the network
namespace work in the net-2.6.24 tree so some adjustments will need
to be made.

Eric

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 0/5] mqueue namespace
       [not found] ` <20071002084608.149781400-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
  2007-10-02 10:50   ` [patch -mm 0/5] mqueue namespace Eric W. Biederman
@ 2007-10-02 11:01   ` Eric W. Biederman
       [not found]     ` <m1ejgdlr0a.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
  1 sibling, 1 reply; 28+ messages in thread
From: Eric W. Biederman @ 2007-10-02 11:01 UTC (permalink / raw)
  To: Cedric Le Goater; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org> writes:

> Hello,
>
> Here's the mqueue namespace patchset. 
>
> If you're OK with it, I'll rebase on the next -mm which should contain
> the new CONFIG_NAMESPACES and send to andrew.

And before I forget.

At least until someone has done an audit and we are certain we
have gotten all of the user space interfaces.

We need to Kconfig the whole thing so that support for creating one of
these things depends on CONFIG_EXPERIMENTAL.

Once we have the code merged and have audited the merge result
we can remove the Kconfig and the CONFIG_EXPERIMENTAL bit.

Eric

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 0/5] mqueue namespace
       [not found]     ` <m1przxlrim.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
@ 2007-10-02 11:06       ` Cedric Le Goater
  0 siblings, 0 replies; 28+ messages in thread
From: Cedric Le Goater @ 2007-10-02 11:06 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Eric W. Biederman wrote:
> Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org> writes:
> 
>> Hello,
>>
>> Here's the mqueue namespace patchset. 
>>
>> If you're OK with it, I'll rebase on the next -mm which should contain
>> the new CONFIG_NAMESPACES and send to andrew.
> 
> Well the CLONE flag is a conflict with the network namespace.

Indeed. For the moment, this is for review because I'm waiting for a few 
namespace patches to go in -mm. When this time comes, I'll rebase.  

> I was hoping someone had managed to make message queues work
> through the filesystem API and then this would just be a matter
> of multiple mounts of a filesystem.

I've added a small helper routine to handle the case of a single
super_block per namespace. I called it get_sb_single_per_data()

It could be used in pid namespace also and probably a few other fs.

> However given the syscalls and the rest I guess this really does
> count as a separate namespace :(

yep ...

C.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 0/5] mqueue namespace
       [not found]     ` <m1ejgdlr0a.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
@ 2007-10-02 11:41       ` Cedric Le Goater
  0 siblings, 0 replies; 28+ messages in thread
From: Cedric Le Goater @ 2007-10-02 11:41 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Eric W. Biederman wrote:
> Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org> writes:
> 
>> Hello,
>>
>> Here's the mqueue namespace patchset. 
>>
>> If you're OK with it, I'll rebase on the next -mm which should contain
>> the new CONFIG_NAMESPACES and send to andrew.
> 
> And before I forget.
> 
> At least until someone has done an audit and we are certain we
> have gotten all of the user space interfaces.
> 
> We need to Kconfig the whole thing so support creating one of these
> things depends on CONFIG_EXPERIMENTAL.
> 
> Once we have the code merged and have audited the merge result
> we can remove the Kconfig and the CONFIG_EXPERIMENTAL bit.

sure. I'm just waiting for pavel's new CONFIG_NAMESPACES* options 
to go in -mm. 

Thanks !

C.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]               ` <m1ir5plr3c.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
@ 2007-10-02 12:21                 ` Cedric Le Goater
       [not found]                   ` <470237C7.5000902-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Cedric Le Goater @ 2007-10-02 12:21 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Denis V. Lunev,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA


>> however, we have an issue with the signal notification in __do_notify()
>> we could kill a process in a different pid namespace.
> 
> So I took a quick look at the code as it is (before this patchset)
> and the taking a reference to a socket and the taking a reference to
> a struct pid should do the right thing when we intersect with other
> namespaces.  It certainly does not look like a fundamental issue.

right. this should be covered when the pid namespace signal handling is 
complete. kill_pid_info() should fail to send a signal to a sibling or 
a parent pid namespace. 

I guess we should add a WARNING() to say that we're attempting to do so.

> In practice the patchset as written  does conflict with the network
> namespace work in the net-2.6.24 tree so some adjustments will need
> to be made.

I think no more than fixing the CLONE flags in sched.h and the conflicts
in nsproxy.c.  

Thanks !

C.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]                   ` <470237C7.5000902-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
@ 2007-10-02 16:30                     ` sukadev-r/Jw6+rmf7HQT0dZR+AlfA
       [not found]                       ` <20071002163020.GA11207-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2007-10-02 17:02                     ` Eric W. Biederman
  1 sibling, 1 reply; 28+ messages in thread
From: sukadev-r/Jw6+rmf7HQT0dZR+AlfA @ 2007-10-02 16:30 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: Denis V. Lunev,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Eric W. Biederman

Cedric Le Goater [clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org] wrote:
| 
| >> however, we have an issue with the signal notification in __do_notify()
| >> we could kill a process in a different pid namespace.
| > 
| > So I took a quick look at the code as it is (before this patchset)
| > and the taking a reference to a socket and the taking a reference to
| > a struct pid should do the right thing when we intersect with other
| > namespaces.  It certainly does not look like a fundamental issue.

| 
| right. this should be covered when the pid namespace signal handling is 
| complete. kill_pid_info() should fail to send a signal to a sibling or 
| a parent pid namespace. 
| 
| I guess we should add a WARNING() to say that we're attempting to do so.

Just want to clarify how a signal is sent to a parent ns.

	A process P1 sets itself up to be notified when a message arrives
	on a queue.

	P1 then clones P2 with CLONE_NEWPID.

	P2 writes to the message queue and thus signals P1

What should the semantics be here ?

I guess it makes less sense for two namespaces to be dependent on the same
message queue this way.  But, if P2 writes to the queue, technically, the
queue is not empty, so P1 should be notified, no ? 

This sounds similar to the SIGIO signal case (F_SETOWN). My understanding
was that we would notify whoever was set to receive the notification, even
if they were in a parent ns (again my reasoning was it's based on the state
of a file).

IOW,  should we change kill_pid_info() ?  If the caller can 'see' the
'struct pid' they can signal it. The expectation was that callers would
call find_vpid() and thus only see processes in their namespace.

| 
| > In practice the patchset as written  does conflict with the network
| > namespace work in the net-2.6.24 tree so some adjustments will need
| > to be made.
| 
| I think no more than fixing the CLONE flags in sched.h and the conflicts
| in nsproxy.c.  
| 
| Thanks !
| 
| C.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]                   ` <470237C7.5000902-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
  2007-10-02 16:30                     ` sukadev-r/Jw6+rmf7HQT0dZR+AlfA
@ 2007-10-02 17:02                     ` Eric W. Biederman
  1 sibling, 0 replies; 28+ messages in thread
From: Eric W. Biederman @ 2007-10-02 17:02 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: Denis V. Lunev,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org> writes:

>>> however, we have an issue with the signal notification in __do_notify()
>>> we could kill a process in a different pid namespace.
>> 
>> So I took a quick look at the code as it is (before this patchset)
>> and the taking a reference to a socket and the taking a reference to
>> a struct pid should do the right thing when we intersect with other
>> namespaces.  It certainly does not look like a fundamental issue.
>
> right. this should be covered when the pid namespace signal handling is 
> complete. kill_pid_info() should fail to send a signal to a sibling or 
> a parent pid namespace. 

Huh?

If we call sys_mq_notify and we become the owner then it should
not be a problem to send a signal to us.

> I guess we should add a WARNING() to say that we're attempting to do so.

I don't understand the problem that you are seeing.

Eric

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]                       ` <20071002163020.GA11207-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2007-10-02 17:16                         ` Eric W. Biederman
       [not found]                           ` <m1k5q5jv29.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
  2007-10-03  7:44                         ` Cedric Le Goater
  1 sibling, 1 reply; 28+ messages in thread
From: Eric W. Biederman @ 2007-10-02 17:16 UTC (permalink / raw)
  To: sukadev-r/Jw6+rmf7HQT0dZR+AlfA
  Cc: Denis V. Lunev,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Cedric Le Goater

sukadev-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org writes:

> Cedric Le Goater [clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org] wrote:
> | 
> | >> however, we have an issue with the signal notification in __do_notify()
> | >> we could kill a process in a different pid namespace.
> | > 
> | > So I took a quick look at the code as it is (before this patchset)
> | > and the taking a reference to a socket and the taking a reference to
> | > a struct pid should do the right thing when we intersect with other
> | > namespaces.  It certainly does not look like a fundamental issue.
>
> | 
> | right. this should be covered when the pid namespace signal handling is 
> | complete. kill_pid_info() should fail to send a signal to a sibling or 
> | a parent pid namespace. 
> | 
> | I guess we should add a WARNING() to say that we're attempting to do so.
>
> Just want to clarify how a signal is sent to a parent ns.
>
> 	A process P1 sets itself up to be notified when a message arrives
> 	on a queue.
>
> 	P1 then clones P2 with CLONE_NEWPID.
>
> 	P2 writes to the message queue and thus signals P1
>
> What should the semantics be here ?
>
> I guess it makes less sense for two namespaces to be dependent on the same
> message queue this way.  But, if P2 writes to the queue, technically, the
> queue is not empty, so P1 should be notified, no ? 

Sounds right to me.

> This sounds similar to the SIGIO signal case (F_SETOWN). My understanding
> was that we would notify whoever was set to receive the notification, even
> if they were in a parent ns (again my reasoning was its based on the state
> of a file).

Yep.

> IOW,  should we change kill_pid_info() ?  If the caller can 'see' the
> 'struct pid' they can signal it. The expectation was that callers would
> call find_vpid() and thus only see processes in their namespace.

Ok.  Now I'm concerned.

I deliberately designed the initial pid namespace infrastructure to allow
mixing like this.  Because it is the right thing to do.

The expectation is that in general namespaces provide isolation simply
because you cannot see and thus cannot interact with other processes.
However isolation is not the purpose in life of namespaces and if you
use them in more creative ways mixing should work just fine.  But
you have to use all of the namespaces together, and you have
to carefully set things up to guarantee isolation.

The really challenging case to handle here  is what happens if we are
signaling to someone in a sibling pid namespace.  What do we set the
parent pid in the siginfo struct to.  I think we agreed that 0 (blame
the kernel) is the appropriate pid last time we talked about this.


I'm worried now that the concept of vpid has confused someone.  It
still doesn't feel right to me to call one pid value more or less
virtual than any other so the concept of a virtual pid doesn't make
virtual then any other so the concept of a virtual pid doesn't make
sense to me.  The way I have always thought of it is:
- pid_nr(struct pid *) 
  The pid in the current pid namespace.
- __pid_nr(struct pid_namespace, struct pid *) 
  The pid in some specified pid namespace.

With struct pid being defined to be global and doing something
appropriate in all pid namespaces.

Thinking about this concern that Cedric raises is actually independent
of the mqueue namespace and seems to be totally a pid namespace thing.
Because the only way this happens is if we happen to share the mqueue
namespace. (i.e. what we are doing now).


Eric

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]                           ` <m1k5q5jv29.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
@ 2007-10-03  7:12                             ` Cedric Le Goater
       [not found]                               ` <470340F8.50806-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Cedric Le Goater @ 2007-10-03  7:12 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Denis V. Lunev,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Eric W. Biederman wrote:
> sukadev-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org writes:
> 
>> Cedric Le Goater [clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org] wrote:
>> | 
>> | >> however, we have an issue with the signal notification in __do_notify()
>> | >> we could kill a process in a different pid namespace.
>> | > 
>> | > So I took a quick look at the code as it is (before this patchset)
>> | > and the taking a reference to a socket and the taking a reference to
>> | > a struct pid should do the right thing when we intersect with other
>> | > namespaces.  It certainly does not look like a fundamental issue.
>>
>> | 
>> | right. this should be covered when the pid namespace signal handling is 
>> | complete. kill_pid_info() should fail to send a signal to a sibling or 
>> | a parent pid namespace. 
>> | 
>> | I guess we should add a WARNING() to say that we're attempting to do so.
>>
>> Just want to clarify how a signal is sent to a parent ns.
>>
>> 	A process P1 sets itself up to be notified when a message arrives
>> 	on a queue.
>>
>> 	P1 then clones P2 with CLONE_NEWPID.
>>
>> 	P2 writes to the message queue and thus signals P1
>>
>> What should the semantics be here ?
>>
>> I guess it makes less sense for two namespaces to be dependent on the same
>> message queue this way.  But, if P2 writes to the queue, technically, the
>> queue is not empty, so P1 should be notified, no ? 
> 
> Sounds right to me.

It's right for the mqueue namespace but wrong for the pid namespace because
we will possibly send a signal to a sibling pid namespace.
 
>> This sounds similar to the SIGIO signal case (F_SETOWN). My understanding
>> was that we would notify whoever was set to receive the notification, even
>> if they were in a parent ns (again my reasoning was its based on the state
>> of a file).
> 
> Yep.
> 
>> IOW,  should we change kill_pid_info() ?  If the caller can 'see' the
>> 'struct pid' they can signal it. The expectation was that callers would
>> call find_vpid() and thus only see processes in their namespace.
> 
> Ok.  Now I'm concerned.
> 
> I deliberately designed the initial pid namespace infrastructure to allow
> mixing like this.  Because it is the right thing to do.
> 
> The expectation is that in general namespaces provide isolation simply
> because you cannot see and thus cannot interact with other processes.
> However isolation is not the purpose in life of namespaces and if you
> use them in more creative ways mixing should work just fine.  But
> you have to use all of the namespaces together, and you have
> to carefully set things up to guarantee isolation.
> 
> The really challenging case to handle here  is what happens if we are
> signaling to someone in a sibling pid namespace.  What do we set the
> parent pid in the siginfo struct to.  I think we agreed that 0 (blame
> the kernel) is the appropriate pid last time we talked about this.

0 seems appropriate for a signal coming from a parent namespace, yes. but
here we could be sending a signal from a child or sibling namespace.

> I'm worried now that the concept of vpid has confused someone.  It
> still doesn't feel right to me to call one pid value more or less
> virtual then any other so the concept of a virtual pid doesn't make
> sense to me.  The way I have always thought of it is:
> - pid_nr(struct pid *) 
>   The pid in the current pid namespace.
> - __pid_nr(struct pid_namespace, struct pid *) 
>   The pid in some specified pid namespace.
> 
> With struct pid being defined to be global and doing something
> appropriate in all pid namespaces.
> 
> Thinking about this concern that Cedric raises is actually independent
> of the mqueue namespace and seems to be totally a pid namespace thing.
> Because the only way this happens if we happen to share the mqueue
> namespace. (i.e. what we are doing now).
> 
> 
> Eric

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]                               ` <470340F8.50806-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
@ 2007-10-03  7:38                                 ` Cedric Le Goater
       [not found]                                   ` <47034712.9060101-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Cedric Le Goater @ 2007-10-03  7:38 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: Denis V. Lunev,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Eric W. Biederman

[ 
  I have big fingers this morning and I managed to send this email 
  while typing it ... see below for the end. I should be awake now :) 
]

> The really challenging case to handle here  is what happens if we are
> signaling to someone in a sibling pid namespace.  What do we set the
> parent pid in the siginfo struct to.  I think we agreed that 0 (blame
> the kernel) is the appropriate pid last time we talked about this.
 
0 seems appropriate for a signal coming from a parent namespace, yes. but
here, we could be sending a signal from a child or sibling namespace.

> I'm worried now that the concept of vpid has confused someone.  It
> still doesn't feel right to me to call one pid value more or less
> virtual then any other so the concept of a virtual pid doesn't make
> sense to me.  The way I have always thought of it is:
> - pid_nr(struct pid *) 
>   The pid in the current pid namespace.
> - __pid_nr(struct pid_namespace, struct pid *) 
>   The pid in some specified pid namespace.
>
> With struct pid being defined to be global and doing something
> appropriate in all pid namespaces.
>
> Thinking about this concern that Cedric raises is actually independent
> of the mqueue namespace and seems to be totally a pid namespace thing.
> Because the only way this happens if we happen to share the mqueue
> namespace. (i.e. what we are doing now).

Is there a way to catch this general issue (we have the same in sigio)
in the kill*(struct pid) routines ? spit a big warning when the
current->nsproxy->pid_ns is not a parent ?

Thanks !

C.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]                       ` <20071002163020.GA11207-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2007-10-02 17:16                         ` Eric W. Biederman
@ 2007-10-03  7:44                         ` Cedric Le Goater
       [not found]                           ` <47034868.5010505-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
  1 sibling, 1 reply; 28+ messages in thread
From: Cedric Le Goater @ 2007-10-03  7:44 UTC (permalink / raw)
  To: sukadev-r/Jw6+rmf7HQT0dZR+AlfA
  Cc: Denis V. Lunev,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Eric W. Biederman

sukadev-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org wrote:
> Cedric Le Goater [clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org] wrote:
> | 
> | >> however, we have an issue with the signal notification in __do_notify()
> | >> we could kill a process in a different pid namespace.
> | > 
> | > So I took a quick look at the code as it is (before this patchset)
> | > and the taking a reference to a socket and the taking a reference to
> | > a struct pid should do the right thing when we intersect with other
> | > namespaces.  It certainly does not look like a fundamental issue.
> 
> | 
> | right. this should be covered when the pid namespace signal handling is 
> | complete. kill_pid_info() should fail to send a signal to a sibling or 
> | a parent pid namespace. 
> | 
> | I guess we should add a WARNING() to say that we're attempting to do so.
> 
> Just want to clarify how a signal is sent to a parent ns.
> 
> 	A process P1 sets itself up to be notified when a message arrives
> 	on a queue.
> 
> 	P1 then clones P2 with CLONE_NEWPID.
> 
> 	P2 writes to the message queue and thus signals P1
> 
> What should the semantics be here ?
> 
> I guess it makes less sense for two namespaces to be dependent on the same
> message queue this way.  But, if P2 writes to the queue, technically, the
> queue is not empty, so P1 should be notified, no ? 
> 
> This sounds similar to the SIGIO signal case (F_SETOWN). My understanding
> was that we would notify whoever was set to receive the notification, even
> if they were in a parent ns (again my reasoning was its based on the state
> of a file).

yes

> IOW,  should we change kill_pid_info() ?  If the caller can 'see' the
> 'struct pid' they can signal it. The expectation was that callers would
> call find_vpid() and thus only see processes in their namespace.

I think we have to decide on some limitations with signals and make sure 
that we cannot send a signal to a sibling pid namespace. This can occur
in some special namespace unshare configurations, which should never be used,
but to make sure, let's add a big WARNING when we detect such a pid namespace
violation.

If that is what you mean, I agree :)

Thanks,

C.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]                           ` <47034868.5010505-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
@ 2007-10-03 13:59                             ` Serge E. Hallyn
       [not found]                               ` <20071003135955.GA7934-6s5zFf/epYLPQpwDFJZrxKsjOiXwFzmk@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Serge E. Hallyn @ 2007-10-03 13:59 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: Denis V. Lunev,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Eric W. Biederman

Quoting Cedric Le Goater (clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org):
> sukadev-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org wrote:
> > Cedric Le Goater [clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org] wrote:
> > | 
> > | >> however, we have an issue with the signal notification in __do_notify()
> > | >> we could kill a process in a different pid namespace.
> > | > 
> > | > So I took a quick look at the code as it is (before this patchset)
> > | > and the taking a reference to a socket and the taking a reference to
> > | > a struct pid should do the right thing when we intersect with other
> > | > namespaces.  It certainly does not look like a fundamental issue.
> > 
> > | 
> > | right. this should be covered when the pid namespace signal handling is 
> > | complete. kill_pid_info() should fail to send a signal to a sibling or 
> > | a parent pid namespace. 
> > | 
> > | I guess we should add a WARNING() to say that we're attempting to do so.
> > 
> > Just want to clarify how a signal is sent to a parent ns.
> > 
> > 	A process P1 sets itself up to be notified when a message arrives
> > 	on a queue.
> > 
> > 	P1 then clones P2 with CLONE_NEWPID.
> > 
> > 	P2 writes to the message queue and thus signals P1
> > 
> > What should the semantics be here ?
> > 
> > I guess it makes less sense for two namespaces to be dependent on the same
> > message queue this way.  But, if P2 writes to the queue, technically, the
> > queue is not empty, so P1 should be notified, no ? 
> > 
> > This sounds similar to the SIGIO signal case (F_SETOWN). My understanding
> > was that we would notify whoever was set to receive the notification, even
> > if they were in a parent ns (again my reasoning was its based on the state
> > of a file).
> 
> yes
> 
> > IOW,  should we change kill_pid_info() ?  If the caller can 'see' the
> > 'struct pid' they can signal it. The expectation was that callers would
> > call find_vpid() and thus only see processes in their namespace.
> 
> I think we have to decide on some limitations with signals 

Yes we do, but

> and make sure 
> that we cannot send a signal to a sibling pid namespace.

I think you and Eric (and I) are disagreeing about those limitations.
You take it for granted that a sibling pidns is off limits for signals.
But the signal wasn't sent using a pid, but using a file (in SIGIO
case).  So since the fs was shared, the signal should be sent.  An
event happened, and the receiver wants to know about it.

> This can occur
> in some special namespaces unshare configuration which should never be used 
> but to make sure, let's add a big WARNING when we detect such a pid namespace
> violation.
> 
> If it is what you mean, I agree :)
> 
> Thanks,
> 
> C.
> _______________________________________________
> Containers mailing list
> Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
> https://lists.linux-foundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]                               ` <20071003135955.GA7934-6s5zFf/epYLPQpwDFJZrxKsjOiXwFzmk@public.gmane.org>
@ 2007-10-03 14:11                                 ` Cedric Le Goater
       [not found]                                   ` <4703A30F.2010007-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Cedric Le Goater @ 2007-10-03 14:11 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Denis V. Lunev,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Eric W. Biederman

> I think you and Eric (and I) are disagreeing about those limitations.
> You take it for granted that a sibling pidns is off limits for signals.
> But the signal wasn't sent using a pid, but using a file (in SIGIO
> case).  So since the fs was shared, the signal should be sent.  An
> event happened, and the receiver wants to know about it.

seen that way I agree. 

si_code is set to SI_MESGQ, but what do we put in si_pid ? 0 ?

we could use the si_errno to pass extra info, like the sending process 
lives in a // world ...

C.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]                                   ` <47034712.9060101-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
@ 2007-10-03 14:32                                     ` Eric W. Biederman
  0 siblings, 0 replies; 28+ messages in thread
From: Eric W. Biederman @ 2007-10-03 14:32 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: Denis V. Lunev,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org> writes:

> [ 
>   I have big fingers this morning and I managed to send this email 
>   while typing it ... see below for the end. I should be awake now :) 
> ]
>
>> The really challenging case to handle here  is what happens if we are
>> signaling to someone in a sibling pid namespace.  What do we set the
>> parent pid in the siginfo struct to.  I think we agreed that 0 (blame
>> the kernel) is the appropriate pid last time we talked about this.
>  
> 0 seems appropriate for a signal coming from a parent namespace, yes. but
> here, we could be sending a signal from a child or sibling namespace.

Hmm.  I finally see the part of this that is problematic and
that we aren't currently handling.

> sig_i.si_signo = info->notify.sigev_signo;
> sig_i.si_errno = 0;
> sig_i.si_code = SI_MESGQ;
> sig_i.si_value = info->notify.sigev_value;
> sig_i.si_pid = current->tgid;
> sig_i.si_uid = current->uid;
> 
> kill_pid_info(info->notify.sigev_signo, &sig_i, info->notify_owner);

The problem is the filling in of the signal info structure.  My gut feeling
says that si_pid should be a struct pid reference.  We need to be very careful
with the siginfo cases, and si_pid, unless someone has built a mechanism
for dealing with this that I haven't seen.

I still think the right approach here is that if the pid doesn't map
into the target process's pid namespace we should use 0.

I also strongly suspect that si_pid should be a struct pid and that we
should translate it in the receiving process if possible.

> Is there a way to catch this general issue (we have the same in sigio)
> in the kill*(struct pid) routines ? spit a big warning when the
> current->nsproxy->pid_ns is not a parent ?

The SIGIO case is even trickier, although that may count as always
coming from the kernel so this doesn't come up.


In both cases what is really happening is that a process in a sibling
pid namespace is doing something and the kernel is telling us about
it.  So sending the signal is legitimate.

The difficulty is that the kernel can't express the process that did
something.  From my perspective that appears to be a fundamental
limitation, and our only real choice is to send 0.

We have a similar limitation with the uid namespace as well in this
case.

We can implement this easily by passing a struct pid.  And translating
just before we cross the kernel/user boundary.  And if it doesn't
translate use 0.  Basically this is just your check for seeing
if the destination pid namespace is the same or a parent of
the sending pid namespace.  That is what I always envisioned
being in __pid_nr().  As I don't think it makes any sense to map
pids from sibling pid namespaces.

The reason we want in general to do the translation at the
destination process is because we may be sending a signal
to a process group which could have processes in multiple
pid namespaces.

Eric

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]                                   ` <4703A30F.2010007-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
@ 2007-10-03 16:56                                     ` sukadev-r/Jw6+rmf7HQT0dZR+AlfA
       [not found]                                       ` <20071003165644.GA338-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: sukadev-r/Jw6+rmf7HQT0dZR+AlfA @ 2007-10-03 16:56 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Eric W. Biederman, Denis V. Lunev

Cedric Le Goater [clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org] wrote:
| > I think you and Eric (and I) are disagreeing about those limitations.
| > You take it for granted that a sibling pidns is off limits for signals.
| > But the signal wasn't sent using a pid, but using a file (in SIGIO
| > case).  So since the fs was shared, the signal should be sent.  An
| > event happened, and the receiver wants to know about it.
| 
| seen that way I agree. 
| 
| si_code is set to SI_MESGQ, but what do we put in si_pid ? 0 ?
| 
| we could use the si_errno to pass extra info, like the sending process 
| lives in a // world ...

Does the receiver need to know that sender is in a // world ? 

| 
| C.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]                                       ` <20071003165644.GA338-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2007-10-03 17:03                                         ` Eric W. Biederman
       [not found]                                           ` <m1myv0ceqp.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Eric W. Biederman @ 2007-10-03 17:03 UTC (permalink / raw)
  To: sukadev-r/Jw6+rmf7HQT0dZR+AlfA
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Cedric Le Goater, Denis V. Lunev

sukadev-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org writes:

> Cedric Le Goater [clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org] wrote:
> | > I think you and Eric (and I) are disagreeing about those limitations.
> | > You take it for granted that a sibling pidns is off limits for signals.
> | > But the signal wasn't sent using a pid, but using a file (in SIGIO
> | > case).  So since the fs was shared, the signal should be sent.  An
> | > event happened, and the receiver wants to know about it.
> | 
> | seen that way I agree. 
> | 
> | si_code is set to SI_MESGQ, but what do we put in si_pid ? 0 ?
> | 
> | we could use the si_errno to pass extra info, like the sending process 
> | lives in a // world ...
>
> Does the receiver need to know that sender is in a // world ? 

What is a // world ?

Eric

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]                                           ` <m1myv0ceqp.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
@ 2007-10-03 17:09                                             ` sukadev-r/Jw6+rmf7HQT0dZR+AlfA
       [not found]                                               ` <20071003170930.GB338-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: sukadev-r/Jw6+rmf7HQT0dZR+AlfA @ 2007-10-03 17:09 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Cedric Le Goater, Denis V. Lunev

Eric W. Biederman [ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org] wrote:
| sukadev-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org writes:
| 
| > Cedric Le Goater [clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org] wrote:
| > | > I think you and Eric (and I) are disagreeing about those limitations.
| > | > You take it for granted that a sibling pidns is off limits for signals.
| > | > But the signal wasn't sent using a pid, but using a file (in SIGIO
| > | > case).  So since the fs was shared, the signal should be sent.  An
| > | > event happened, and the receiver wants to know about it.
| > | 
| > | seen that way I agree. 
| > | 
| > | si_code is set to SI_MESGQ, but what do we put in si_pid ? 0 ?
| > | 
| > | we could use the si_errno to pass extra info, like the sending process 
| > | lives in a // world ...
| >
| > Does the receiver need to know that sender is in a // world ? 
| 
| What is a // world ?

Parallel world/universe :-)

I am assuming Cedric used that to refer to a sibling pid ns.

| 
| Eric

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]                                               ` <20071003170930.GB338-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2007-10-04 13:12                                                 ` Cedric Le Goater
       [not found]                                                   ` <4704E6C7.5010908-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Cedric Le Goater @ 2007-10-04 13:12 UTC (permalink / raw)
  To: sukadev-r/Jw6+rmf7HQT0dZR+AlfA
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Eric W. Biederman, Denis V. Lunev

sukadev-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org wrote:
> Eric W. Biederman [ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org] wrote:
> | sukadev-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org writes:
> | 
> | > Cedric Le Goater [clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org] wrote:
> | > | > I think you and Eric (and I) are disagreeing about those limitations.
> | > | > You take it for granted that a sibling pidns is off limits for signals.
> | > | > But the signal wasn't sent using a pid, but using a file (in SIGIO
> | > | > case).  So since the fs was shared, the signal should be sent.  An
> | > | > event happened, and the receiver wants to know about it.
> | > | 
> | > | seen that way I agree. 
> | > | 
> | > | si_code is set to SI_MESGQ, but what do we put in si_pid ? 0 ?
> | > | 
> | > | we could use the si_errno to pass extra info, like the sending process 
> | > | lives in a // world ...
> | >
> | > Does the receiver need to know that sender is in a // world ? 

probably not. it would mean that the user is container aware. bad idea.
 
> | What is a // world ?
> 
> Parallel world/universe :-)
> 
> I am assuming Cedric used that to refer to a sibling pid ns.

yes :) 
 
Thanks !

C.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch -mm 1/5] mqueue namespace : add struct mq_namespace
       [not found]                                                   ` <4704E6C7.5010908-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
@ 2007-10-04 13:32                                                     ` Serge E. Hallyn
  0 siblings, 0 replies; 28+ messages in thread
From: Serge E. Hallyn @ 2007-10-04 13:32 UTC (permalink / raw)
  To: Cedric Le Goater
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	Eric W. Biederman, Denis V. Lunev

Quoting Cedric Le Goater (clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org):
> sukadev-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org wrote:
> > Eric W. Biederman [ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org] wrote:
> > | sukadev-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org writes:
> > | 
> > | > Cedric Le Goater [clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org] wrote:
> > | > | > I think you and Eric (and I) are disagreeing about those limitations.
> > | > | > You take it for granted that a sibling pidns is off limits for signals.
> > | > | > But the signal wasn't sent using a pid, but using a file (in SIGIO
> > | > | > case).  So since the fs was shared, the signal should be sent.  An
> > | > | > event happened, and the receiver wants to know about it.
> > | > | 
> > | > | seen that way I agree. 
> > | > | 
> > | > | si_code is set to SI_MESGQ, but what do we put in si_pid ? 0 ?
> > | > | 
> > | > | we could use the si_errno to pass extra info, like the sending process 
> > | > | lives in a // world ...
> > | >
> > | > Does the receiver need to know that sender is in a // world ? 
> 
> probably not. it would mean that the user is container aware. bad idea.

Remember we don't have to hide the fact that the user is in a
container.  Just enough to make it convenient, but not to the
point of going out of our way to try and hide the fact for no
other reason than to hide the fact.

> > | What is a // world ?
> > 
> > Parallel world/universe :-)
> > 
> > I am assuming Cedric used that to refer to a sibling pid ns.
> 
> yes :) 
> 
> Thanks !
> 
> C.

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2007-10-04 13:32 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20071002084608.149781400@fr.ibm.com>
2007-10-02  8:46 ` [patch -mm 1/5] mqueue namespace : add struct mq_namespace Cedric Le Goater
2007-10-02  8:46 ` [patch -mm 2/5] mqueue namespace : add unshare support Cedric Le Goater
2007-10-02  8:46 ` [patch -mm 3/5] mqueue namespace : add get_sb_single_per_data() Cedric Le Goater
2007-10-02  8:46 ` [patch -mm 4/5] mqueue namespace : enable the namespace Cedric Le Goater
2007-10-02  8:46 ` [patch -mm 5/5] mqueue namespace : make sysctl work per namespace Cedric Le Goater
     [not found] ` <20071002084906.477406083@fr.ibm.com>
     [not found]   ` <20071002084906.477406083-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-02  9:06     ` [patch -mm 1/5] mqueue namespace : add struct mq_namespace Kirill Korotaev
     [not found]       ` <47020A29.9060403-3ImXcnM4P+0@public.gmane.org>
2007-10-02 10:13         ` Cedric Le Goater
     [not found]           ` <470219BC.3050702-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-02 10:59             ` Eric W. Biederman
     [not found]               ` <m1ir5plr3c.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
2007-10-02 12:21                 ` Cedric Le Goater
     [not found]                   ` <470237C7.5000902-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-02 16:30                     ` sukadev-r/Jw6+rmf7HQT0dZR+AlfA
     [not found]                       ` <20071002163020.GA11207-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2007-10-02 17:16                         ` Eric W. Biederman
     [not found]                           ` <m1k5q5jv29.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
2007-10-03  7:12                             ` Cedric Le Goater
     [not found]                               ` <470340F8.50806-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-03  7:38                                 ` Cedric Le Goater
     [not found]                                   ` <47034712.9060101-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-03 14:32                                     ` Eric W. Biederman
2007-10-03  7:44                         ` Cedric Le Goater
     [not found]                           ` <47034868.5010505-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-03 13:59                             ` Serge E. Hallyn
     [not found]                               ` <20071003135955.GA7934-6s5zFf/epYLPQpwDFJZrxKsjOiXwFzmk@public.gmane.org>
2007-10-03 14:11                                 ` Cedric Le Goater
     [not found]                                   ` <4703A30F.2010007-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-03 16:56                                     ` sukadev-r/Jw6+rmf7HQT0dZR+AlfA
     [not found]                                       ` <20071003165644.GA338-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2007-10-03 17:03                                         ` Eric W. Biederman
     [not found]                                           ` <m1myv0ceqp.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
2007-10-03 17:09                                             ` sukadev-r/Jw6+rmf7HQT0dZR+AlfA
     [not found]                                               ` <20071003170930.GB338-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2007-10-04 13:12                                                 ` Cedric Le Goater
     [not found]                                                   ` <4704E6C7.5010908-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-04 13:32                                                     ` Serge E. Hallyn
2007-10-02 17:02                     ` Eric W. Biederman
     [not found] ` <20071002084608.149781400-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2007-10-02 10:50   ` [patch -mm 0/5] mqueue namespace Eric W. Biederman
     [not found]     ` <m1przxlrim.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
2007-10-02 11:06       ` Cedric Le Goater
2007-10-02 11:01   ` Eric W. Biederman
     [not found]     ` <m1ejgdlr0a.fsf-T1Yj925okcoyDheHMi7gv2pdwda3JcWeAL8bYrjMMd8@public.gmane.org>
2007-10-02 11:41       ` Cedric Le Goater
2007-10-02  8:46 Cedric Le Goater

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.