All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC patch 0/2] posix mqueue namespace (v11)
@ 2008-12-15 23:37 Serge E. Hallyn
       [not found] ` <20081215233747.GA27553-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 5+ messages in thread
From: Serge E. Hallyn @ 2008-12-15 23:37 UTC (permalink / raw)
  To: Linux Containers

(Ok I don't know what the actual version number is - it's
high but 11 is probably safe)

Cedric and Nadia took several approaches to making posix
message queues per-namespace.  I ended up making some
deep changes so am not retaining their Signed-off-by:s
on this version, but this is definitely very much based
on work by both of them.

Patch 2 hopefully explains my approach.  Briefly,

	1. sysv and posix ipc are both under CLONE_NEWIPC
	2. the mqueue sb is per-ipc-namespace

So to create a new ipc namespace, you would

	unshare(CLONE_NEWIPC|CLONE_NEWNS);
	umount /dev/mqueue
	mount -t mqueue mqueue /dev/mqueue

It's perfectly valid to do vfs operations on files
in another ipc_namespace's /dev/mqueue, but any use
of mq_open(3) and friends will act in your own ipc_ns.

thanks,
-serge

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [RFC PATCH 1/2] mqueue ns: move mqueue_mnt into struct ipc_namespace
       [not found] ` <20081215233747.GA27553-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2008-12-15 23:38   ` Serge E. Hallyn
  2008-12-15 23:38   ` [RFC PATCH 2/2] ipc namespaces: implement support for posix msqueues Serge E. Hallyn
  2008-12-16  8:11   ` [RFC patch 0/2] posix mqueue namespace (v11) Cedric Le Goater
  2 siblings, 0 replies; 5+ messages in thread
From: Serge E. Hallyn @ 2008-12-15 23:38 UTC (permalink / raw)
  To: Linux Containers

Move mqueue vfsmount plus a few tunables into the
ipc_namespace struct.  The CONFIG_IPC_NS boolean
and the ipc_namespace struct will serve both the
posix message queue namespaces and the SYSV ipc
namespaces.

Largely based on previous version by Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org> and
Nadia Derbey <Nadia.Derbey-6ktuUTfB/bM@public.gmane.org>.

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 include/linux/ipc_namespace.h |   32 ++++++++++--
 init/Kconfig                  |    4 +-
 ipc/mqueue.c                  |  116 ++++++++++++++++++++++-------------------
 ipc/msgutil.c                 |   21 +++++++
 ipc/namespace.c               |    2 +
 ipc/util.c                    |    9 ---
 ipc/util.h                    |   15 +++++
 7 files changed, 131 insertions(+), 68 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index ea330f9..532598f 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -44,24 +44,48 @@ struct ipc_namespace {
 	int		shm_tot;
 
 	struct notifier_block ipcns_nb;
+
+	struct vfsmount	*mq_mnt;
+	unsigned int    mq_queues_count;
+	unsigned int    mq_queues_max;
+	unsigned int    mq_msg_max;
+	unsigned int    mq_msgsize_max;
+
 };
 
 extern struct ipc_namespace init_ipc_ns;
 extern atomic_t nr_ipc_ns;
 
-#ifdef CONFIG_SYSVIPC
+#if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
 #define INIT_IPC_NS(ns)		.ns		= &init_ipc_ns,
+#else
+#define INIT_IPC_NS(ns)
+#endif
 
+#ifdef CONFIG_SYSVIPC
 extern int register_ipcns_notifier(struct ipc_namespace *);
 extern int cond_register_ipcns_notifier(struct ipc_namespace *);
 extern void unregister_ipcns_notifier(struct ipc_namespace *);
 extern int ipcns_notify(unsigned long);
-
 #else /* CONFIG_SYSVIPC */
-#define INIT_IPC_NS(ns)
+#define register_ipcns_notifier(ns)
+#define cond_register_ipcns_notifier(ns)
+#define unregister_ipcns_notifier(ns)
+#define ipcns_notify(l)
 #endif /* CONFIG_SYSVIPC */
 
-#if defined(CONFIG_SYSVIPC) && defined(CONFIG_IPC_NS)
+#ifdef CONFIG_POSIX_MQUEUE
+extern void mq_init_ns(struct ipc_namespace *ns);
+/* default values */
+#define DFLT_QUEUESMAX 256     /* max number of message queues */
+#define DFLT_MSGMAX    10      /* max number of messages in each queue */
+#define HARD_MSGMAX    (131072/sizeof(void *))
+#define DFLT_MSGSIZEMAX 8192   /* max message size */
+#else
+#define mq_init_ns(ns)
+#endif
+
+#if defined(CONFIG_IPC_NS)
 extern void free_ipc_ns(struct kref *kref);
 extern struct ipc_namespace *copy_ipcs(unsigned long flags,
 				       struct ipc_namespace *ns);
diff --git a/init/Kconfig b/init/Kconfig
index ce75d2d..32c6315 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -489,10 +489,10 @@ config UTS_NS
 
 config IPC_NS
 	bool "IPC namespace"
-	depends on NAMESPACES && SYSVIPC
+	depends on NAMESPACES && (SYSVIPC || POSIX_MQUEUE)
 	help
 	  In this namespace tasks work with IPC ids which correspond to
-	  different IPC objects in different namespaces
+	  different IPC objects in different namespaces.
 
 config USER_NS
 	bool "User namespace (EXPERIMENTAL)"
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index d9393f8..01d64a0 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -31,6 +31,7 @@
 #include <linux/mutex.h>
 #include <linux/nsproxy.h>
 #include <linux/pid.h>
+#include <linux/ipc_namespace.h>
 
 #include <net/sock.h>
 #include "util.h"
@@ -46,12 +47,6 @@
 #define STATE_PENDING	1
 #define STATE_READY	2
 
-/* default values */
-#define DFLT_QUEUESMAX	256	/* max number of message queues */
-#define DFLT_MSGMAX 	10	/* max number of messages in each queue */
-#define HARD_MSGMAX 	(131072/sizeof(void*))
-#define DFLT_MSGSIZEMAX 8192	/* max message size */
-
 /*
  * Define the ranges various user-specified maximum values can
  * be set to.
@@ -95,12 +90,6 @@ static void remove_notification(struct mqueue_inode_info *info);
 
 static spinlock_t mq_lock;
 static struct kmem_cache *mqueue_inode_cachep;
-static struct vfsmount *mqueue_mnt;
-
-static unsigned int queues_count;
-static unsigned int queues_max 	= DFLT_QUEUESMAX;
-static unsigned int msg_max 	= DFLT_MSGMAX;
-static unsigned int msgsize_max = DFLT_MSGSIZEMAX;
 
 static struct ctl_table_header * mq_sysctl_table;
 
@@ -109,11 +98,25 @@ static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
 	return container_of(inode, struct mqueue_inode_info, vfs_inode);
 }
 
+void mq_init_ns(struct ipc_namespace *ns) {
+	ns->mq_queues_count  = 0;
+	ns->mq_queues_max    = DFLT_QUEUESMAX;
+	ns->mq_msg_max       = DFLT_MSGMAX;
+	ns->mq_msgsize_max   = DFLT_MSGSIZEMAX;
+	ns->mq_mnt           = mntget(init_ipc_ns.mq_mnt);
+}
+
+void mq_exit_ns(struct ipc_namespace *ns) {
+	/* will need to clear out ns->mq_mnt->mnt_sb->s_fs_info here */
+	mntput(ns->mq_mnt);
+}
+
 static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
 							struct mq_attr *attr)
 {
 	struct user_struct *u = current_user();
 	struct inode *inode;
+	struct ipc_namespace *ipc_ns = &init_ipc_ns;
 
 	inode = new_inode(sb);
 	if (inode) {
@@ -142,8 +145,8 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
 			info->qsize = 0;
 			info->user = NULL;	/* set when all is ok */
 			memset(&info->attr, 0, sizeof(info->attr));
-			info->attr.mq_maxmsg = msg_max;
-			info->attr.mq_msgsize = msgsize_max;
+			info->attr.mq_maxmsg = ipc_ns->mq_msg_max;
+			info->attr.mq_msgsize = ipc_ns->mq_msgsize_max;
 			if (attr) {
 				info->attr.mq_maxmsg = attr->mq_maxmsg;
 				info->attr.mq_msgsize = attr->mq_msgsize;
@@ -243,6 +246,7 @@ static void mqueue_delete_inode(struct inode *inode)
 	struct user_struct *user;
 	unsigned long mq_bytes;
 	int i;
+	struct ipc_namespace *ipc_ns = &init_ipc_ns;
 
 	if (S_ISDIR(inode->i_mode)) {
 		clear_inode(inode);
@@ -263,7 +267,7 @@ static void mqueue_delete_inode(struct inode *inode)
 	if (user) {
 		spin_lock(&mq_lock);
 		user->mq_bytes -= mq_bytes;
-		queues_count--;
+		ipc_ns->mq_queues_count--;
 		spin_unlock(&mq_lock);
 		free_uid(user);
 	}
@@ -275,20 +279,22 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry,
 	struct inode *inode;
 	struct mq_attr *attr = dentry->d_fsdata;
 	int error;
+	struct ipc_namespace *ipc_ns = &init_ipc_ns;
 
 	spin_lock(&mq_lock);
-	if (queues_count >= queues_max && !capable(CAP_SYS_RESOURCE)) {
+	if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max &&
+			!capable(CAP_SYS_RESOURCE)) {
 		error = -ENOSPC;
 		goto out_lock;
 	}
-	queues_count++;
+	ipc_ns->mq_queues_count++;
 	spin_unlock(&mq_lock);
 
 	inode = mqueue_get_inode(dir->i_sb, mode, attr);
 	if (!inode) {
 		error = -ENOMEM;
 		spin_lock(&mq_lock);
-		queues_count--;
+		ipc_ns->mq_queues_count--;
 		goto out_lock;
 	}
 
@@ -566,7 +572,7 @@ static void remove_notification(struct mqueue_inode_info *info)
 	info->notify_owner = NULL;
 }
 
-static int mq_attr_ok(struct mq_attr *attr)
+static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr)
 {
 	if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0)
 		return 0;
@@ -574,8 +580,8 @@ static int mq_attr_ok(struct mq_attr *attr)
 		if (attr->mq_maxmsg > HARD_MSGMAX)
 			return 0;
 	} else {
-		if (attr->mq_maxmsg > msg_max ||
-				attr->mq_msgsize > msgsize_max)
+		if (attr->mq_maxmsg > ipc_ns->mq_msg_max ||
+				attr->mq_msgsize > ipc_ns->mq_msgsize_max)
 			return 0;
 	}
 	/* check for overflow */
@@ -591,8 +597,9 @@ static int mq_attr_ok(struct mq_attr *attr)
 /*
  * Invoked when creating a new queue via sys_mq_open
  */
-static struct file *do_create(struct dentry *dir, struct dentry *dentry,
-			int oflag, mode_t mode, struct mq_attr __user *u_attr)
+static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
+			struct dentry *dentry, int oflag, mode_t mode,
+			struct mq_attr __user *u_attr)
 {
 	const struct cred *cred = current_cred();
 	struct mq_attr attr;
@@ -604,14 +611,14 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
 		if (copy_from_user(&attr, u_attr, sizeof(attr)))
 			goto out;
 		ret = -EINVAL;
-		if (!mq_attr_ok(&attr))
+		if (!mq_attr_ok(ipc_ns, &attr))
 			goto out;
 		/* store for use during create */
 		dentry->d_fsdata = &attr;
 	}
 
 	mode &= ~current->fs->umask;
-	ret = mnt_want_write(mqueue_mnt);
+	ret = mnt_want_write(ipc_ns->mq_mnt);
 	if (ret)
 		goto out;
 	ret = vfs_create(dir->d_inode, dentry, mode, NULL);
@@ -619,24 +626,25 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
 	if (ret)
 		goto out_drop_write;
 
-	result = dentry_open(dentry, mqueue_mnt, oflag, cred);
+	result = dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
 	/*
 	 * dentry_open() took a persistent mnt_want_write(),
 	 * so we can now drop this one.
 	 */
-	mnt_drop_write(mqueue_mnt);
+	mnt_drop_write(ipc_ns->mq_mnt);
 	return result;
 
 out_drop_write:
-	mnt_drop_write(mqueue_mnt);
+	mnt_drop_write(ipc_ns->mq_mnt);
 out:
 	dput(dentry);
-	mntput(mqueue_mnt);
+	mntput(ipc_ns->mq_mnt);
 	return ERR_PTR(ret);
 }
 
 /* Opens existing queue */
-static struct file *do_open(struct dentry *dentry, int oflag)
+static struct file *do_open(struct ipc_namespace *ipc_ns,
+				struct dentry *dentry, int oflag)
 {
 	const struct cred *cred = current_cred();
 
@@ -645,17 +653,17 @@ static struct file *do_open(struct dentry *dentry, int oflag)
 
 	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
 		dput(dentry);
-		mntput(mqueue_mnt);
+		mntput(ipc_ns->mq_mnt);
 		return ERR_PTR(-EINVAL);
 	}
 
 	if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) {
 		dput(dentry);
-		mntput(mqueue_mnt);
+		mntput(ipc_ns->mq_mnt);
 		return ERR_PTR(-EACCES);
 	}
 
-	return dentry_open(dentry, mqueue_mnt, oflag, cred);
+	return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
 }
 
 asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
@@ -665,6 +673,7 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
 	struct file *filp;
 	char *name;
 	int fd, error;
+	struct ipc_namespace *ipc_ns = &init_ipc_ns;
 
 	error = audit_mq_open(oflag, mode, u_attr);
 	if (error != 0)
@@ -677,13 +686,13 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
 	if (fd < 0)
 		goto out_putname;
 
-	mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
-	dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
+	mutex_lock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
+	dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
 	if (IS_ERR(dentry)) {
 		error = PTR_ERR(dentry);
 		goto out_err;
 	}
-	mntget(mqueue_mnt);
+	mntget(ipc_ns->mq_mnt);
 
 	if (oflag & O_CREAT) {
 		if (dentry->d_inode) {	/* entry already exists */
@@ -691,17 +700,17 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
 			error = -EEXIST;
 			if (oflag & O_EXCL)
 				goto out;
-			filp = do_open(dentry, oflag);
+			filp = do_open(ipc_ns, dentry, oflag);
 		} else {
-			filp = do_create(mqueue_mnt->mnt_root, dentry,
-						oflag, mode, u_attr);
+			filp = do_create(ipc_ns, ipc_ns->mq_mnt->mnt_root,
+						dentry, oflag, mode, u_attr);
 		}
 	} else {
 		error = -ENOENT;
 		if (!dentry->d_inode)
 			goto out;
 		audit_inode(name, dentry);
-		filp = do_open(dentry, oflag);
+		filp = do_open(ipc_ns, dentry, oflag);
 	}
 
 	if (IS_ERR(filp)) {
@@ -714,13 +723,13 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
 
 out:
 	dput(dentry);
-	mntput(mqueue_mnt);
+	mntput(ipc_ns->mq_mnt);
 out_putfd:
 	put_unused_fd(fd);
 out_err:
 	fd = error;
 out_upsem:
-	mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
+	mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
 out_putname:
 	putname(name);
 	return fd;
@@ -732,14 +741,15 @@ asmlinkage long sys_mq_unlink(const char __user *u_name)
 	char *name;
 	struct dentry *dentry;
 	struct inode *inode = NULL;
+	struct ipc_namespace *ipc_ns = &init_ipc_ns;
 
 	name = getname(u_name);
 	if (IS_ERR(name))
 		return PTR_ERR(name);
 
-	mutex_lock_nested(&mqueue_mnt->mnt_root->d_inode->i_mutex,
+	mutex_lock_nested(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex,
 			I_MUTEX_PARENT);
-	dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
+	dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
 	if (IS_ERR(dentry)) {
 		err = PTR_ERR(dentry);
 		goto out_unlock;
@@ -753,16 +763,16 @@ asmlinkage long sys_mq_unlink(const char __user *u_name)
 	inode = dentry->d_inode;
 	if (inode)
 		atomic_inc(&inode->i_count);
-	err = mnt_want_write(mqueue_mnt);
+	err = mnt_want_write(ipc_ns->mq_mnt);
 	if (err)
 		goto out_err;
 	err = vfs_unlink(dentry->d_parent->d_inode, dentry);
-	mnt_drop_write(mqueue_mnt);
+	mnt_drop_write(ipc_ns->mq_mnt);
 out_err:
 	dput(dentry);
 
 out_unlock:
-	mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
+	mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
 	putname(name);
 	if (inode)
 		iput(inode);
@@ -1211,14 +1221,14 @@ static int msg_maxsize_limit_max = MAX_MSGSIZEMAX;
 static ctl_table mq_sysctls[] = {
 	{
 		.procname	= "queues_max",
-		.data		= &queues_max,
+		.data		= &init_ipc_ns.mq_queues_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
 	{
 		.procname	= "msg_max",
-		.data		= &msg_max,
+		.data		= &init_ipc_ns.mq_msg_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
@@ -1227,7 +1237,7 @@ static ctl_table mq_sysctls[] = {
 	},
 	{
 		.procname	= "msgsize_max",
-		.data		= &msgsize_max,
+		.data		= &init_ipc_ns.mq_msgsize_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
@@ -1273,13 +1283,13 @@ static int __init init_mqueue_fs(void)
 	if (error)
 		goto out_sysctl;
 
-	if (IS_ERR(mqueue_mnt = kern_mount(&mqueue_fs_type))) {
-		error = PTR_ERR(mqueue_mnt);
+	init_ipc_ns.mq_mnt = kern_mount(&mqueue_fs_type);
+	if (IS_ERR(init_ipc_ns.mq_mnt)) {
+		error = PTR_ERR(init_ipc_ns.mq_mnt);
 		goto out_filesystem;
 	}
 
 	/* internal initialization - not common for vfs */
-	queues_count = 0;
 	spin_lock_init(&mq_lock);
 
 	return 0;
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index c82c215..c197cd1 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -13,10 +13,31 @@
 #include <linux/security.h>
 #include <linux/slab.h>
 #include <linux/ipc.h>
+#include <linux/ipc_namespace.h>
 #include <asm/uaccess.h>
 
 #include "util.h"
 
+/*
+ * The next 2 defines are here bc this is the only file
+ * compiled when either CONFIG_SYSVIPC and CONFIG_POSIX_MQUEUE
+ * and not CONFIG_IPC_NS.
+ */
+struct ipc_namespace init_ipc_ns = {
+	.kref = {
+		.refcount	= ATOMIC_INIT(2),
+	},
+#ifdef CONFIG_POSIX_MQUEUE
+	.mq_mnt          = NULL,
+	.mq_queues_count = 0,
+	.mq_queues_max   = DFLT_QUEUESMAX,
+	.mq_msg_max      = DFLT_MSGMAX,
+	.mq_msgsize_max  = DFLT_MSGSIZEMAX,
+#endif
+};
+
+atomic_t nr_ipc_ns = ATOMIC_INIT(1);
+
 struct msg_msgseg {
 	struct msg_msgseg* next;
 	/* the next part of the message follows immediately */
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 9171d94..4b4dc6d 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -25,6 +25,7 @@ static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
 	sem_init_ns(ns);
 	msg_init_ns(ns);
 	shm_init_ns(ns);
+	mq_init_ns(ns);
 
 	/*
 	 * msgmni has already been computed for the new ipc ns.
@@ -101,6 +102,7 @@ void free_ipc_ns(struct kref *kref)
 	sem_exit_ns(ns);
 	msg_exit_ns(ns);
 	shm_exit_ns(ns);
+	mq_exit_ns(ns);
 	kfree(ns);
 	atomic_dec(&nr_ipc_ns);
 
diff --git a/ipc/util.c b/ipc/util.c
index 5a1808c..ac971ce 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -47,15 +47,6 @@ struct ipc_proc_iface {
 	int (*show)(struct seq_file *, void *);
 };
 
-struct ipc_namespace init_ipc_ns = {
-	.kref = {
-		.refcount	= ATOMIC_INIT(2),
-	},
-};
-
-atomic_t nr_ipc_ns = ATOMIC_INIT(1);
-
-
 #ifdef CONFIG_MEMORY_HOTPLUG
 
 static void ipc_memory_notifier(struct work_struct *work)
diff --git a/ipc/util.h b/ipc/util.h
index 3646b45..52755c1 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -20,6 +20,13 @@ void shm_init (void);
 
 struct ipc_namespace;
 
+#ifdef CONFIG_POSIX_MQUEUE
+void mq_exit_ns(struct ipc_namespace *ns);
+#else
+#define mq_exit_ns(ns)
+#endif
+
+#ifdef CONFIG_SYSVIPC
 void sem_init_ns(struct ipc_namespace *ns);
 void msg_init_ns(struct ipc_namespace *ns);
 void shm_init_ns(struct ipc_namespace *ns);
@@ -27,6 +34,14 @@ void shm_init_ns(struct ipc_namespace *ns);
 void sem_exit_ns(struct ipc_namespace *ns);
 void msg_exit_ns(struct ipc_namespace *ns);
 void shm_exit_ns(struct ipc_namespace *ns);
+#else
+#define sem_init_ns(ns)
+#define msg_init_ns(ns)
+#define shm_init_ns(ns)
+#define sem_exit_ns(ns)
+#define msg_exit_ns(ns)
+#define shm_exit_ns(ns)
+#endif
 
 /*
  * Structure that holds the parameters needed by the ipc operations
-- 
1.5.4.3

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [RFC PATCH 2/2] ipc namespaces: implement support for posix msqueues
       [not found] ` <20081215233747.GA27553-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2008-12-15 23:38   ` [RFC PATCH 1/2] mqueue ns: move mqueue_mnt into struct ipc_namespace Serge E. Hallyn
@ 2008-12-15 23:38   ` Serge E. Hallyn
  2008-12-16  8:11   ` [RFC patch 0/2] posix mqueue namespace (v11) Cedric Le Goater
  2 siblings, 0 replies; 5+ messages in thread
From: Serge E. Hallyn @ 2008-12-15 23:38 UTC (permalink / raw)
  To: Linux Containers

Implement multiple mounts of the mqueue file system, and
link it to usage of CLONE_NEWIPC.

Each ipc ns has a corresponding mqueuefs superblock.  When
a user does clone(CLONE_NEWIPC) or unshare(CLONE_NEWIPC), the
unshare will cause an internal mount of a new mqueuefs sb
linked to the new ipc ns.

When a user does 'mount -t mqueue mqueue /dev/mqueue', he
mounts the mqueuefs superblock of his current ipc namespace.

Posix message queues can be worked with both through the
mq_* system calls (see mq_overview(7)), and through the VFS
via the mqueue mount.  Any usage of mq_open() and friends
will work with the acting task's ipc namespace.  Any actions
through the VFS will work with the mqueuefs in which the
file was created.  So if a user doesn't remount mqueuefs
after unshare(CLONE_NEWIPC), mq_open("/ab") will not be
reflected in "ls /dev/mqueue".

If task a mounts mqueue for ipc_ns:1, then clones task b with
a new ipcns, ipc_ns:2, and then task a is the last task in
ipc_ns:1 to exit, then (1) ipc_ns:1 will be freed, (2) its
superblock will live on until task b umounts the corresponding
mqueuefs, and vfs actions will continue to succeed, but (3)
sb->s_fs_info will be NULL for the sb corresponding to the
deceased ipc_ns:1.

Largely based on previous version by Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
and Nadia Derbey <Nadia.Derbey-6ktuUTfB/bM@public.gmane.org>.

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 include/linux/ipc_namespace.h |   16 ++---
 ipc/mqueue.c                  |  150 +++++++++++++++++++++++++++++++++--------
 ipc/msgutil.c                 |    8 +--
 ipc/namespace.c               |   25 +++++--
 ipc/util.h                    |    6 +-
 5 files changed, 154 insertions(+), 51 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 532598f..74f1ae2 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -25,7 +25,7 @@ struct ipc_ids {
 };
 
 struct ipc_namespace {
-	struct kref	kref;
+	atomic_t	count;
 	struct ipc_ids	ids[3];
 
 	int		sem_ctls[4];
@@ -56,6 +56,7 @@ struct ipc_namespace {
 extern struct ipc_namespace init_ipc_ns;
 extern atomic_t nr_ipc_ns;
 
+extern spinlock_t mq_lock;
 #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
 #define INIT_IPC_NS(ns)		.ns		= &init_ipc_ns,
 #else
@@ -75,18 +76,18 @@ extern int ipcns_notify(unsigned long);
 #endif /* CONFIG_SYSVIPC */
 
 #ifdef CONFIG_POSIX_MQUEUE
-extern void mq_init_ns(struct ipc_namespace *ns);
+extern int mq_init_ns(struct ipc_namespace *ns);
 /* default values */
 #define DFLT_QUEUESMAX 256     /* max number of message queues */
 #define DFLT_MSGMAX    10      /* max number of messages in each queue */
 #define HARD_MSGMAX    (131072/sizeof(void *))
 #define DFLT_MSGSIZEMAX 8192   /* max message size */
 #else
-#define mq_init_ns(ns)
+#define mq_init_ns(ns) (0)
 #endif
 
 #if defined(CONFIG_IPC_NS)
-extern void free_ipc_ns(struct kref *kref);
+extern void free_ipc_ns(struct ipc_namespace *ns);
 extern struct ipc_namespace *copy_ipcs(unsigned long flags,
 				       struct ipc_namespace *ns);
 extern void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
@@ -96,14 +97,11 @@ extern void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
 static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 {
 	if (ns)
-		kref_get(&ns->kref);
+		atomic_inc(&ns->count);
 	return ns;
 }
 
-static inline void put_ipc_ns(struct ipc_namespace *ns)
-{
-	kref_put(&ns->kref, free_ipc_ns);
-}
+extern void put_ipc_ns(struct ipc_namespace *ns);
 #else
 static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
 		struct ipc_namespace *ns)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 01d64a0..de31ef7 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -88,7 +88,6 @@ static const struct file_operations mqueue_file_operations;
 static struct super_operations mqueue_super_ops;
 static void remove_notification(struct mqueue_inode_info *info);
 
-static spinlock_t mq_lock;
 static struct kmem_cache *mqueue_inode_cachep;
 
 static struct ctl_table_header * mq_sysctl_table;
@@ -98,25 +97,40 @@ static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
 	return container_of(inode, struct mqueue_inode_info, vfs_inode);
 }
 
-void mq_init_ns(struct ipc_namespace *ns) {
-	ns->mq_queues_count  = 0;
-	ns->mq_queues_max    = DFLT_QUEUESMAX;
-	ns->mq_msg_max       = DFLT_MSGMAX;
-	ns->mq_msgsize_max   = DFLT_MSGSIZEMAX;
-	ns->mq_mnt           = mntget(init_ipc_ns.mq_mnt);
+/*
+ * This routine should be called with the mq_lock held.
+ */
+static inline struct ipc_namespace *__get_ns_from_ino(struct inode *inode)
+{
+	return get_ipc_ns(inode->i_sb->s_fs_info);
 }
 
-void mq_exit_ns(struct ipc_namespace *ns) {
-	/* will need to clear out ns->mq_mnt->mnt_sb->s_fs_info here */
-	mntput(ns->mq_mnt);
+static inline struct ipc_namespace *get_ns_from_ino(struct inode *inode)
+{
+	struct ipc_namespace *ns;
+
+	spin_lock(&mq_lock);
+	ns = __get_ns_from_ino(inode);
+	spin_unlock(&mq_lock);
+	return ns;
+}
+
+static inline struct ipc_namespace *get_ns_from_sb(struct super_block *sb)
+{
+	struct ipc_namespace *ns;
+
+	spin_lock(&mq_lock);
+	ns = get_ipc_ns(sb->s_fs_info);
+	spin_unlock(&mq_lock);
+	return ns;
 }
 
-static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
-							struct mq_attr *attr)
+static struct inode *mqueue_get_inode(struct super_block *sb,
+		struct ipc_namespace *ipc_ns, int mode,
+		struct mq_attr *attr)
 {
 	struct user_struct *u = current_user();
 	struct inode *inode;
-	struct ipc_namespace *ipc_ns = &init_ipc_ns;
 
 	inode = new_inode(sb);
 	if (inode) {
@@ -192,30 +206,76 @@ out_inode:
 static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct inode *inode;
+	struct ipc_namespace *ns = data;
+	int error = 0;
 
 	sb->s_blocksize = PAGE_CACHE_SIZE;
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 	sb->s_magic = MQUEUE_MAGIC;
 	sb->s_op = &mqueue_super_ops;
 
-	inode = mqueue_get_inode(sb, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL);
-	if (!inode)
-		return -ENOMEM;
+	inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO,
+				NULL);
+	if (!inode) {
+		error = -ENOMEM;
+		goto out;
+	}
 
 	sb->s_root = d_alloc_root(inode);
 	if (!sb->s_root) {
 		iput(inode);
-		return -ENOMEM;
+		error = -ENOMEM;
 	}
 
-	return 0;
+out:
+	return error;
+}
+
+static int compare_sb_single_ns(struct super_block *sb, void *data)
+{
+	return sb->s_fs_info == data;
+}
+
+static int set_sb_single_ns(struct super_block *sb, void *data)
+{
+	sb->s_fs_info = data;
+	return set_anon_super(sb, NULL);
+}
+
+static int get_sb_single_ns(struct file_system_type *fs_type,
+		int flags, void *data,
+		int (*fill_super)(struct super_block *, void *, int),
+		struct vfsmount *mnt)
+{
+	struct super_block *s;
+	int error;
+
+	s = sget(fs_type, compare_sb_single_ns, set_sb_single_ns, data);
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+	if (!s->s_root) {
+		s->s_flags = flags;
+		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
+		if (error) {
+			up_write(&s->s_umount);
+			deactivate_super(s);
+			return error;
+		}
+		s->s_flags |= MS_ACTIVE;
+	}
+	do_remount_sb(s, flags, data, 0);
+	return simple_set_mnt(mnt, s);
 }
 
 static int mqueue_get_sb(struct file_system_type *fs_type,
 			 int flags, const char *dev_name,
 			 void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt);
+	if (flags & MS_KERNMOUNT)
+		return get_sb_single_ns(fs_type, flags, data,
+					mqueue_fill_super, mnt);
+	return get_sb_single_ns(fs_type, flags, current->nsproxy->ipc_ns,
+				mqueue_fill_super, mnt);
 }
 
 static void init_once(void *foo)
@@ -246,12 +306,13 @@ static void mqueue_delete_inode(struct inode *inode)
 	struct user_struct *user;
 	unsigned long mq_bytes;
 	int i;
-	struct ipc_namespace *ipc_ns = &init_ipc_ns;
+	struct ipc_namespace *ipc_ns;
 
 	if (S_ISDIR(inode->i_mode)) {
 		clear_inode(inode);
 		return;
 	}
+	ipc_ns = get_ns_from_ino(inode);
 	info = MQUEUE_I(inode);
 	spin_lock(&info->lock);
 	for (i = 0; i < info->attr.mq_curmsgs; i++)
@@ -267,10 +328,12 @@ static void mqueue_delete_inode(struct inode *inode)
 	if (user) {
 		spin_lock(&mq_lock);
 		user->mq_bytes -= mq_bytes;
-		ipc_ns->mq_queues_count--;
+		if (ipc_ns)
+			ipc_ns->mq_queues_count--;
 		spin_unlock(&mq_lock);
 		free_uid(user);
 	}
+	put_ipc_ns(ipc_ns);
 }
 
 static int mqueue_create(struct inode *dir, struct dentry *dentry,
@@ -279,9 +342,14 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry,
 	struct inode *inode;
 	struct mq_attr *attr = dentry->d_fsdata;
 	int error;
-	struct ipc_namespace *ipc_ns = &init_ipc_ns;
+	struct ipc_namespace *ipc_ns;
 
 	spin_lock(&mq_lock);
+	ipc_ns = __get_ns_from_ino(dir);
+	if (!ipc_ns) {
+		error = -EACCES;
+		goto out_lock;
+	}
 	if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max &&
 			!capable(CAP_SYS_RESOURCE)) {
 		error = -ENOSPC;
@@ -290,7 +358,7 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry,
 	ipc_ns->mq_queues_count++;
 	spin_unlock(&mq_lock);
 
-	inode = mqueue_get_inode(dir->i_sb, mode, attr);
+	inode = mqueue_get_inode(dir->i_sb, ipc_ns, mode, attr);
 	if (!inode) {
 		error = -ENOMEM;
 		spin_lock(&mq_lock);
@@ -298,6 +366,7 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry,
 		goto out_lock;
 	}
 
+	put_ipc_ns(ipc_ns);
 	dir->i_size += DIRENT_SIZE;
 	dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME;
 
@@ -306,6 +375,7 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry,
 	return 0;
 out_lock:
 	spin_unlock(&mq_lock);
+	put_ipc_ns(ipc_ns);
 	return error;
 }
 
@@ -673,7 +743,7 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
 	struct file *filp;
 	char *name;
 	int fd, error;
-	struct ipc_namespace *ipc_ns = &init_ipc_ns;
+	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
 
 	error = audit_mq_open(oflag, mode, u_attr);
 	if (error != 0)
@@ -741,7 +811,7 @@ asmlinkage long sys_mq_unlink(const char __user *u_name)
 	char *name;
 	struct dentry *dentry;
 	struct inode *inode = NULL;
-	struct ipc_namespace *ipc_ns = &init_ipc_ns;
+	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
 
 	name = getname(u_name);
 	if (IS_ERR(name))
@@ -1212,6 +1282,29 @@ static struct file_system_type mqueue_fs_type = {
 	.kill_sb = kill_litter_super,
 };
 
+int mq_init_ns(struct ipc_namespace *ns)
+{
+	ns->mq_queues_count  = 0;
+	ns->mq_queues_max    = DFLT_QUEUESMAX;
+	ns->mq_msg_max       = DFLT_MSGMAX;
+	ns->mq_msgsize_max   = DFLT_MSGSIZEMAX;
+
+	ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns);
+	if (IS_ERR(ns->mq_mnt))
+		return PTR_ERR(ns->mq_mnt);
+	return 0;
+}
+
+void mq_clear_sbinfo(struct ipc_namespace *ns)
+{
+	ns->mq_mnt->mnt_sb->s_fs_info = NULL;
+}
+
+void mq_put_mnt(struct ipc_namespace *ns)
+{
+	mntput(ns->mq_mnt);
+}
+
 static int msg_max_limit_min = MIN_MSGMAX;
 static int msg_max_limit_max = MAX_MSGMAX;
 
@@ -1283,15 +1376,14 @@ static int __init init_mqueue_fs(void)
 	if (error)
 		goto out_sysctl;
 
-	init_ipc_ns.mq_mnt = kern_mount(&mqueue_fs_type);
+	spin_lock_init(&mq_lock);
+
+	init_ipc_ns.mq_mnt = kern_mount_data(&mqueue_fs_type, &init_ipc_ns);
 	if (IS_ERR(init_ipc_ns.mq_mnt)) {
 		error = PTR_ERR(init_ipc_ns.mq_mnt);
 		goto out_filesystem;
 	}
 
-	/* internal initialization - not common for vfs */
-	spin_lock_init(&mq_lock);
-
 	return 0;
 
 out_filesystem:
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index c197cd1..21475b0 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -18,18 +18,16 @@
 
 #include "util.h"
 
+spinlock_t mq_lock;
+
 /*
  * The next 2 defines are here bc this is the only file
  * compiled when either CONFIG_SYSVIPC and CONFIG_POSIX_MQUEUE
  * and not CONFIG_IPC_NS.
  */
 struct ipc_namespace init_ipc_ns = {
-	.kref = {
-		.refcount	= ATOMIC_INIT(2),
-	},
+	.count		= ATOMIC_INIT(2),
 #ifdef CONFIG_POSIX_MQUEUE
-	.mq_mnt          = NULL,
-	.mq_queues_count = 0,
 	.mq_queues_max   = DFLT_QUEUESMAX,
 	.mq_msg_max      = DFLT_MSGMAX,
 	.mq_msgsize_max  = DFLT_MSGSIZEMAX,
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 4b4dc6d..a4f36ba 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -9,23 +9,31 @@
 #include <linux/rcupdate.h>
 #include <linux/nsproxy.h>
 #include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
 
 #include "util.h"
 
 static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
 {
 	struct ipc_namespace *ns;
+	int err;
 
 	ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
 	if (ns == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	atomic_set(&ns->count, 1);
+	err = mq_init_ns(ns);
+	if (err) {
+		kfree(ns);
+		return ERR_PTR(err);
+	}
 	atomic_inc(&nr_ipc_ns);
 
 	sem_init_ns(ns);
 	msg_init_ns(ns);
 	shm_init_ns(ns);
-	mq_init_ns(ns);
 
 	/*
 	 * msgmni has already been computed for the new ipc ns.
@@ -35,7 +43,6 @@ static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
 	ipcns_notify(IPCNS_CREATED);
 	register_ipcns_notifier(ns);
 
-	kref_init(&ns->kref);
 	return ns;
 }
 
@@ -85,11 +92,18 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
 	up_write(&ids->rw_mutex);
 }
 
-void free_ipc_ns(struct kref *kref)
+void put_ipc_ns(struct ipc_namespace *ns)
 {
-	struct ipc_namespace *ns;
+	if (ns && atomic_dec_and_lock(&ns->count, &mq_lock)) {
+		mq_clear_sbinfo(ns);
+		spin_unlock(&mq_lock);
+		mq_put_mnt(ns);
+		free_ipc_ns(ns);
+	}
+}
 
-	ns = container_of(kref, struct ipc_namespace, kref);
+void free_ipc_ns(struct ipc_namespace *ns)
+{
 	/*
 	 * Unregistering the hotplug notifier at the beginning guarantees
 	 * that the ipc namespace won't be freed while we are inside the
@@ -102,7 +116,6 @@ void free_ipc_ns(struct kref *kref)
 	sem_exit_ns(ns);
 	msg_exit_ns(ns);
 	shm_exit_ns(ns);
-	mq_exit_ns(ns);
 	kfree(ns);
 	atomic_dec(&nr_ipc_ns);
 
diff --git a/ipc/util.h b/ipc/util.h
index 52755c1..b4d213f 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -21,9 +21,11 @@ void shm_init (void);
 struct ipc_namespace;
 
 #ifdef CONFIG_POSIX_MQUEUE
-void mq_exit_ns(struct ipc_namespace *ns);
+extern void mq_clear_sbinfo(struct ipc_namespace *ns);
+extern void mq_put_mnt(struct ipc_namespace *ns);
 #else
-#define mq_exit_ns(ns)
+#define mq_clear_sbinfo(ns)
+#define mq_put_mnt(ns)
 #endif
 
 #ifdef CONFIG_SYSVIPC
-- 
1.5.4.3

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [RFC patch 0/2] posix mqueue namespace (v11)
       [not found] ` <20081215233747.GA27553-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2008-12-15 23:38   ` [RFC PATCH 1/2] mqueue ns: move mqueue_mnt into struct ipc_namespace Serge E. Hallyn
  2008-12-15 23:38   ` [RFC PATCH 2/2] ipc namespaces: implement support for posix msqueues Serge E. Hallyn
@ 2008-12-16  8:11   ` Cedric Le Goater
       [not found]     ` <4947629D.9040807-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
  2 siblings, 1 reply; 5+ messages in thread
From: Cedric Le Goater @ 2008-12-16  8:11 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Linux Containers

Serge E. Hallyn wrote:
> (Ok I don't know what the actual version number is - it's
> high but 11 is probably safe)
> 
> Cedric and Nadia took several approaches to making posix
> message queues per-namespace.  I ended up making some
> deep changes so am not retaining their Signed-off-by:s
> on this version, but this is definitely very much based
> on work by both of them.

you can keep mine. i have had a similar version on 2.6.26. 

http://legoater.free.fr/patches/2.6.26/2.6.26/

and it's easier to track where the patches go.

> Patch 2 hopefully explains my approach.  Briefly,
> 
> 	1. sysv and posix ipc are both under CLONE_NEWIPC
> 	2. the mqueue sb is per-ipc-namespace
> 
> So to create a new ipc namespace, you would
> 
> 	unshare(CLONE_NEWIPC|CLONE_NEWNS);

does CLONE_NEWIPC require CLONE_NEWNS?

> 	umount /dev/mqueue
> 	mount -t mqueue mqueue /dev/mqueue

the semantics look good, much better than a 'newinstance' mount
option.

if CLONE_NEWNS is not required, what happens to the user mount (and
the mq_ns below it) when the task dies? that's the big issue. if
CLONE_NEWNS is required we're safe, but I think Pavel made
some objection to that.

> It's perfectly valid to do vfs operations on files
> in another ipc_namespace's /dev/mqueue, but any use
> of mq_open(3) and friends will act in your own ipc_ns.

ok.

C.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC patch 0/2] posix mqueue namespace (v11)
       [not found]     ` <4947629D.9040807-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
@ 2008-12-16 15:14       ` Serge E. Hallyn
  0 siblings, 0 replies; 5+ messages in thread
From: Serge E. Hallyn @ 2008-12-16 15:14 UTC (permalink / raw)
  To: Cedric Le Goater; +Cc: Linux Containers, Pavel Emelyanov

Quoting Cedric Le Goater (clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org):
> Serge E. Hallyn wrote:
> > (Ok I don't know what the actual version number is - it's
> > high but 11 is probably safe)
> > 
> > Cedric and Nadia took several approaches to making posix
> > message queues per-namespace.  I ended up making some
> > deep changes so am not retaining their Signed-off-by:s
> > on this version, but this is definitely very much based
> > on work by both of them.
> 
> you can keep mine. i have had a similar version on 2.6.26. 
> 
> http://legoater.free.fr/patches/2.6.26/2.6.26/
> 
> and it's easier to track where the patches go.
> 
> > Patch 2 hopefully explains my approach.  Briefly,

Thanks, Cedric, will put those back.

> > 	1. sysv and posix ipc are both under CLONE_NEWIPC
> > 	2. the mqueue sb is per-ipc-namespace
> > 
> > So to create a new ipc namespace, you would
> > 
> > 	unshare(CLONE_NEWIPC|CLONE_NEWNS);
> 
> does CLONE_NEWIPC require CLONE_NEWNS?

No, the mq_* syscalls don't need the fs to be actually mounted,
and a container could just chroot("/vs1"); and mount -t mqueue
under /vs1/dev/mqueue, not requiring a new mounts namespace.

> > 	umount /dev/mqueue
> > 	mount -t mqueue mqueue /dev/mqueue
> 
> the semantics look good, much better than a 'newinstance' mount
> option.

Agreed.  newinstance works for a pure filesystem like devpts,
but it simply isn't a good fit for mqueue.

> if CLONE_NEWNS is not required, what happens to the user mount (and
> the mq_ns below it) when the task dies? that's the big issue. if
> CLONE_NEWNS is required we're safe, but I think Pavel made
> some objection to that.

(Huh, I just noticed get_ns_from_sb() doesn't seem to be called
anywhere <scribble><scribble>)

Short version:
The user mount hangs around until someone umounts it.  Now of course
I expect that most users WILL want to do CLONE_NEWIPC|CLONE_NEWNS.

Long version:
Any VFS actions through mqueuefs will do:
	spin_lock(&mq_lock);
	ipc_ns = get_ipc_ns(inode->i_sb->s_fs_info);
	spin_unlock(&mq_lock);
where s_fs_info is the ipc_ns.  Freeing an ipc_ns does
	if (atomic_dec_and_lock(&ipc_ns->count, &mq_lock)) {
		ipc_ns->mq_mnt->mnt_sb->s_fs_info = NULL;
		spin_unlock(&mq_lock);
		mntput(ipc_ns->mq_mnt);
	}

So if a vfs_create() by a task in another ipc_ns is racing with the
task exit of the last task in the ipc_ns, then either
	1. the vfs_create() manages to pin the ipc_ns before
	   the other task exits.  So the task exit won't
	   free the ipc_ns.  The put_ipc_ns() at the end
	   of vfs_create() will.
or
	2. the task exits first, vfs_create() finds
	   s_fs_info NULL, and returns -EACCES.  Unlink
	   simply succeeds.

Pavel, please let me know if you have issues with my approach.

> > It's perfectly valid to do vfs operations on files
> > in another ipc_namespace's /dev/mqueue, but any use
> > of mq_open(3) and friends will act in your own ipc_ns.
> 
> ok.

Nadia had written a cool set of LTP tests.  They were based
around the mount -o newinstance semantics so I'll have to
see which ones are still relevant and rework some others,
then I will post them and repost the kernel patchset.

Thanks for taking a look, Cedric, and for getting this set
going before.

-serge

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2008-12-16 15:14 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-12-15 23:37 [RFC patch 0/2] posix mqueue namespace (v11) Serge E. Hallyn
     [not found] ` <20081215233747.GA27553-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-12-15 23:38   ` [RFC PATCH 1/2] mqueue ns: move mqueue_mnt into struct ipc_namespace Serge E. Hallyn
2008-12-15 23:38   ` [RFC PATCH 2/2] ipc namespaces: implement support for posix msqueues Serge E. Hallyn
2008-12-16  8:11   ` [RFC patch 0/2] posix mqueue namespace (v11) Cedric Le Goater
     [not found]     ` <4947629D.9040807-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2008-12-16 15:14       ` Serge E. Hallyn

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.