Linux Container Development
 help / color / mirror / Atom feed
From: menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org
To: containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org
Cc: Kirill Korotaev <dev-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>,
	Paul Jackson <pj-sJ/iWh9BUns@public.gmane.org>,
	"Eric W. Biederman"
	<ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>,
	Andrew Morton
	<akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
Subject: [PATCH 05/29] task containersv11 add container_clone interface
Date: Tue, 11 Sep 2007 12:52:44 -0700	[thread overview]
Message-ID: <20070911200145.437472000@menage.corp.google.com> (raw)
In-Reply-To: 20070911195239.997111000@menage.corp.google.com

[-- Attachment #1: task-containersv11-add-container_clone-interface.patch --]
[-- Type: text/plain, Size: 6721 bytes --]

From: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

Add support for cgroup_clone(), a way to create new cgroups intended to
be used for systems such as namespace unsharing.  A new subsystem callback,
post_clone(), is added to allow subsystems to automatically configure cloned
cgroups.

Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
Cc: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
Cc: Dave Hansen <haveblue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
Cc: Balbir Singh <balbir-xthvdsQ13ZrQT0dZR+AlfA@public.gmane.org>
Cc: Paul Jackson <pj-sJ/iWh9BUns@public.gmane.org>
Cc: Kirill Korotaev <dev-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
Cc: Herbert Poetzl <herbert-dBHVzrDq9nF4Lj/PQRBjDg@public.gmane.org>
Cc: Srivatsa Vaddagiri <vatsa-xthvdsQ13ZrQT0dZR+AlfA@public.gmane.org>
Cc: Cedric Le Goater <clg-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
Signed-off-by: Andrew Morton <akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
---

 Documentation/cgroups.txt |    7 +
 include/linux/cgroup.h    |    3 
 kernel/cgroup.c           |  135 +++++++++++++++++++++++++++++++++
 3 files changed, 145 insertions(+)

diff -puN Documentation/cgroups.txt~task-cgroupsv11-add-cgroup_clone-interface Documentation/cgroups.txt
--- a/Documentation/cgroups.txt~task-cgroupsv11-add-cgroup_clone-interface
+++ a/Documentation/cgroups.txt
@@ -504,6 +504,13 @@ include/linux/cgroup.h for details). 
 method can return an error code, the error code is currently not
 always handled well.
 
+void post_clone(struct cgroup_subsys *ss, struct cgroup *cont)
+
+Called at the end of cgroup_clone() to do any paramater
+initialization which might be required before a task could attach.  For
+example in cpusets, no task may attach before 'cpus' and 'mems' are set
+up.
+
 void bind(struct cgroup_subsys *ss, struct cgroup *root)
 LL=callback_mutex
 
diff -puN include/linux/cgroup.h~task-cgroupsv11-add-cgroup_clone-interface include/linux/cgroup.h
--- a/include/linux/cgroup.h~task-cgroupsv11-add-cgroup_clone-interface
+++ a/include/linux/cgroup.h
@@ -174,6 +174,7 @@ struct cgroup_subsys {
 	void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
 	int (*populate)(struct cgroup_subsys *ss,
 			struct cgroup *cont);
+	void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cont);
 	void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
 	int subsys_id;
 	int active;
@@ -213,6 +214,8 @@ static inline struct cgroup* task_con
 
 int cgroup_path(const struct cgroup *cont, char *buf, int buflen);
 
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss);
+
 #else /* !CONFIG_CGROUPS */
 
 static inline int cgroup_init_early(void) { return 0; }
diff -puN kernel/cgroup.c~task-cgroupsv11-add-cgroup_clone-interface kernel/cgroup.c
--- a/kernel/cgroup.c~task-cgroupsv11-add-cgroup_clone-interface
+++ a/kernel/cgroup.c
@@ -1684,3 +1684,138 @@ void cgroup_exit(struct task_struct *
 	tsk->cgroups = init_task.cgroups;
 	task_unlock(tsk);
 }
+
+/**
+ * cgroup_clone - duplicate the current cgroup in the hierarchy
+ * that the given subsystem is attached to, and move this task into
+ * the new child
+ */
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
+{
+	struct dentry *dentry;
+	int ret = 0;
+	char nodename[MAX_CGROUP_TYPE_NAMELEN];
+	struct cgroup *parent, *child;
+	struct inode *inode;
+	struct css_set *cg;
+	struct cgroupfs_root *root;
+	struct cgroup_subsys *ss;
+
+	/* We shouldn't be called by an unregistered subsystem */
+	BUG_ON(!subsys->active);
+
+	/* First figure out what hierarchy and cgroup we're dealing
+	 * with, and pin them so we can drop cgroup_mutex */
+	mutex_lock(&cgroup_mutex);
+ again:
+	root = subsys->root;
+	if (root == &rootnode) {
+		printk(KERN_INFO
+		       "Not cloning cgroup for unused subsystem %s\n",
+		       subsys->name);
+		mutex_unlock(&cgroup_mutex);
+		return 0;
+	}
+	cg = &tsk->cgroups;
+	parent = task_cgroup(tsk, subsys->subsys_id);
+
+	snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "node_%d", tsk->pid);
+
+	/* Pin the hierarchy */
+	atomic_inc(&parent->root->sb->s_active);
+
+	mutex_unlock(&cgroup_mutex);
+
+	/* Now do the VFS work to create a cgroup */
+	inode = parent->dentry->d_inode;
+
+	/* Hold the parent directory mutex across this operation to
+	 * stop anyone else deleting the new cgroup */
+	mutex_lock(&inode->i_mutex);
+	dentry = cgroup_get_dentry(parent->dentry, nodename);
+	if (IS_ERR(dentry)) {
+		printk(KERN_INFO
+		       "Couldn't allocate dentry for %s: %ld\n", nodename,
+		       PTR_ERR(dentry));
+		ret = PTR_ERR(dentry);
+		goto out_release;
+	}
+
+	/* Create the cgroup directory, which also creates the cgroup */
+	ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
+	child = __d_cont(dentry);
+	dput(dentry);
+	if (ret) {
+		printk(KERN_INFO
+		       "Failed to create cgroup %s: %d\n", nodename,
+		       ret);
+		goto out_release;
+	}
+
+	if (!child) {
+		printk(KERN_INFO
+		       "Couldn't find new cgroup %s\n", nodename);
+		ret = -ENOMEM;
+		goto out_release;
+	}
+
+	/* The cgroup now exists. Retake cgroup_mutex and check
+	 * that we're still in the same state that we thought we
+	 * were. */
+	mutex_lock(&cgroup_mutex);
+	if ((root != subsys->root) ||
+	    (parent != task_cgroup(tsk, subsys->subsys_id))) {
+		/* Aargh, we raced ... */
+		mutex_unlock(&inode->i_mutex);
+
+		deactivate_super(parent->root->sb);
+		/* The cgroup is still accessible in the VFS, but
+		 * we're not going to try to rmdir() it at this
+		 * point. */
+		printk(KERN_INFO
+		       "Race in cgroup_clone() - leaking cgroup %s\n",
+		       nodename);
+		goto again;
+	}
+
+	/* do any required auto-setup */
+	for_each_subsys(root, ss) {
+		if (ss->post_clone)
+			ss->post_clone(ss, child);
+	}
+
+	/* All seems fine. Finish by moving the task into the new cgroup */
+	ret = attach_task(child, tsk);
+	mutex_unlock(&cgroup_mutex);
+
+ out_release:
+	mutex_unlock(&inode->i_mutex);
+	deactivate_super(parent->root->sb);
+	return ret;
+}
+
+/*
+ * See if "cont" is a descendant of the current task's cgroup in
+ * the appropriate hierarchy
+ *
+ * If we are sending in dummytop, then presumably we are creating
+ * the top cgroup in the subsystem.
+ *
+ * Called only by the ns (nsproxy) cgroup.
+ */
+int cgroup_is_descendant(const struct cgroup *cont)
+{
+	int ret;
+	struct cgroup *target;
+	int subsys_id;
+
+	if (cont == dummytop)
+		return 1;
+
+	get_first_subsys(cont, NULL, &subsys_id);
+	target = task_cgroup(current, subsys_id);
+	while (cont != target && cont!= cont->top_cgroup)
+		cont = cont->parent;
+	ret = (cont == target);
+	return ret;
+}
_

--

  parent reply	other threads:[~2007-09-11 19:52 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-09-11 19:52 [PATCH 00/29] Rename Containers to Control Groups menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 01/29] task containersv11 basic task container framework menage-hpIqsD4AKlfQT0dZR+AlfA
     [not found]   ` <20070911200144.779221000-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2007-09-11 20:07     ` Andrew Morton
     [not found]       ` <20070911130731.e9df6e65.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
2007-09-11 20:11         ` Paul Menage
2007-09-30  4:40     ` Paul Jackson
     [not found]       ` <20070929214043.57e9cc39.pj-sJ/iWh9BUns@public.gmane.org>
2007-09-30  5:10         ` Paul Jackson
     [not found]           ` <20070929221030.04881227.pj-sJ/iWh9BUns@public.gmane.org>
2007-09-30  5:14             ` Paul Jackson
2007-09-30  7:10         ` Paul Menage
     [not found]           ` <6599ad830709300010xda1e97cp8c569ce08a87a86b-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-09-30  9:03             ` Andrew Morton
     [not found]               ` <20070930020330.6bd34dd4.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
2007-09-30  9:15                 ` Paul Jackson
     [not found]                   ` <20070930021536.3bd65dc3.pj-sJ/iWh9BUns@public.gmane.org>
2007-09-30  9:29                     ` Andrew Morton
     [not found]                       ` <20070930022942.b36dd34f.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
2007-09-30  9:36                         ` Paul Jackson
2007-09-11 19:52 ` [PATCH 02/29] task containersv11 basic task container framework fix menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 03/29] task containersv11 add tasks file interface menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 04/29] task containersv11 add fork exit hooks menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` menage-hpIqsD4AKlfQT0dZR+AlfA [this message]
2007-09-11 19:52 ` [PATCH 06/29] task containersv11 add procfs interface menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 07/29] task containersv11 shared container subsystem group arrays menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 08/29] task containersv11 shared container subsystem group arrays avoid lockdep warning menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 09/29] task containersv11 shared container subsystem group arrays include fix menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 10/29] task containersv11 automatic userspace notification of idle containers menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 11/29] task containersv11 make cpusets a client of containers menage-hpIqsD4AKlfQT0dZR+AlfA
     [not found]   ` <20070911200146.422879000-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2007-09-30  6:25     ` Paul Jackson
     [not found]       ` <20070929232513.63fe2d9c.pj-sJ/iWh9BUns@public.gmane.org>
2007-09-30  7:11         ` Paul Menage
     [not found]           ` <6599ad830709300011q6831a17ei60f21a06f795bead-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-09-30  7:19             ` Paul Jackson
2007-09-11 19:52 ` [PATCH 12/29] task containersv11 example cpu accounting subsystem menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 13/29] task containersv11 simple task container debug info subsystem menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 14/29] add containerstats v3 menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 15/29] add containerstats v3 fix menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 16/29] containers implement namespace tracking subsystem menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 17/29] containers implement namespace tracking subsystem fix order of container subsystems in init kconfig menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 18/29] memory controller add documentation menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 19/29] memory controller resource counters v7 menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:52 ` [PATCH 20/29] memory controller resource counters v7 fix menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:53 ` [PATCH 21/29] memory controller containers setup v7 menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:53 ` [PATCH 22/29] memory controller accounting " menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:53 ` [PATCH 23/29] memory controller memory accounting v7 menage-hpIqsD4AKlfQT0dZR+AlfA
     [not found]   ` <20070911200148.396756000-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2007-09-12 20:56     ` Peter Zijlstra
2007-09-13  9:49       ` Balbir Singh
     [not found]         ` <46E9078D.5040908-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-09-13 10:18           ` Peter Zijlstra
2007-09-13 10:29             ` Balbir Singh
2007-09-11 19:53 ` [PATCH 24/29] memory controller task migration v7 menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:53 ` [PATCH 25/29] memory controller add per container lru and reclaim v7 menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:53 ` [PATCH 26/29] memory controller add per container lru and reclaim v7 fix menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:53 ` [PATCH 27/29] memory controller oom handling v7 menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:53 ` [PATCH 28/29] memory controller add switch to control what type of pages to limit v7 menage-hpIqsD4AKlfQT0dZR+AlfA
2007-09-11 19:53 ` [PATCH 29/29] memory controller make page_referenced container aware v7 menage-hpIqsD4AKlfQT0dZR+AlfA

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070911200145.437472000@menage.corp.google.com \
    --to=menage-hpiqsd4aklfqt0dzr+alfa@public.gmane.org \
    --cc=akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org \
    --cc=containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org \
    --cc=dev-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org \
    --cc=ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org \
    --cc=pj-sJ/iWh9BUns@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox