linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ben Blum <bblum@andrew.cmu.edu>
To: linux-kernel@vger.kernel.org, containers@lists.linux-foundation.org
Cc: akpm@linux-foundation.org, bblum@andrew.cmu.edu,
	ebiederm@xmission.com, lizf@cn.fujitsu.com, matthltc@us.ibm.com,
	menage@google.com, oleg@redhat.com
Subject: [RFC] [PATCH 1/2] cgroups: read-write lock CLONE_THREAD forking per threadgroup
Date: Sat, 29 May 2010 21:31:36 -0400	[thread overview]
Message-ID: <20100530013136.GB762@ghc01.ghc.andrew.cmu.edu> (raw)
In-Reply-To: <20100530013002.GA762@ghc01.ghc.andrew.cmu.edu>

[-- Attachment #1: cgroup-threadgroup-fork-lock.patch --]
[-- Type: text/plain, Size: 7559 bytes --]

Adds functionality to read/write lock CLONE_THREAD fork()ing per-threadgroup

From: Ben Blum <bblum@andrew.cmu.edu>

This patch adds an rwsem to a threadgroup's signal_struct; the fork path takes
it for reading, under CONFIG_CGROUPS. If another part of the kernel later wants
to use such a locking mechanism, the CONFIG_CGROUPS ifdefs should be changed to
a higher-level config option that both CGROUPS and the other subsystem would
depend on.

This is a pre-patch for cgroups-procs-write.patch.

Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
---
 include/linux/cgroup.h    |   15 ++++++++++-----
 include/linux/init_task.h |    9 +++++++++
 include/linux/sched.h     |   10 ++++++++++
 kernel/cgroup.c           |   23 +++++++++++++++++++++--
 kernel/fork.c             |   10 +++++++---
 5 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 8f78073..196a703 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -31,10 +31,12 @@ extern void cgroup_lock(void);
 extern int cgroup_lock_is_held(void);
 extern bool cgroup_lock_live_group(struct cgroup *cgrp);
 extern void cgroup_unlock(void);
-extern void cgroup_fork(struct task_struct *p);
+extern void cgroup_fork(struct task_struct *p, unsigned long clone_flags);
 extern void cgroup_fork_callbacks(struct task_struct *p);
-extern void cgroup_post_fork(struct task_struct *p);
+extern void cgroup_post_fork(struct task_struct *p, unsigned long clone_flags);
 extern void cgroup_exit(struct task_struct *p, int run_callbacks);
+extern void cgroup_fork_failed(struct task_struct *p, int run_callbacks,
+			       unsigned long clone_flags);
 extern int cgroupstats_build(struct cgroupstats *stats,
 				struct dentry *dentry);
 extern int cgroup_load_subsys(struct cgroup_subsys *ss);
@@ -613,11 +615,14 @@ unsigned short css_depth(struct cgroup_subsys_state *css);
 
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
-static inline void cgroup_fork(struct task_struct *p) {}
+static inline void cgroup_fork(struct task_struct *p,
+			       unsigned long clone_flags) {}
 static inline void cgroup_fork_callbacks(struct task_struct *p) {}
-static inline void cgroup_post_fork(struct task_struct *p) {}
+static inline void cgroup_post_fork(struct task_struct *p,
+				    unsigned long clone_flags) {}
 static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
-
+static inline void cgroup_fork_failed(struct task_struct *p, int callbacks,
+				      unsigned long clone_flags) {}
 static inline void cgroup_lock(void) {}
 static inline void cgroup_unlock(void) {}
 static inline int cgroupstats_build(struct cgroupstats *stats,
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index b1ed1cd..cfb2bc8 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -15,6 +15,14 @@
 extern struct files_struct init_files;
 extern struct fs_struct init_fs;
 
+#ifdef CONFIG_CGROUPS
+#define INIT_THREADGROUP_FORK_LOCK(sig)					\
+	.threadgroup_fork_lock =					\
+		__RWSEM_INITIALIZER(sig.threadgroup_fork_lock),
+#else
+#define INIT_THREADGROUP_FORK_LOCK(sig)
+#endif
+
 #define INIT_SIGNALS(sig) {						\
 	.count		= ATOMIC_INIT(1), 				\
 	.wait_chldexit	= __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
@@ -29,6 +37,7 @@ extern struct fs_struct init_fs;
 		.running = 0,						\
 		.lock = __SPIN_LOCK_UNLOCKED(sig.cputimer.lock),	\
 	},								\
+	INIT_THREADGROUP_FORK_LOCK(sig)					\
 }
 
 extern struct nsproxy init_nsproxy;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b7b81d..2bbcbd2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -627,6 +627,16 @@ struct signal_struct {
 	unsigned audit_tty;
 	struct tty_audit_buf *tty_audit_buf;
 #endif
+#ifdef CONFIG_CGROUPS
+	/*
+	 * The threadgroup_fork_lock prevents threads from forking with
+	 * CLONE_THREAD while held for writing. Use this for fork-sensitive
+	 * threadgroup-wide operations.
+	 * Currently only needed by cgroups, and the fork-side readlock happens
+	 * in cgroup_{fork,post_fork,fork_failed}.
+	 */
+	struct rw_semaphore threadgroup_fork_lock;
+#endif
 
 	int oom_adj;	/* OOM kill score adjustment (bit shift) */
 };
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 6d870f2..6c8e46f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4015,8 +4015,10 @@ static const struct file_operations proc_cgroupstats_operations = {
  * At the point that cgroup_fork() is called, 'current' is the parent
  * task, and the passed argument 'child' points to the child task.
  */
-void cgroup_fork(struct task_struct *child)
+void cgroup_fork(struct task_struct *child, unsigned long clone_flags)
 {
+	if (clone_flags & CLONE_THREAD)
+		down_read(&current->signal->threadgroup_fork_lock);
 	task_lock(current);
 	child->cgroups = current->cgroups;
 	get_css_set(child->cgroups);
@@ -4058,7 +4060,7 @@ void cgroup_fork_callbacks(struct task_struct *child)
  * with the first call to cgroup_iter_start() - to guarantee that the
  * new task ends up on its list.
  */
-void cgroup_post_fork(struct task_struct *child)
+void cgroup_post_fork(struct task_struct *child, unsigned long clone_flags)
 {
 	if (use_task_css_set_links) {
 		write_lock(&css_set_lock);
@@ -4068,6 +4070,8 @@ void cgroup_post_fork(struct task_struct *child)
 		task_unlock(child);
 		write_unlock(&css_set_lock);
 	}
+	if (clone_flags & CLONE_THREAD)
+		up_read(&current->signal->threadgroup_fork_lock);
 }
 /**
  * cgroup_exit - detach cgroup from exiting task
@@ -4143,6 +4147,21 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 }
 
 /**
+ * cgroup_fork_failed - undo operations for fork failure
+ * @tsk: pointer to task_struct of the child whose fork failed
+ * @run_callbacks: run exit callbacks?
+ *
+ * Wrapper for cgroup_exit that also drops the fork lock.
+ */
+void cgroup_fork_failed(struct task_struct *tsk, int run_callbacks,
+			unsigned long clone_flags)
+{
+	if (clone_flags & CLONE_THREAD)
+		up_read(&current->signal->threadgroup_fork_lock);
+	cgroup_exit(tsk, run_callbacks);
+}
+
+/**
  * cgroup_clone - clone the cgroup the given subsystem is attached to
  * @tsk: the task to be moved
  * @subsys: the given subsystem
diff --git a/kernel/fork.c b/kernel/fork.c
index 4c14942..e2b04ac 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -884,6 +884,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 
 	tty_audit_fork(sig);
 
+#ifdef CONFIG_CGROUPS
+	init_rwsem(&sig->threadgroup_fork_lock);
+#endif
+
 	sig->oom_adj = current->signal->oom_adj;
 
 	return 0;
@@ -1069,7 +1073,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	monotonic_to_bootbased(&p->real_start_time);
 	p->io_context = NULL;
 	p->audit_context = NULL;
-	cgroup_fork(p);
+	cgroup_fork(p, clone_flags);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
  	if (IS_ERR(p->mempolicy)) {
@@ -1277,7 +1281,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
-	cgroup_post_fork(p);
+	cgroup_post_fork(p, clone_flags);
 	perf_event_fork(p);
 	return p;
 
@@ -1311,7 +1315,7 @@ bad_fork_cleanup_policy:
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
 #endif
-	cgroup_exit(p, cgroup_callbacks_done);
+	cgroup_fork_failed(p, cgroup_callbacks_done, clone_flags);
 	delayacct_tsk_free(p);
 	module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:

  reply	other threads:[~2010-05-30  1:50 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-05-30  1:30 [RFC] [PATCH v2 0/2] cgroups: implement moving a threadgroup's threads atomically with cgroup.procs Ben Blum
2010-05-30  1:31 ` Ben Blum [this message]
2010-05-30  1:33 ` [RFC] [PATCH 2/2] cgroups: make procs file writable Ben Blum
2010-05-31 17:52   ` Oleg Nesterov
2010-05-31 18:04     ` Oleg Nesterov
2010-06-01 18:57       ` Paul Menage
2010-06-02 14:06         ` Oleg Nesterov
2010-06-02 19:53           ` Paul Menage
2010-06-02 20:20             ` Oleg Nesterov
2010-06-02 20:31               ` Paul Menage
2010-06-02 20:58                 ` Oleg Nesterov
2010-06-02 21:12                   ` Paul Menage
2010-06-02 21:38                     ` Oleg Nesterov
2010-06-02 22:03                       ` Paul Menage
2010-06-03  4:44                         ` Ben Blum
2010-06-03  4:40                   ` Ben Blum
2010-06-03 14:48                     ` Oleg Nesterov
2010-06-03  4:56     ` Ben Blum
2010-06-03 14:43       ` Oleg Nesterov
     [not found] <200908202114.n7KLEN5H026646@imap1.linux-foundation.org>
2009-08-21 10:26 ` + cgroups-add-functionality-to-read-write-lock-clone_thread-forking-per-threadgroup.patch added to -mm tree Oleg Nesterov
2009-08-21 10:45   ` Oleg Nesterov
2009-08-21 23:37     ` Paul Menage
2009-08-22 13:09       ` Oleg Nesterov
2010-01-03 19:06         ` Ben Blum
2010-01-03 19:07           ` [RFC] [PATCH 1/2] cgroups: read-write lock CLONE_THREAD forking per threadgroup Ben Blum
2010-01-05 18:53             ` Oleg Nesterov
2010-01-17 20:48               ` Ben Blum
2010-03-22 10:22                 ` Oleg Nesterov
2010-03-22 23:57                   ` Paul Menage

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100530013136.GB762@ghc01.ghc.andrew.cmu.edu \
    --to=bblum@andrew.cmu.edu \
    --cc=akpm@linux-foundation.org \
    --cc=containers@lists.linux-foundation.org \
    --cc=ebiederm@xmission.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizf@cn.fujitsu.com \
    --cc=matthltc@us.ibm.com \
    --cc=menage@google.com \
    --cc=oleg@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).