[PATCH 2/4] cpuset: rewrite update_tasks_nodemask()

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Li Zefan <lizf@cn.fujitsu.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Menage <menage@google.com>, LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 2/4] cpuset: rewrite update_tasks_nodemask()
Date: Thu, 26 Feb 2009 15:03:35 +0800	[thread overview]
Message-ID: <49A63EC7.2000702@cn.fujitsu.com> (raw)
In-Reply-To: <49A63E77.9060603@cn.fujitsu.com>

This patch uses cgroup_scan_tasks() to rebind tasks' vmas to new cpuset's
mems_allowed.

Not only simplify the code largely, but also avoid allocating an array
to hold mm pointers of all the tasks in the cpuset. This array can be big
(size > PAGESIZE) if we have lots of tasks in that cpuset, thus has a
chance to fail the allocation when under memory stress.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
---
 kernel/cpuset.c |  109 ++++++++++++++++++++-----------------------------------
 1 files changed, 39 insertions(+), 70 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 20c2db2..01a111f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1026,6 +1026,31 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 	mutex_unlock(&callback_mutex);
 }
 
+/*
+ * Rebind task's vmas to cpuset's new mems_allowed, and migrate pages to new
+ * nodes if memory_migrate flag is set. Called with cgroup_mutex held.
+ */
+static void cpuset_change_nodemask(struct task_struct *p,
+				   struct cgroup_scanner *scan)
+{
+	struct mm_struct *mm;
+	struct cpuset *cs;
+	int migrate;
+	const nodemask_t *oldmem = scan->data;
+
+	mm = get_task_mm(p);
+	if (!mm)
+		return;
+
+	cs = cgroup_cs(scan->cg);
+	migrate = is_memory_migrate(cs);
+
+	mpol_rebind_mm(mm, &cs->mems_allowed);
+	if (migrate)
+		cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
+	mmput(mm);
+}
+
 static void *cpuset_being_rebound;
 
 /**
@@ -1038,88 +1063,32 @@ static void *cpuset_being_rebound;
  */
 static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
 {
-	struct task_struct *p;
-	struct mm_struct **mmarray;
-	int i, n, ntasks;
-	int migrate;
-	int fudge;
-	struct cgroup_iter it;
 	int retval;
+	struct cgroup_scanner scan;
 
 	cpuset_being_rebound = cs;		/* causes mpol_dup() rebind */
 
-	fudge = 10;				/* spare mmarray[] slots */
-	fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */
-	retval = -ENOMEM;
-
-	/*
-	 * Allocate mmarray[] to hold mm reference for each task
-	 * in cpuset cs.  Can't kmalloc GFP_KERNEL while holding
-	 * tasklist_lock.  We could use GFP_ATOMIC, but with a
-	 * few more lines of code, we can retry until we get a big
-	 * enough mmarray[] w/o using GFP_ATOMIC.
-	 */
-	while (1) {
-		ntasks = cgroup_task_count(cs->css.cgroup);  /* guess */
-		ntasks += fudge;
-		mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
-		if (!mmarray)
-			goto done;
-		read_lock(&tasklist_lock);		/* block fork */
-		if (cgroup_task_count(cs->css.cgroup) <= ntasks)
-			break;				/* got enough */
-		read_unlock(&tasklist_lock);		/* try again */
-		kfree(mmarray);
-	}
-
-	n = 0;
-
-	/* Load up mmarray[] with mm reference for each task in cpuset. */
-	cgroup_iter_start(cs->css.cgroup, &it);
-	while ((p = cgroup_iter_next(cs->css.cgroup, &it))) {
-		struct mm_struct *mm;
-
-		if (n >= ntasks) {
-			printk(KERN_WARNING
-				"Cpuset mempolicy rebind incomplete.\n");
-			break;
-		}
-		mm = get_task_mm(p);
-		if (!mm)
-			continue;
-		mmarray[n++] = mm;
-	}
-	cgroup_iter_end(cs->css.cgroup, &it);
-	read_unlock(&tasklist_lock);
+	scan.cg = cs->css.cgroup;
+	scan.test_task = NULL;
+	scan.process_task = cpuset_change_nodemask;
+	scan.heap = NULL;
+	scan.data = (nodemask_t *)oldmem;
 
 	/*
-	 * Now that we've dropped the tasklist spinlock, we can
-	 * rebind the vma mempolicies of each mm in mmarray[] to their
-	 * new cpuset, and release that mm.  The mpol_rebind_mm()
-	 * call takes mmap_sem, which we couldn't take while holding
-	 * tasklist_lock.  Forks can happen again now - the mpol_dup()
-	 * cpuset_being_rebound check will catch such forks, and rebind
-	 * their vma mempolicies too.  Because we still hold the global
-	 * cgroup_mutex, we know that no other rebind effort will
-	 * be contending for the global variable cpuset_being_rebound.
+	 * The mpol_rebind_mm() call takes mmap_sem, which we couldn't
+	 * take while holding tasklist_lock.  Forks can happen - the
+	 * mpol_dup() cpuset_being_rebound check will catch such forks,
+	 * and rebind their vma mempolicies too.  Because we still hold
+	 * the global cgroup_mutex, we know that no other rebind effort
+	 * will be contending for the global variable cpuset_being_rebound.
 	 * It's ok if we rebind the same mm twice; mpol_rebind_mm()
 	 * is idempotent.  Also migrate pages in each mm to new nodes.
 	 */
-	migrate = is_memory_migrate(cs);
-	for (i = 0; i < n; i++) {
-		struct mm_struct *mm = mmarray[i];
-
-		mpol_rebind_mm(mm, &cs->mems_allowed);
-		if (migrate)
-			cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
-		mmput(mm);
-	}
+	retval = cgroup_scan_tasks(&scan);
 
 	/* We're done rebinding vmas to this cpuset's new mems_allowed. */
-	kfree(mmarray);
 	cpuset_being_rebound = NULL;
-	retval = 0;
-done:
+
 	return retval;
 }
 
-- 1.5.4.rc3

next prev parent reply	other threads:[~2009-02-26  7:03 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-02-26  7:02 [PATCH 0/4] cpuset: cleanups and fixes Li Zefan
2009-02-26  7:02 ` [PATCH 1/4] cgroups: add 'data' field to struct cgroup_scanner Li Zefan
2009-02-26  7:03 ` Li Zefan [this message]
2009-02-26  7:04 ` [PATCH 3/4] cpuset: avoid changing cpuset's mems when errno returned Li Zefan
2009-02-26  7:04 ` [PATCH 4/4] cpuset: remove struct cpuset_hotplug_scanner Li Zefan
2009-02-26  7:39 ` [PATCH 0/4] cpuset: cleanups and fixes KAMEZAWA Hiroyuki
2009-02-26  7:58   ` Li Zefan
2009-02-26  8:09     ` KAMEZAWA Hiroyuki

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:20c2db2 dfblob:01a111f )
 OR (
bs:"[PATCH 2/4] cpuset: rewrite update_tasks_nodemask()" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=49A63EC7.2000702@cn.fujitsu.com \
    --to=lizf@cn.fujitsu.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=menage@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox