From: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
To: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: Cgroups <cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>,
Containers
<containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org>,
LKML <linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>
Subject: [PATCH v2 06/10] cpuset: record old_mems_allowed in struct cpuset
Date: Wed, 5 Jun 2013 17:16:24 +0800 [thread overview]
Message-ID: <51AF01E8.8080205@huawei.com> (raw)
In-Reply-To: <51AF0183.8070602-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
When we update a cpuset's mems_allowed and thus update tasks'
mems_allowed, it's required to pass the old mems_allowed and new
mems_allowed to cpuset_migrate_mm().
Currently we save old mems_allowed in a temp local variable before
changing cpuset->mems_allowed. This patch changes it by saving
old mems_allowed in cpuset->old_mems_allowed.
This currently won't change any behavior, but it will later allow
us to keep tasks in empty cpusets.
Signed-off-by: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
kernel/cpuset.c | 62 +++++++++++++++++++++++++++++++++------------------------
1 file changed, 36 insertions(+), 26 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 535dce6..b848505 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -87,6 +87,18 @@ struct cpuset {
cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */
+ /*
+ * This is old Memory Nodes tasks took on.
+ *
+ * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
+ * - A new cpuset's old_mems_allowed is initialized when some
+ * task is moved into it.
+ * - old_mems_allowed is used in cpuset_migrate_mm() when we change
+ * cpuset.mems_allowed and have tasks' nodemask updated, and
+ * then old_mems_allowed is updated to mems_allowed.
+ */
+ nodemask_t old_mems_allowed;
+
struct fmeter fmeter; /* memory_pressure filter */
/*
@@ -976,16 +988,12 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
static void cpuset_change_nodemask(struct task_struct *p,
struct cgroup_scanner *scan)
{
+ struct cpuset *cs = cgroup_cs(scan->cg);
struct mm_struct *mm;
- struct cpuset *cs;
int migrate;
- const nodemask_t *oldmem = scan->data;
- static nodemask_t newmems; /* protected by cpuset_mutex */
-
- cs = cgroup_cs(scan->cg);
- guarantee_online_mems(cs, &newmems);
+ nodemask_t *newmems = scan->data;
- cpuset_change_task_nodemask(p, &newmems);
+ cpuset_change_task_nodemask(p, newmems);
mm = get_task_mm(p);
if (!mm)
@@ -995,7 +1003,7 @@ static void cpuset_change_nodemask(struct task_struct *p,
mpol_rebind_mm(mm, &cs->mems_allowed);
if (migrate)
- cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
+ cpuset_migrate_mm(mm, &cs->old_mems_allowed, newmems);
mmput(mm);
}
@@ -1004,25 +1012,26 @@ static void *cpuset_being_rebound;
/**
* update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
* @cs: the cpuset in which each task's mems_allowed mask needs to be changed
- * @oldmem: old mems_allowed of cpuset cs
* @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
*
* Called with cpuset_mutex held
* No return value. It's guaranteed that cgroup_scan_tasks() always returns 0
* if @heap != NULL.
*/
-static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
- struct ptr_heap *heap)
+static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap)
{
+ static nodemask_t newmems; /* protected by cpuset_mutex */
struct cgroup_scanner scan;
cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
+ guarantee_online_mems(cs, &newmems);
+
scan.cg = cs->css.cgroup;
scan.test_task = NULL;
scan.process_task = cpuset_change_nodemask;
scan.heap = heap;
- scan.data = (nodemask_t *)oldmem;
+ scan.data = &newmems;
/*
* The mpol_rebind_mm() call takes mmap_sem, which we couldn't
@@ -1036,6 +1045,12 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
*/
cgroup_scan_tasks(&scan);
+ /*
+ * All the tasks' nodemasks have been updated, update
+ * cs->old_mems_allowed.
+ */
+ cs->old_mems_allowed = newmems;
+
/* We're done rebinding vmas to this cpuset's new mems_allowed. */
cpuset_being_rebound = NULL;
}
@@ -1056,13 +1071,9 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
const char *buf)
{
- NODEMASK_ALLOC(nodemask_t, oldmem, GFP_KERNEL);
int retval;
struct ptr_heap heap;
- if (!oldmem)
- return -ENOMEM;
-
/*
* top_cpuset.mems_allowed tracks node_stats[N_MEMORY];
* it's read-only
@@ -1091,8 +1102,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
goto done;
}
}
- *oldmem = cs->mems_allowed;
- if (nodes_equal(*oldmem, trialcs->mems_allowed)) {
+
+ if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) {
retval = 0; /* Too easy - nothing to do */
goto done;
}
@@ -1108,11 +1119,10 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
cs->mems_allowed = trialcs->mems_allowed;
mutex_unlock(&callback_mutex);
- update_tasks_nodemask(cs, oldmem, &heap);
+ update_tasks_nodemask(cs, &heap);
heap_free(&heap);
done:
- NODEMASK_FREE(oldmem);
return retval;
}
@@ -1425,7 +1435,6 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
* Change mm, possibly for multiple threads in a threadgroup. This is
* expensive and may sleep.
*/
- cpuset_attach_nodemask_to = cs->mems_allowed;
mm = get_task_mm(leader);
if (mm) {
mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
@@ -1435,6 +1444,8 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
mmput(mm);
}
+ cs->old_mems_allowed = cpuset_attach_nodemask_to;
+
cs->attach_in_progress--;
/*
@@ -2001,7 +2012,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
{
static cpumask_t off_cpus;
- static nodemask_t off_mems, tmp_mems;
+ static nodemask_t off_mems;
struct cpuset *cs = container_of(work, struct cpuset, hotplug_work);
bool is_empty;
@@ -2020,11 +2031,10 @@ static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
/* remove offline mems from @cs */
if (!nodes_empty(off_mems)) {
- tmp_mems = cs->mems_allowed;
mutex_lock(&callback_mutex);
nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems);
mutex_unlock(&callback_mutex);
- update_tasks_nodemask(cs, &tmp_mems, NULL);
+ update_tasks_nodemask(cs, NULL);
}
is_empty = cpumask_empty(cs->cpus_allowed) ||
@@ -2116,11 +2126,10 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
/* synchronize mems_allowed to N_MEMORY */
if (mems_updated) {
- tmp_mems = top_cpuset.mems_allowed;
mutex_lock(&callback_mutex);
top_cpuset.mems_allowed = new_mems;
mutex_unlock(&callback_mutex);
- update_tasks_nodemask(&top_cpuset, &tmp_mems, NULL);
+ update_tasks_nodemask(&top_cpuset, NULL);
}
/* if cpus or mems went down, we need to propagate to descendants */
@@ -2186,6 +2195,7 @@ void __init cpuset_init_smp(void)
{
cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
top_cpuset.mems_allowed = node_states[N_MEMORY];
+ top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;
register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
--
1.8.0.2
WARNING: multiple messages have this Message-ID (diff)
From: Li Zefan <lizefan@huawei.com>
To: Tejun Heo <tj@kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>,
Cgroups <cgroups@vger.kernel.org>,
Containers <containers@lists.linux-foundation.org>
Subject: [PATCH v2 06/10] cpuset: record old_mems_allowed in struct cpuset
Date: Wed, 5 Jun 2013 17:16:24 +0800 [thread overview]
Message-ID: <51AF01E8.8080205@huawei.com> (raw)
In-Reply-To: <51AF0183.8070602@huawei.com>
When we update a cpuset's mems_allowed and thus update tasks'
mems_allowed, it's required to pass the old mems_allowed and new
mems_allowed to cpuset_migrate_mm().
Currently we save old mems_allowed in a temp local variable before
changing cpuset->mems_allowed. This patch changes it by saving
old mems_allowed in cpuset->old_mems_allowed.
This currently won't change any behavior, but it will later allow
us to keep tasks in empty cpusets.
Signed-off-by: Li Zefan <lizefan@huawei.com>
---
kernel/cpuset.c | 62 +++++++++++++++++++++++++++++++++------------------------
1 file changed, 36 insertions(+), 26 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 535dce6..b848505 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -87,6 +87,18 @@ struct cpuset {
cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */
+ /*
+ * This is old Memory Nodes tasks took on.
+ *
+ * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
+ * - A new cpuset's old_mems_allowed is initialized when some
+ * task is moved into it.
+ * - old_mems_allowed is used in cpuset_migrate_mm() when we change
+ * cpuset.mems_allowed and have tasks' nodemask updated, and
+ * then old_mems_allowed is updated to mems_allowed.
+ */
+ nodemask_t old_mems_allowed;
+
struct fmeter fmeter; /* memory_pressure filter */
/*
@@ -976,16 +988,12 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
static void cpuset_change_nodemask(struct task_struct *p,
struct cgroup_scanner *scan)
{
+ struct cpuset *cs = cgroup_cs(scan->cg);
struct mm_struct *mm;
- struct cpuset *cs;
int migrate;
- const nodemask_t *oldmem = scan->data;
- static nodemask_t newmems; /* protected by cpuset_mutex */
-
- cs = cgroup_cs(scan->cg);
- guarantee_online_mems(cs, &newmems);
+ nodemask_t *newmems = scan->data;
- cpuset_change_task_nodemask(p, &newmems);
+ cpuset_change_task_nodemask(p, newmems);
mm = get_task_mm(p);
if (!mm)
@@ -995,7 +1003,7 @@ static void cpuset_change_nodemask(struct task_struct *p,
mpol_rebind_mm(mm, &cs->mems_allowed);
if (migrate)
- cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
+ cpuset_migrate_mm(mm, &cs->old_mems_allowed, newmems);
mmput(mm);
}
@@ -1004,25 +1012,26 @@ static void *cpuset_being_rebound;
/**
* update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
* @cs: the cpuset in which each task's mems_allowed mask needs to be changed
- * @oldmem: old mems_allowed of cpuset cs
* @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
*
* Called with cpuset_mutex held
* No return value. It's guaranteed that cgroup_scan_tasks() always returns 0
* if @heap != NULL.
*/
-static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
- struct ptr_heap *heap)
+static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap)
{
+ static nodemask_t newmems; /* protected by cpuset_mutex */
struct cgroup_scanner scan;
cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
+ guarantee_online_mems(cs, &newmems);
+
scan.cg = cs->css.cgroup;
scan.test_task = NULL;
scan.process_task = cpuset_change_nodemask;
scan.heap = heap;
- scan.data = (nodemask_t *)oldmem;
+ scan.data = &newmems;
/*
* The mpol_rebind_mm() call takes mmap_sem, which we couldn't
@@ -1036,6 +1045,12 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
*/
cgroup_scan_tasks(&scan);
+ /*
+ * All the tasks' nodemasks have been updated, update
+ * cs->old_mems_allowed.
+ */
+ cs->old_mems_allowed = newmems;
+
/* We're done rebinding vmas to this cpuset's new mems_allowed. */
cpuset_being_rebound = NULL;
}
@@ -1056,13 +1071,9 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
const char *buf)
{
- NODEMASK_ALLOC(nodemask_t, oldmem, GFP_KERNEL);
int retval;
struct ptr_heap heap;
- if (!oldmem)
- return -ENOMEM;
-
/*
* top_cpuset.mems_allowed tracks node_stats[N_MEMORY];
* it's read-only
@@ -1091,8 +1102,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
goto done;
}
}
- *oldmem = cs->mems_allowed;
- if (nodes_equal(*oldmem, trialcs->mems_allowed)) {
+
+ if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) {
retval = 0; /* Too easy - nothing to do */
goto done;
}
@@ -1108,11 +1119,10 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
cs->mems_allowed = trialcs->mems_allowed;
mutex_unlock(&callback_mutex);
- update_tasks_nodemask(cs, oldmem, &heap);
+ update_tasks_nodemask(cs, &heap);
heap_free(&heap);
done:
- NODEMASK_FREE(oldmem);
return retval;
}
@@ -1425,7 +1435,6 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
* Change mm, possibly for multiple threads in a threadgroup. This is
* expensive and may sleep.
*/
- cpuset_attach_nodemask_to = cs->mems_allowed;
mm = get_task_mm(leader);
if (mm) {
mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
@@ -1435,6 +1444,8 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
mmput(mm);
}
+ cs->old_mems_allowed = cpuset_attach_nodemask_to;
+
cs->attach_in_progress--;
/*
@@ -2001,7 +2012,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
{
static cpumask_t off_cpus;
- static nodemask_t off_mems, tmp_mems;
+ static nodemask_t off_mems;
struct cpuset *cs = container_of(work, struct cpuset, hotplug_work);
bool is_empty;
@@ -2020,11 +2031,10 @@ static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
/* remove offline mems from @cs */
if (!nodes_empty(off_mems)) {
- tmp_mems = cs->mems_allowed;
mutex_lock(&callback_mutex);
nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems);
mutex_unlock(&callback_mutex);
- update_tasks_nodemask(cs, &tmp_mems, NULL);
+ update_tasks_nodemask(cs, NULL);
}
is_empty = cpumask_empty(cs->cpus_allowed) ||
@@ -2116,11 +2126,10 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
/* synchronize mems_allowed to N_MEMORY */
if (mems_updated) {
- tmp_mems = top_cpuset.mems_allowed;
mutex_lock(&callback_mutex);
top_cpuset.mems_allowed = new_mems;
mutex_unlock(&callback_mutex);
- update_tasks_nodemask(&top_cpuset, &tmp_mems, NULL);
+ update_tasks_nodemask(&top_cpuset, NULL);
}
/* if cpus or mems went down, we need to propagate to descendants */
@@ -2186,6 +2195,7 @@ void __init cpuset_init_smp(void)
{
cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
top_cpuset.mems_allowed = node_states[N_MEMORY];
+ top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;
register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
--
1.8.0.2
next prev parent reply other threads:[~2013-06-05 9:16 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-06-05 9:14 [PATCH v2 00/10] cpuset: implement sane hierarchy behaviors Li Zefan
2013-06-05 9:14 ` Li Zefan
[not found] ` <51AF0183.8070602-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
2013-06-05 9:15 ` [PATCH v2 01/10] cpuset: remove redundant check in cpuset_cpus_allowed_fallback() Li Zefan
2013-06-05 9:15 ` Li Zefan
2013-06-05 9:15 ` [PATCH v2 02/10] cpuset: cleanup guarantee_online_{cpus|mems}() Li Zefan
2013-06-05 9:15 ` Li Zefan
2013-06-05 9:15 ` Li Zefan
2013-06-05 9:15 ` [PATCH v2 03/10] cpuset: remove unnecessary variable in cpuset_attach() Li Zefan
2013-06-05 9:15 ` Li Zefan
2013-06-05 9:15 ` Li Zefan
2013-06-05 9:15 ` [PATCH v2 04/10] cpuset: remove cpuset_test_cpumask() Li Zefan
2013-06-05 9:15 ` Li Zefan
2013-06-05 9:15 ` [PATCH v2 05/10] cpuset: re-structure update_cpumask() a bit Li Zefan
2013-06-05 9:15 ` Li Zefan
[not found] ` <51AF01CF.1070706-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
2013-06-05 20:57 ` Tejun Heo
2013-06-05 20:57 ` Tejun Heo
2013-06-05 9:16 ` Li Zefan [this message]
2013-06-05 9:16 ` [PATCH v2 06/10] cpuset: record old_mems_allowed in struct cpuset Li Zefan
[not found] ` <51AF01E8.8080205-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
2013-06-05 19:45 ` Tejun Heo
2013-06-05 19:45 ` Tejun Heo
[not found] ` <20130605194522.GG10693-9pTldWuhBndy/B6EtB590w@public.gmane.org>
2013-06-06 9:58 ` Li Zefan
2013-06-06 9:58 ` Li Zefan
2013-06-05 9:16 ` [PATCH v2 07/10] cpuset: introduce effective_{cpumask|nodemask}_cpuset() Li Zefan
2013-06-05 9:16 ` Li Zefan
2013-06-05 9:16 ` [PATCH v2 08/10] cpuset: allow to keep tasks in empty cpusets Li Zefan
2013-06-05 9:16 ` Li Zefan
[not found] ` <51AF020B.3030701-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
2013-06-05 20:51 ` Tejun Heo
2013-06-05 20:51 ` Tejun Heo
[not found] ` <20130605205146.GI10693-9pTldWuhBndy/B6EtB590w@public.gmane.org>
2013-06-06 10:26 ` Li Zefan
2013-06-06 10:26 ` Li Zefan
[not found] ` <51B063E6.30107-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
2013-06-06 21:24 ` Tejun Heo
2013-06-06 21:24 ` Tejun Heo
2013-06-06 21:24 ` Tejun Heo
2013-06-06 10:26 ` Li Zefan
2013-06-05 9:17 ` [PATCH v2 09/10] cpuset: allow to move tasks to " Li Zefan
2013-06-05 9:17 ` Li Zefan
2013-06-05 9:17 ` [PATCH v2 10/10] cpuset: fix to migrate mm correctly in a corner case Li Zefan
2013-06-05 9:17 ` Li Zefan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=51AF01E8.8080205@huawei.com \
--to=lizefan-hv44wf8li93qt0dzr+alfa@public.gmane.org \
--cc=cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
--cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.