* [PATCH 1/7] cpuset: migrate memory only for threadgroup leaders
2015-05-18 19:49 ` Tejun Heo
@ 2015-05-18 19:49 ` Tejun Heo
-1 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-18 19:49 UTC (permalink / raw)
To: lizefan-hv44wF8Li93QT0dZR+AlfA
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA, hannes-druUgvl0LCNAfugRpC6u6w,
mhocko-AlSwsSmVLrQ, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Tejun Heo
If memory_migrate flag is set, cpuset migrates memory according to the
destnation css's nodemask. The current implementation migrates memory
whenever any thread of a process is migrated making the behavior
somewhat arbitrary. Let's tie memory operations to the threadgroup
leader so that memory is migrated only when the leader is migrated.
While this is a behavior change, given the inherent fuziness, this
change is not too likely to be noticed and allows us to clearly define
who owns the memory (always the leader) and helps the planned atomic
multi-process migration.
Note that we're currently migrating memory in migration path proper
while holding all the locks. In the long term, this should be moved
out to an async work item.
Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
kernel/cpuset.c | 40 ++++++++++++++++++++++------------------
1 file changed, 22 insertions(+), 18 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ee14e3a..43db5b7 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1484,7 +1484,6 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
{
/* static buf protected by cpuset_mutex */
static nodemask_t cpuset_attach_nodemask_to;
- struct mm_struct *mm;
struct task_struct *task;
struct task_struct *leader = cgroup_taskset_first(tset);
struct cpuset *cs = css_cs(css);
@@ -1512,26 +1511,31 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
}
/*
- * Change mm, possibly for multiple threads in a threadgroup. This is
- * expensive and may sleep.
+ * Change mm, possibly for multiple threads in a threadgroup. This
+ * is expensive and may sleep and should be moved outside migration
+ * path proper.
*/
cpuset_attach_nodemask_to = cs->effective_mems;
- mm = get_task_mm(leader);
- if (mm) {
- mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
-
- /*
- * old_mems_allowed is the same with mems_allowed here, except
- * if this task is being moved automatically due to hotplug.
- * In that case @mems_allowed has been updated and is empty,
- * so @old_mems_allowed is the right nodesets that we migrate
- * mm from.
- */
- if (is_memory_migrate(cs)) {
- cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
- &cpuset_attach_nodemask_to);
+ if (thread_group_leader(leader)) {
+ struct mm_struct *mm = get_task_mm(leader);
+
+ if (mm) {
+ mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
+
+ /*
+ * old_mems_allowed is the same with mems_allowed
+ * here, except if this task is being moved
+ * automatically due to hotplug. In that case
+ * @mems_allowed has been updated and is empty, so
+ * @old_mems_allowed is the right nodesets that we
+ * migrate mm from.
+ */
+ if (is_memory_migrate(cs)) {
+ cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
+ &cpuset_attach_nodemask_to);
+ }
+ mmput(mm);
}
- mmput(mm);
}
cs->old_mems_allowed = cpuset_attach_nodemask_to;
--
2.4.0
^ permalink raw reply related [flat|nested] 59+ messages in thread* [PATCH 1/7] cpuset: migrate memory only for threadgroup leaders
@ 2015-05-18 19:49 ` Tejun Heo
0 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-18 19:49 UTC (permalink / raw)
To: lizefan; +Cc: cgroups, hannes, mhocko, linux-mm, Tejun Heo
If memory_migrate flag is set, cpuset migrates memory according to the
destnation css's nodemask. The current implementation migrates memory
whenever any thread of a process is migrated making the behavior
somewhat arbitrary. Let's tie memory operations to the threadgroup
leader so that memory is migrated only when the leader is migrated.
While this is a behavior change, given the inherent fuziness, this
change is not too likely to be noticed and allows us to clearly define
who owns the memory (always the leader) and helps the planned atomic
multi-process migration.
Note that we're currently migrating memory in migration path proper
while holding all the locks. In the long term, this should be moved
out to an async work item.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
---
kernel/cpuset.c | 40 ++++++++++++++++++++++------------------
1 file changed, 22 insertions(+), 18 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ee14e3a..43db5b7 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1484,7 +1484,6 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
{
/* static buf protected by cpuset_mutex */
static nodemask_t cpuset_attach_nodemask_to;
- struct mm_struct *mm;
struct task_struct *task;
struct task_struct *leader = cgroup_taskset_first(tset);
struct cpuset *cs = css_cs(css);
@@ -1512,26 +1511,31 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
}
/*
- * Change mm, possibly for multiple threads in a threadgroup. This is
- * expensive and may sleep.
+ * Change mm, possibly for multiple threads in a threadgroup. This
+ * is expensive and may sleep and should be moved outside migration
+ * path proper.
*/
cpuset_attach_nodemask_to = cs->effective_mems;
- mm = get_task_mm(leader);
- if (mm) {
- mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
-
- /*
- * old_mems_allowed is the same with mems_allowed here, except
- * if this task is being moved automatically due to hotplug.
- * In that case @mems_allowed has been updated and is empty,
- * so @old_mems_allowed is the right nodesets that we migrate
- * mm from.
- */
- if (is_memory_migrate(cs)) {
- cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
- &cpuset_attach_nodemask_to);
+ if (thread_group_leader(leader)) {
+ struct mm_struct *mm = get_task_mm(leader);
+
+ if (mm) {
+ mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
+
+ /*
+ * old_mems_allowed is the same with mems_allowed
+ * here, except if this task is being moved
+ * automatically due to hotplug. In that case
+ * @mems_allowed has been updated and is empty, so
+ * @old_mems_allowed is the right nodesets that we
+ * migrate mm from.
+ */
+ if (is_memory_migrate(cs)) {
+ cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
+ &cpuset_attach_nodemask_to);
+ }
+ mmput(mm);
}
- mmput(mm);
}
cs->old_mems_allowed = cpuset_attach_nodemask_to;
--
2.4.0
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 59+ messages in thread
* [PATCH 2/7] memcg: restructure mem_cgroup_can_attach()
2015-05-18 19:49 ` Tejun Heo
@ 2015-05-18 19:49 ` Tejun Heo
-1 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-18 19:49 UTC (permalink / raw)
To: lizefan-hv44wF8Li93QT0dZR+AlfA
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA, hannes-druUgvl0LCNAfugRpC6u6w,
mhocko-AlSwsSmVLrQ, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Tejun Heo
Restructure it to lower nesting level and help the planned threadgroup
leader iteration changes.
This is pure reorganization.
Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
Cc: Michal Hocko <mhocko-AlSwsSmVLrQ@public.gmane.org>
---
mm/memcontrol.c | 61 ++++++++++++++++++++++++++++++---------------------------
1 file changed, 32 insertions(+), 29 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 14c2f20..b1b834d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4997,10 +4997,12 @@ static void mem_cgroup_clear_mc(void)
static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
- struct task_struct *p = cgroup_taskset_first(tset);
- int ret = 0;
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ struct mem_cgroup *from;
+ struct task_struct *p;
+ struct mm_struct *mm;
unsigned long move_flags;
+ int ret = 0;
/*
* We are now commited to this value whatever it is. Changes in this
@@ -5008,36 +5010,37 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
* So we need to save it, and keep it going.
*/
move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
- if (move_flags) {
- struct mm_struct *mm;
- struct mem_cgroup *from = mem_cgroup_from_task(p);
+ if (!move_flags)
+ return 0;
- VM_BUG_ON(from == memcg);
+ p = cgroup_taskset_first(tset);
+ from = mem_cgroup_from_task(p);
- mm = get_task_mm(p);
- if (!mm)
- return 0;
- /* We move charges only when we move a owner of the mm */
- if (mm->owner == p) {
- VM_BUG_ON(mc.from);
- VM_BUG_ON(mc.to);
- VM_BUG_ON(mc.precharge);
- VM_BUG_ON(mc.moved_charge);
- VM_BUG_ON(mc.moved_swap);
-
- spin_lock(&mc.lock);
- mc.from = from;
- mc.to = memcg;
- mc.flags = move_flags;
- spin_unlock(&mc.lock);
- /* We set mc.moving_task later */
-
- ret = mem_cgroup_precharge_mc(mm);
- if (ret)
- mem_cgroup_clear_mc();
- }
- mmput(mm);
+ VM_BUG_ON(from == memcg);
+
+ mm = get_task_mm(p);
+ if (!mm)
+ return 0;
+ /* We move charges only when we move a owner of the mm */
+ if (mm->owner == p) {
+ VM_BUG_ON(mc.from);
+ VM_BUG_ON(mc.to);
+ VM_BUG_ON(mc.precharge);
+ VM_BUG_ON(mc.moved_charge);
+ VM_BUG_ON(mc.moved_swap);
+
+ spin_lock(&mc.lock);
+ mc.from = from;
+ mc.to = memcg;
+ mc.flags = move_flags;
+ spin_unlock(&mc.lock);
+ /* We set mc.moving_task later */
+
+ ret = mem_cgroup_precharge_mc(mm);
+ if (ret)
+ mem_cgroup_clear_mc();
}
+ mmput(mm);
return ret;
}
--
2.4.0
^ permalink raw reply related [flat|nested] 59+ messages in thread* [PATCH 2/7] memcg: restructure mem_cgroup_can_attach()
@ 2015-05-18 19:49 ` Tejun Heo
0 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-18 19:49 UTC (permalink / raw)
To: lizefan; +Cc: cgroups, hannes, mhocko, linux-mm, Tejun Heo
Restructure it to lower nesting level and help the planned threadgroup
leader iteration changes.
This is pure reorganization.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
---
mm/memcontrol.c | 61 ++++++++++++++++++++++++++++++---------------------------
1 file changed, 32 insertions(+), 29 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 14c2f20..b1b834d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4997,10 +4997,12 @@ static void mem_cgroup_clear_mc(void)
static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
- struct task_struct *p = cgroup_taskset_first(tset);
- int ret = 0;
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ struct mem_cgroup *from;
+ struct task_struct *p;
+ struct mm_struct *mm;
unsigned long move_flags;
+ int ret = 0;
/*
* We are now commited to this value whatever it is. Changes in this
@@ -5008,36 +5010,37 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
* So we need to save it, and keep it going.
*/
move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
- if (move_flags) {
- struct mm_struct *mm;
- struct mem_cgroup *from = mem_cgroup_from_task(p);
+ if (!move_flags)
+ return 0;
- VM_BUG_ON(from == memcg);
+ p = cgroup_taskset_first(tset);
+ from = mem_cgroup_from_task(p);
- mm = get_task_mm(p);
- if (!mm)
- return 0;
- /* We move charges only when we move a owner of the mm */
- if (mm->owner == p) {
- VM_BUG_ON(mc.from);
- VM_BUG_ON(mc.to);
- VM_BUG_ON(mc.precharge);
- VM_BUG_ON(mc.moved_charge);
- VM_BUG_ON(mc.moved_swap);
-
- spin_lock(&mc.lock);
- mc.from = from;
- mc.to = memcg;
- mc.flags = move_flags;
- spin_unlock(&mc.lock);
- /* We set mc.moving_task later */
-
- ret = mem_cgroup_precharge_mc(mm);
- if (ret)
- mem_cgroup_clear_mc();
- }
- mmput(mm);
+ VM_BUG_ON(from == memcg);
+
+ mm = get_task_mm(p);
+ if (!mm)
+ return 0;
+ /* We move charges only when we move a owner of the mm */
+ if (mm->owner == p) {
+ VM_BUG_ON(mc.from);
+ VM_BUG_ON(mc.to);
+ VM_BUG_ON(mc.precharge);
+ VM_BUG_ON(mc.moved_charge);
+ VM_BUG_ON(mc.moved_swap);
+
+ spin_lock(&mc.lock);
+ mc.from = from;
+ mc.to = memcg;
+ mc.flags = move_flags;
+ spin_unlock(&mc.lock);
+ /* We set mc.moving_task later */
+
+ ret = mem_cgroup_precharge_mc(mm);
+ if (ret)
+ mem_cgroup_clear_mc();
}
+ mmput(mm);
return ret;
}
--
2.4.0
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 59+ messages in thread[parent not found: <1431978595-12176-3-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>]
* Re: [PATCH 2/7] memcg: restructure mem_cgroup_can_attach()
2015-05-18 19:49 ` Tejun Heo
@ 2015-05-19 9:03 ` Michal Hocko
-1 siblings, 0 replies; 59+ messages in thread
From: Michal Hocko @ 2015-05-19 9:03 UTC (permalink / raw)
To: Tejun Heo
Cc: lizefan-hv44wF8Li93QT0dZR+AlfA, cgroups-u79uwXL29TY76Z2rM5mHXA,
hannes-druUgvl0LCNAfugRpC6u6w, linux-mm-Bw31MaZKKs3YtjvyW6yDsg
On Mon 18-05-15 15:49:50, Tejun Heo wrote:
> Restructure it to lower nesting level and help the planned threadgroup
> leader iteration changes.
>
> This is pure reorganization.
>
> Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> Cc: Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
> Cc: Michal Hocko <mhocko-AlSwsSmVLrQ@public.gmane.org>
Acked-by: Michal Hocko <mhocko-AlSwsSmVLrQ@public.gmane.org>
> ---
> mm/memcontrol.c | 61 ++++++++++++++++++++++++++++++---------------------------
> 1 file changed, 32 insertions(+), 29 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 14c2f20..b1b834d 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4997,10 +4997,12 @@ static void mem_cgroup_clear_mc(void)
> static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
> struct cgroup_taskset *tset)
> {
> - struct task_struct *p = cgroup_taskset_first(tset);
> - int ret = 0;
> struct mem_cgroup *memcg = mem_cgroup_from_css(css);
> + struct mem_cgroup *from;
> + struct task_struct *p;
> + struct mm_struct *mm;
> unsigned long move_flags;
> + int ret = 0;
>
> /*
> * We are now commited to this value whatever it is. Changes in this
> @@ -5008,36 +5010,37 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
> * So we need to save it, and keep it going.
> */
> move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
> - if (move_flags) {
> - struct mm_struct *mm;
> - struct mem_cgroup *from = mem_cgroup_from_task(p);
> + if (!move_flags)
> + return 0;
>
> - VM_BUG_ON(from == memcg);
> + p = cgroup_taskset_first(tset);
> + from = mem_cgroup_from_task(p);
>
> - mm = get_task_mm(p);
> - if (!mm)
> - return 0;
> - /* We move charges only when we move a owner of the mm */
> - if (mm->owner == p) {
> - VM_BUG_ON(mc.from);
> - VM_BUG_ON(mc.to);
> - VM_BUG_ON(mc.precharge);
> - VM_BUG_ON(mc.moved_charge);
> - VM_BUG_ON(mc.moved_swap);
> -
> - spin_lock(&mc.lock);
> - mc.from = from;
> - mc.to = memcg;
> - mc.flags = move_flags;
> - spin_unlock(&mc.lock);
> - /* We set mc.moving_task later */
> -
> - ret = mem_cgroup_precharge_mc(mm);
> - if (ret)
> - mem_cgroup_clear_mc();
> - }
> - mmput(mm);
> + VM_BUG_ON(from == memcg);
> +
> + mm = get_task_mm(p);
> + if (!mm)
> + return 0;
> + /* We move charges only when we move a owner of the mm */
> + if (mm->owner == p) {
> + VM_BUG_ON(mc.from);
> + VM_BUG_ON(mc.to);
> + VM_BUG_ON(mc.precharge);
> + VM_BUG_ON(mc.moved_charge);
> + VM_BUG_ON(mc.moved_swap);
> +
> + spin_lock(&mc.lock);
> + mc.from = from;
> + mc.to = memcg;
> + mc.flags = move_flags;
> + spin_unlock(&mc.lock);
> + /* We set mc.moving_task later */
> +
> + ret = mem_cgroup_precharge_mc(mm);
> + if (ret)
> + mem_cgroup_clear_mc();
> }
> + mmput(mm);
> return ret;
> }
>
> --
> 2.4.0
>
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 59+ messages in thread* Re: [PATCH 2/7] memcg: restructure mem_cgroup_can_attach()
@ 2015-05-19 9:03 ` Michal Hocko
0 siblings, 0 replies; 59+ messages in thread
From: Michal Hocko @ 2015-05-19 9:03 UTC (permalink / raw)
To: Tejun Heo; +Cc: lizefan, cgroups, hannes, linux-mm
On Mon 18-05-15 15:49:50, Tejun Heo wrote:
> Restructure it to lower nesting level and help the planned threadgroup
> leader iteration changes.
>
> This is pure reorganization.
>
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Michal Hocko <mhocko@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.cz>
> ---
> mm/memcontrol.c | 61 ++++++++++++++++++++++++++++++---------------------------
> 1 file changed, 32 insertions(+), 29 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 14c2f20..b1b834d 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4997,10 +4997,12 @@ static void mem_cgroup_clear_mc(void)
> static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
> struct cgroup_taskset *tset)
> {
> - struct task_struct *p = cgroup_taskset_first(tset);
> - int ret = 0;
> struct mem_cgroup *memcg = mem_cgroup_from_css(css);
> + struct mem_cgroup *from;
> + struct task_struct *p;
> + struct mm_struct *mm;
> unsigned long move_flags;
> + int ret = 0;
>
> /*
> * We are now commited to this value whatever it is. Changes in this
> @@ -5008,36 +5010,37 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
> * So we need to save it, and keep it going.
> */
> move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
> - if (move_flags) {
> - struct mm_struct *mm;
> - struct mem_cgroup *from = mem_cgroup_from_task(p);
> + if (!move_flags)
> + return 0;
>
> - VM_BUG_ON(from == memcg);
> + p = cgroup_taskset_first(tset);
> + from = mem_cgroup_from_task(p);
>
> - mm = get_task_mm(p);
> - if (!mm)
> - return 0;
> - /* We move charges only when we move a owner of the mm */
> - if (mm->owner == p) {
> - VM_BUG_ON(mc.from);
> - VM_BUG_ON(mc.to);
> - VM_BUG_ON(mc.precharge);
> - VM_BUG_ON(mc.moved_charge);
> - VM_BUG_ON(mc.moved_swap);
> -
> - spin_lock(&mc.lock);
> - mc.from = from;
> - mc.to = memcg;
> - mc.flags = move_flags;
> - spin_unlock(&mc.lock);
> - /* We set mc.moving_task later */
> -
> - ret = mem_cgroup_precharge_mc(mm);
> - if (ret)
> - mem_cgroup_clear_mc();
> - }
> - mmput(mm);
> + VM_BUG_ON(from == memcg);
> +
> + mm = get_task_mm(p);
> + if (!mm)
> + return 0;
> + /* We move charges only when we move a owner of the mm */
> + if (mm->owner == p) {
> + VM_BUG_ON(mc.from);
> + VM_BUG_ON(mc.to);
> + VM_BUG_ON(mc.precharge);
> + VM_BUG_ON(mc.moved_charge);
> + VM_BUG_ON(mc.moved_swap);
> +
> + spin_lock(&mc.lock);
> + mc.from = from;
> + mc.to = memcg;
> + mc.flags = move_flags;
> + spin_unlock(&mc.lock);
> + /* We set mc.moving_task later */
> +
> + ret = mem_cgroup_precharge_mc(mm);
> + if (ret)
> + mem_cgroup_clear_mc();
> }
> + mmput(mm);
> return ret;
> }
>
> --
> 2.4.0
>
--
Michal Hocko
SUSE Labs
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 59+ messages in thread
* [PATCH 3/7] memcg: immigrate charges only when a threadgroup leader is moved
2015-05-18 19:49 ` Tejun Heo
@ 2015-05-18 19:49 ` Tejun Heo
-1 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-18 19:49 UTC (permalink / raw)
To: lizefan-hv44wF8Li93QT0dZR+AlfA
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA, hannes-druUgvl0LCNAfugRpC6u6w,
mhocko-AlSwsSmVLrQ, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Tejun Heo
If move_charge flag is set, memcg tries to move memory charges to the
destnation css. The current implementation migrates memory whenever
any thread of a process is migrated making the behavior somewhat
arbitrary. Let's tie memory operations to the threadgroup leader so
that memory is migrated only when the leader is migrated.
While this is a behavior change, given the inherent fuziness, this
change is not too likely to be noticed and allows us to clearly define
who owns the memory (always the leader) and helps the planned atomic
multi-process migration.
Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
Cc: Michal Hocko <mhocko-AlSwsSmVLrQ@public.gmane.org>
---
mm/memcontrol.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b1b834d..74fcea3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5014,6 +5014,9 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
return 0;
p = cgroup_taskset_first(tset);
+ if (!thread_group_leader(p))
+ return 0;
+
from = mem_cgroup_from_task(p);
VM_BUG_ON(from == memcg);
--
2.4.0
^ permalink raw reply related [flat|nested] 59+ messages in thread
* [PATCH 3/7] memcg: immigrate charges only when a threadgroup leader is moved
@ 2015-05-18 19:49 ` Tejun Heo
0 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-18 19:49 UTC (permalink / raw)
To: lizefan; +Cc: cgroups, hannes, mhocko, linux-mm, Tejun Heo
If move_charge flag is set, memcg tries to move memory charges to the
destnation css. The current implementation migrates memory whenever
any thread of a process is migrated making the behavior somewhat
arbitrary. Let's tie memory operations to the threadgroup leader so
that memory is migrated only when the leader is migrated.
While this is a behavior change, given the inherent fuziness, this
change is not too likely to be noticed and allows us to clearly define
who owns the memory (always the leader) and helps the planned atomic
multi-process migration.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
---
mm/memcontrol.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b1b834d..74fcea3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5014,6 +5014,9 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
return 0;
p = cgroup_taskset_first(tset);
+ if (!thread_group_leader(p))
+ return 0;
+
from = mem_cgroup_from_task(p);
VM_BUG_ON(from == memcg);
--
2.4.0
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 59+ messages in thread
[parent not found: <1431978595-12176-4-git-send-email-tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>]
* Re: [PATCH 3/7] memcg: immigrate charges only when a threadgroup leader is moved
2015-05-18 19:49 ` Tejun Heo
@ 2015-05-19 12:13 ` Michal Hocko
-1 siblings, 0 replies; 59+ messages in thread
From: Michal Hocko @ 2015-05-19 12:13 UTC (permalink / raw)
To: Tejun Heo
Cc: lizefan-hv44wF8Li93QT0dZR+AlfA, cgroups-u79uwXL29TY76Z2rM5mHXA,
hannes-druUgvl0LCNAfugRpC6u6w, linux-mm-Bw31MaZKKs3YtjvyW6yDsg
On Mon 18-05-15 15:49:51, Tejun Heo wrote:
> If move_charge flag is set, memcg tries to move memory charges to the
> destnation css. The current implementation migrates memory whenever
> any thread of a process is migrated making the behavior somewhat
> arbitrary.
This is not true. We have:
mm = get_task_mm(p);
if (!mm)
return 0;
/* We move charges only when we move a owner of the mm */
if (mm->owner == p) {
So we are ignoring threads which are not owner of the mm struct and that
should be the thread group leader AFAICS.
mm_update_next_owner is rather complex (maybe too much and it would
deserve some attention) so there might really be some corner cases but
the whole memcg code relies on mm->owner rather than thread group leader
so I would keep the same logic here.
> Let's tie memory operations to the threadgroup leader so
> that memory is migrated only when the leader is migrated.
This would lead to another strange behavior when the group leader is not
owner (if that is possible at all) and the memory wouldn't get migrated
at all.
I am trying to wrap my head around mm_update_next_owner and maybe we can
change it to use the thread group leader but this needs more thinking...
> While this is a behavior change, given the inherent fuziness, this
> change is not too likely to be noticed and allows us to clearly define
> who owns the memory (always the leader) and helps the planned atomic
> multi-process migration.
>
> Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> Cc: Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
> Cc: Michal Hocko <mhocko-AlSwsSmVLrQ@public.gmane.org>
> ---
> mm/memcontrol.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index b1b834d..74fcea3 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -5014,6 +5014,9 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
> return 0;
>
> p = cgroup_taskset_first(tset);
> + if (!thread_group_leader(p))
> + return 0;
> +
> from = mem_cgroup_from_task(p);
>
> VM_BUG_ON(from == memcg);
> --
> 2.4.0
>
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 59+ messages in thread* Re: [PATCH 3/7] memcg: immigrate charges only when a threadgroup leader is moved
@ 2015-05-19 12:13 ` Michal Hocko
0 siblings, 0 replies; 59+ messages in thread
From: Michal Hocko @ 2015-05-19 12:13 UTC (permalink / raw)
To: Tejun Heo; +Cc: lizefan, cgroups, hannes, linux-mm
On Mon 18-05-15 15:49:51, Tejun Heo wrote:
> If move_charge flag is set, memcg tries to move memory charges to the
> destnation css. The current implementation migrates memory whenever
> any thread of a process is migrated making the behavior somewhat
> arbitrary.
This is not true. We have:
mm = get_task_mm(p);
if (!mm)
return 0;
/* We move charges only when we move a owner of the mm */
if (mm->owner == p) {
So we are ignoring threads which are not owner of the mm struct and that
should be the thread group leader AFAICS.
mm_update_next_owner is rather complex (maybe too much and it would
deserve some attention) so there might really be some corner cases but
the whole memcg code relies on mm->owner rather than thread group leader
so I would keep the same logic here.
> Let's tie memory operations to the threadgroup leader so
> that memory is migrated only when the leader is migrated.
This would lead to another strange behavior when the group leader is not
owner (if that is possible at all) and the memory wouldn't get migrated
at all.
I am trying to wrap my head around mm_update_next_owner and maybe we can
change it to use the thread group leader but this needs more thinking...
> While this is a behavior change, given the inherent fuziness, this
> change is not too likely to be noticed and allows us to clearly define
> who owns the memory (always the leader) and helps the planned atomic
> multi-process migration.
>
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Michal Hocko <mhocko@suse.cz>
> ---
> mm/memcontrol.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index b1b834d..74fcea3 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -5014,6 +5014,9 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
> return 0;
>
> p = cgroup_taskset_first(tset);
> + if (!thread_group_leader(p))
> + return 0;
> +
> from = mem_cgroup_from_task(p);
>
> VM_BUG_ON(from == memcg);
> --
> 2.4.0
>
--
Michal Hocko
SUSE Labs
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 59+ messages in thread[parent not found: <20150519121321.GB6203-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>]
* Re: [PATCH 3/7] memcg: immigrate charges only when a threadgroup leader is moved
2015-05-19 12:13 ` Michal Hocko
@ 2015-05-19 13:10 ` Michal Hocko
-1 siblings, 0 replies; 59+ messages in thread
From: Michal Hocko @ 2015-05-19 13:10 UTC (permalink / raw)
To: Tejun Heo
Cc: lizefan-hv44wF8Li93QT0dZR+AlfA, cgroups-u79uwXL29TY76Z2rM5mHXA,
hannes-druUgvl0LCNAfugRpC6u6w, linux-mm-Bw31MaZKKs3YtjvyW6yDsg
On Tue 19-05-15 14:13:21, Michal Hocko wrote:
> On Mon 18-05-15 15:49:51, Tejun Heo wrote:
> > If move_charge flag is set, memcg tries to move memory charges to the
> > destnation css. The current implementation migrates memory whenever
> > any thread of a process is migrated making the behavior somewhat
> > arbitrary.
>
> This is not true. We have:
> mm = get_task_mm(p);
> if (!mm)
> return 0;
> /* We move charges only when we move a owner of the mm */
> if (mm->owner == p) {
>
> So we are ignoring threads which are not owner of the mm struct and that
> should be the thread group leader AFAICS.
>
> mm_update_next_owner is rather complex (maybe too much and it would
> deserve some attention) so there might really be some corner cases but
> the whole memcg code relies on mm->owner rather than thread group leader
> so I would keep the same logic here.
>
> > Let's tie memory operations to the threadgroup leader so
> > that memory is migrated only when the leader is migrated.
>
> This would lead to another strange behavior when the group leader is not
> owner (if that is possible at all) and the memory wouldn't get migrated
> at all.
>
> I am trying to wrap my head around mm_update_next_owner and maybe we can
> change it to use the thread group leader but this needs more thinking...
OK, I guess I see the reason now. CLONE_VM doesn't imply CLONE_THREAD
so we can have a separate task (with it's own group leader) which
handles its own signals while it still shares the address space. So we
cannot really use the group leader here.
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 59+ messages in thread* Re: [PATCH 3/7] memcg: immigrate charges only when a threadgroup leader is moved
@ 2015-05-19 13:10 ` Michal Hocko
0 siblings, 0 replies; 59+ messages in thread
From: Michal Hocko @ 2015-05-19 13:10 UTC (permalink / raw)
To: Tejun Heo; +Cc: lizefan, cgroups, hannes, linux-mm
On Tue 19-05-15 14:13:21, Michal Hocko wrote:
> On Mon 18-05-15 15:49:51, Tejun Heo wrote:
> > If move_charge flag is set, memcg tries to move memory charges to the
> > destnation css. The current implementation migrates memory whenever
> > any thread of a process is migrated making the behavior somewhat
> > arbitrary.
>
> This is not true. We have:
> mm = get_task_mm(p);
> if (!mm)
> return 0;
> /* We move charges only when we move a owner of the mm */
> if (mm->owner == p) {
>
> So we are ignoring threads which are not owner of the mm struct and that
> should be the thread group leader AFAICS.
>
> mm_update_next_owner is rather complex (maybe too much and it would
> deserve some attention) so there might really be some corner cases but
> the whole memcg code relies on mm->owner rather than thread group leader
> so I would keep the same logic here.
>
> > Let's tie memory operations to the threadgroup leader so
> > that memory is migrated only when the leader is migrated.
>
> This would lead to another strange behavior when the group leader is not
> owner (if that is possible at all) and the memory wouldn't get migrated
> at all.
>
> I am trying to wrap my head around mm_update_next_owner and maybe we can
> change it to use the thread group leader but this needs more thinking...
OK, I guess I see the reason now. CLONE_VM doesn't imply CLONE_THREAD
so we can have a separate task (with it's own group leader) which
handles its own signals while it still shares the address space. So we
cannot really use the group leader here.
--
Michal Hocko
SUSE Labs
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 59+ messages in thread
* Re: [PATCH 3/7] memcg: immigrate charges only when a threadgroup leader is moved
2015-05-19 12:13 ` Michal Hocko
@ 2015-05-19 21:27 ` Tejun Heo
-1 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-19 21:27 UTC (permalink / raw)
To: Michal Hocko
Cc: lizefan-hv44wF8Li93QT0dZR+AlfA, cgroups-u79uwXL29TY76Z2rM5mHXA,
hannes-druUgvl0LCNAfugRpC6u6w, linux-mm-Bw31MaZKKs3YtjvyW6yDsg
Hello, Michal.
On Tue, May 19, 2015 at 02:13:21PM +0200, Michal Hocko wrote:
> This is not true. We have:
> mm = get_task_mm(p);
> if (!mm)
> return 0;
> /* We move charges only when we move a owner of the mm */
> if (mm->owner == p) {
Ah, missed that part.
> So we are ignoring threads which are not owner of the mm struct and that
> should be the thread group leader AFAICS.
>
> mm_update_next_owner is rather complex (maybe too much and it would
> deserve some attention) so there might really be some corner cases but
> the whole memcg code relies on mm->owner rather than thread group leader
> so I would keep the same logic here.
>
> > Let's tie memory operations to the threadgroup leader so
> > that memory is migrated only when the leader is migrated.
>
> This would lead to another strange behavior when the group leader is not
> owner (if that is possible at all) and the memory wouldn't get migrated
> at all.
Hmmm... is it guaranteed that if a threadgroup owns a mm, the mm's
owner would be the threadgroup leader? If not, the current code is
broken too as it always takes the first member which is the
threadgroup leader and if that's not the mm owner we may skip
immigration while migrating the whole process.
I suppose the right thing to do here is iterating the taskset and find
the mm owner?
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 59+ messages in thread* Re: [PATCH 3/7] memcg: immigrate charges only when a threadgroup leader is moved
@ 2015-05-19 21:27 ` Tejun Heo
0 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-19 21:27 UTC (permalink / raw)
To: Michal Hocko; +Cc: lizefan, cgroups, hannes, linux-mm
Hello, Michal.
On Tue, May 19, 2015 at 02:13:21PM +0200, Michal Hocko wrote:
> This is not true. We have:
> mm = get_task_mm(p);
> if (!mm)
> return 0;
> /* We move charges only when we move a owner of the mm */
> if (mm->owner == p) {
Ah, missed that part.
> So we are ignoring threads which are not owner of the mm struct and that
> should be the thread group leader AFAICS.
>
> mm_update_next_owner is rather complex (maybe too much and it would
> deserve some attention) so there might really be some corner cases but
> the whole memcg code relies on mm->owner rather than thread group leader
> so I would keep the same logic here.
>
> > Let's tie memory operations to the threadgroup leader so
> > that memory is migrated only when the leader is migrated.
>
> This would lead to another strange behavior when the group leader is not
> owner (if that is possible at all) and the memory wouldn't get migrated
> at all.
Hmmm... is it guaranteed that if a threadgroup owns a mm, the mm's
owner would be the threadgroup leader? If not, the current code is
broken too as it always takes the first member which is the
threadgroup leader and if that's not the mm owner we may skip
immigration while migrating the whole process.
I suppose the right thing to do here is iterating the taskset and find
the mm owner?
Thanks.
--
tejun
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 59+ messages in thread* Re: [PATCH 3/7] memcg: immigrate charges only when a threadgroup leader is moved
2015-05-19 21:27 ` Tejun Heo
(?)
@ 2015-05-20 13:10 ` Michal Hocko
2015-05-20 13:21 ` Michal Hocko
-1 siblings, 1 reply; 59+ messages in thread
From: Michal Hocko @ 2015-05-20 13:10 UTC (permalink / raw)
To: Tejun Heo; +Cc: lizefan, cgroups, hannes, linux-mm, Oleg Nesterov
On Tue 19-05-15 17:27:54, Tejun Heo wrote:
> Hello, Michal.
>
> On Tue, May 19, 2015 at 02:13:21PM +0200, Michal Hocko wrote:
> > This is not true. We have:
> > mm = get_task_mm(p);
> > if (!mm)
> > return 0;
> > /* We move charges only when we move a owner of the mm */
> > if (mm->owner == p) {
>
> Ah, missed that part.
>
> > So we are ignoring threads which are not owner of the mm struct and that
> > should be the thread group leader AFAICS.
> >
> > mm_update_next_owner is rather complex (maybe too much and it would
> > deserve some attention) so there might really be some corner cases but
> > the whole memcg code relies on mm->owner rather than thread group leader
> > so I would keep the same logic here.
> >
> > > Let's tie memory operations to the threadgroup leader so
> > > that memory is migrated only when the leader is migrated.
> >
> > This would lead to another strange behavior when the group leader is not
> > owner (if that is possible at all) and the memory wouldn't get migrated
> > at all.
>
> Hmmm... is it guaranteed that if a threadgroup owns a mm, the mm's
> owner would be the threadgroup leader?
That is a good question. As I've said I would expect it to be a thread
group leader but 4cd1a8fc3d3c ("memcg: fix possible panic when
CONFIG_MM_OWNER=y") confused me by claiming
"
Also, the owner member comment description is wrong. mm->owner does
not necessarily point to the thread group leader.
"
But now I am looking closer into mm_update_next_owner. for_each_process
should see only thread group leaders. p->{real_parent->}children
siblings search should return group leaders as well AFAICS.
But I am completely lost in the exit code paths. E.g. what happens
when the thread group leader exits and the other threads are still
alive? I would expect another thread would be chosen as a new leader and
siblings would be updated. But I cannot find that code. Maybe the
original leader just waits for all other threads to terminate and stay
in the linked lists.
The scary comment for has_group_leader_pid suggests that a thread might
have a the real pid without being the group leader. /me confused
Something for Oleg I guess.
> If not, the current code is
> broken too as it always takes the first member which is the
> threadgroup leader and if that's not the mm owner we may skip
> immigration while migrating the whole process.
>
> I suppose the right thing to do here is iterating the taskset and find
> the mm owner?
>
> Thanks.
>
> --
> tejun
--
Michal Hocko
SUSE Labs
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 59+ messages in thread* Re: [PATCH 3/7] memcg: immigrate charges only when a threadgroup leader is moved
2015-05-20 13:10 ` Michal Hocko
@ 2015-05-20 13:21 ` Michal Hocko
[not found] ` <20150520132158.GB28678-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>
0 siblings, 1 reply; 59+ messages in thread
From: Michal Hocko @ 2015-05-20 13:21 UTC (permalink / raw)
To: Tejun Heo; +Cc: lizefan, cgroups, hannes, linux-mm, Oleg Nesterov
On Wed 20-05-15 15:10:44, Michal Hocko wrote:
[...]
> But I am completely lost in the exit code paths. E.g. what happens
> when the thread group leader exits and the other threads are still
> alive? I would expect another thread would be chosen as a new leader and
> siblings would be updated. But I cannot find that code. Maybe the
> original leader just waits for all other threads to terminate and stay
> in the linked lists.
I've tried a simple test where the main thread (group leader) calls
pthread_exit after it creates another thread. The other thread continues
to run and the leader is marked as Zombie:
$ ./t &
Main pid:2432 tid:2432
Exiting main thread
Secondary pid:2432 tid:2433......
$ ps ax | grep 2432
2432 pts/3 Zl+ 0:00 [t] <defunct>
So I assume the leader simply waits for its threads to finish and it
stays in the sibling list. __unhash_process seems like it does the final
cleanup and unlinks the leader from the lists. Which means that
mm_update_next_owner never sees !group_leader. Is that correct Oleg?
--
Michal Hocko
SUSE Labs
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 59+ messages in thread
* Re: [PATCH 3/7] memcg: immigrate charges only when a threadgroup leader is moved
2015-05-18 19:49 ` Tejun Heo
@ 2015-05-21 14:12 ` Michal Hocko
-1 siblings, 0 replies; 59+ messages in thread
From: Michal Hocko @ 2015-05-21 14:12 UTC (permalink / raw)
To: Tejun Heo
Cc: lizefan-hv44wF8Li93QT0dZR+AlfA, cgroups-u79uwXL29TY76Z2rM5mHXA,
hannes-druUgvl0LCNAfugRpC6u6w, linux-mm-Bw31MaZKKs3YtjvyW6yDsg
On Mon 18-05-15 15:49:51, Tejun Heo wrote:
> If move_charge flag is set, memcg tries to move memory charges to the
> destnation css. The current implementation migrates memory whenever
> any thread of a process is migrated making the behavior somewhat
> arbitrary. Let's tie memory operations to the threadgroup leader so
> that memory is migrated only when the leader is migrated.
>
> While this is a behavior change, given the inherent fuziness, this
> change is not too likely to be noticed and allows us to clearly define
> who owns the memory (always the leader) and helps the planned atomic
> multi-process migration.
>
> Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> Cc: Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
> Cc: Michal Hocko <mhocko-AlSwsSmVLrQ@public.gmane.org>
OK, I guess the discussion with Oleg confirmed that the patch is not
really needed because mm_struct->owner check implies thread group
leader. This should be sufficient for your purpose Tejun, right?
> ---
> mm/memcontrol.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index b1b834d..74fcea3 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -5014,6 +5014,9 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
> return 0;
>
> p = cgroup_taskset_first(tset);
> + if (!thread_group_leader(p))
> + return 0;
> +
> from = mem_cgroup_from_task(p);
>
> VM_BUG_ON(from == memcg);
> --
> 2.4.0
>
--
Michal Hocko
SUSE Labs
^ permalink raw reply [flat|nested] 59+ messages in thread
* Re: [PATCH 3/7] memcg: immigrate charges only when a threadgroup leader is moved
@ 2015-05-21 14:12 ` Michal Hocko
0 siblings, 0 replies; 59+ messages in thread
From: Michal Hocko @ 2015-05-21 14:12 UTC (permalink / raw)
To: Tejun Heo; +Cc: lizefan, cgroups, hannes, linux-mm
On Mon 18-05-15 15:49:51, Tejun Heo wrote:
> If move_charge flag is set, memcg tries to move memory charges to the
> destnation css. The current implementation migrates memory whenever
> any thread of a process is migrated making the behavior somewhat
> arbitrary. Let's tie memory operations to the threadgroup leader so
> that memory is migrated only when the leader is migrated.
>
> While this is a behavior change, given the inherent fuziness, this
> change is not too likely to be noticed and allows us to clearly define
> who owns the memory (always the leader) and helps the planned atomic
> multi-process migration.
>
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Michal Hocko <mhocko@suse.cz>
OK, I guess the discussion with Oleg confirmed that the patch is not
really needed because mm_struct->owner check implies thread group
leader. This should be sufficient for your purpose Tejun, right?
> ---
> mm/memcontrol.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index b1b834d..74fcea3 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -5014,6 +5014,9 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
> return 0;
>
> p = cgroup_taskset_first(tset);
> + if (!thread_group_leader(p))
> + return 0;
> +
> from = mem_cgroup_from_task(p);
>
> VM_BUG_ON(from == memcg);
> --
> 2.4.0
>
--
Michal Hocko
SUSE Labs
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 59+ messages in thread
[parent not found: <20150521141225.GB14475-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>]
* Re: [PATCH 3/7] memcg: immigrate charges only when a threadgroup leader is moved
2015-05-21 14:12 ` Michal Hocko
@ 2015-05-21 22:09 ` Tejun Heo
-1 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-21 22:09 UTC (permalink / raw)
To: Michal Hocko
Cc: lizefan-hv44wF8Li93QT0dZR+AlfA, cgroups-u79uwXL29TY76Z2rM5mHXA,
hannes-druUgvl0LCNAfugRpC6u6w, linux-mm-Bw31MaZKKs3YtjvyW6yDsg
On Thu, May 21, 2015 at 04:12:26PM +0200, Michal Hocko wrote:
> On Mon 18-05-15 15:49:51, Tejun Heo wrote:
> > If move_charge flag is set, memcg tries to move memory charges to the
> > destnation css. The current implementation migrates memory whenever
> > any thread of a process is migrated making the behavior somewhat
> > arbitrary. Let's tie memory operations to the threadgroup leader so
> > that memory is migrated only when the leader is migrated.
> >
> > While this is a behavior change, given the inherent fuziness, this
> > change is not too likely to be noticed and allows us to clearly define
> > who owns the memory (always the leader) and helps the planned atomic
> > multi-process migration.
> >
> > Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> > Cc: Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
> > Cc: Michal Hocko <mhocko-AlSwsSmVLrQ@public.gmane.org>
>
> OK, I guess the discussion with Oleg confirmed that the patch is not
> really needed because mm_struct->owner check implies thread group
> leader. This should be sufficient for your purpose Tejun, right?
Hmmm... we still need to update so that it actually iterates leaders
to find the owner as first in taskset == leader assumption is going
away but yeah this patch in itself can go away. I'll update the next
patch accordingly.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 59+ messages in thread
* Re: [PATCH 3/7] memcg: immigrate charges only when a threadgroup leader is moved
@ 2015-05-21 22:09 ` Tejun Heo
0 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-21 22:09 UTC (permalink / raw)
To: Michal Hocko; +Cc: lizefan, cgroups, hannes, linux-mm
On Thu, May 21, 2015 at 04:12:26PM +0200, Michal Hocko wrote:
> On Mon 18-05-15 15:49:51, Tejun Heo wrote:
> > If move_charge flag is set, memcg tries to move memory charges to the
> > destnation css. The current implementation migrates memory whenever
> > any thread of a process is migrated making the behavior somewhat
> > arbitrary. Let's tie memory operations to the threadgroup leader so
> > that memory is migrated only when the leader is migrated.
> >
> > While this is a behavior change, given the inherent fuziness, this
> > change is not too likely to be noticed and allows us to clearly define
> > who owns the memory (always the leader) and helps the planned atomic
> > multi-process migration.
> >
> > Signed-off-by: Tejun Heo <tj@kernel.org>
> > Cc: Johannes Weiner <hannes@cmpxchg.org>
> > Cc: Michal Hocko <mhocko@suse.cz>
>
> OK, I guess the discussion with Oleg confirmed that the patch is not
> really needed because mm_struct->owner check implies thread group
> leader. This should be sufficient for your purpose Tejun, right?
Hmmm... we still need to update so that it actually iterates leaders
to find the owner as first in taskset == leader assumption is going
away but yeah this patch in itself can go away. I'll update the next
patch accordingly.
Thanks.
--
tejun
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 59+ messages in thread
* [PATCH 4/7] cgroup, memcg, cpuset: implement cgroup_taskset_for_each_leader()
2015-05-18 19:49 ` Tejun Heo
@ 2015-05-18 19:49 ` Tejun Heo
-1 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-18 19:49 UTC (permalink / raw)
To: lizefan-hv44wF8Li93QT0dZR+AlfA
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA, hannes-druUgvl0LCNAfugRpC6u6w,
mhocko-AlSwsSmVLrQ, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Tejun Heo
It wasn't explicitly documented but, when a process is being migrated,
cpuset and memcg depend on cgroup_taskset_first() returning the
threadgroup leader; however, this approach is somewhat ghetto and
would no longer work for the planned multi-process migration.
This patch introduces explicit cgroup_taskset_for_each_leader() which
iterates over only the threadgroup leaders and replaces
cgroup_taskset_first() usages for accessing the leader with it.
This prepares both memcg and cpuset for multi-process migration. This
patch also updates the documentation for cgroup_taskset_for_each() to
clarify the iteration rules and removes comments mentioning task
ordering in tasksets.
Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
Cc: Michal Hocko <mhocko-AlSwsSmVLrQ@public.gmane.org>
Cc: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
include/linux/cgroup.h | 22 ++++++++++++++++++++++
kernel/cgroup.c | 11 -----------
kernel/cpuset.c | 9 ++++-----
mm/memcontrol.c | 16 +++++++++++++---
4 files changed, 39 insertions(+), 19 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 82319fb..788fab38 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -197,11 +197,33 @@ void css_task_iter_end(struct css_task_iter *it);
* cgroup_taskset_for_each - iterate cgroup_taskset
* @task: the loop cursor
* @tset: taskset to iterate
+ *
+ * @tset may contain multiple tasks and they may belong to multiple
+ * processes. When there are multiple tasks in @tset, if a task of a
+ * process is in @tset, all tasks of the process are in @tset. Also, all
+ * are guaranteed to share the same source and destination csses.
+ *
+ * Iteration is not in any specific order.
*/
#define cgroup_taskset_for_each(task, tset) \
for ((task) = cgroup_taskset_first((tset)); (task); \
(task) = cgroup_taskset_next((tset)))
+/**
+ * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
+ * @leader: the loop cursor
+ * @tset: takset to iterate
+ *
+ * Iterate threadgroup leaders of @tset. For single-task migrations, @tset
+ * may not contain any.
+ */
+#define cgroup_taskset_for_each_leader(leader, tset) \
+ for ((leader) = cgroup_taskset_first((tset)); (leader); \
+ (leader) = cgroup_taskset_next((tset))) \
+ if ((leader) != (leader)->group_leader) \
+ ; \
+ else
+
/*
* Inline functions.
*/
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index afe9a7e..da45ce9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2064,13 +2064,6 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp,
get_css_set(new_cset);
rcu_assign_pointer(tsk->cgroups, new_cset);
-
- /*
- * Use move_tail so that cgroup_taskset_first() still returns the
- * leader after migration. This works because cgroup_migrate()
- * ensures that the dst_cset of the leader is the first on the
- * tset's dst_csets list.
- */
list_move_tail(&tsk->cg_list, &new_cset->mg_tasks);
/*
@@ -2266,10 +2259,6 @@ static int cgroup_migrate(struct cgroup *cgrp, struct task_struct *leader,
if (!cset->mg_src_cgrp)
goto next;
- /*
- * cgroup_taskset_first() must always return the leader.
- * Take care to avoid disturbing the ordering.
- */
list_move_tail(&task->cg_list, &cset->mg_tasks);
if (list_empty(&cset->mg_node))
list_add_tail(&cset->mg_node, &tset.src_csets);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 43db5b7..54a2b99 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1485,7 +1485,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
/* static buf protected by cpuset_mutex */
static nodemask_t cpuset_attach_nodemask_to;
struct task_struct *task;
- struct task_struct *leader = cgroup_taskset_first(tset);
+ struct task_struct *leader;
struct cpuset *cs = css_cs(css);
struct cpuset *oldcs = cpuset_attach_old_cs;
@@ -1511,12 +1511,11 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
}
/*
- * Change mm, possibly for multiple threads in a threadgroup. This
- * is expensive and may sleep and should be moved outside migration
- * path proper.
+ * Change mm for all threadgroup leaders. This is expensive and may
+ * sleep and should be moved outside migration path proper.
*/
cpuset_attach_nodemask_to = cs->effective_mems;
- if (thread_group_leader(leader)) {
+ cgroup_taskset_for_each_leader(leader, tset) {
struct mm_struct *mm = get_task_mm(leader);
if (mm) {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 74fcea3..526ed58 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4999,7 +4999,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
struct mem_cgroup *from;
- struct task_struct *p;
+ struct task_struct *leader, *p;
struct mm_struct *mm;
unsigned long move_flags;
int ret = 0;
@@ -5013,8 +5013,18 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
if (!move_flags)
return 0;
- p = cgroup_taskset_first(tset);
- if (!thread_group_leader(p))
+ /*
+ * Multi-process migrations only happen on the default hierarchy
+ * where charge immigration is not used. Perform charge
+ * immigration if @tset contains a leader and whine if there are
+ * multiple.
+ */
+ p = NULL;
+ cgroup_taskset_for_each_leader(leader, tset) {
+ WARN_ON_ONCE(p);
+ p = leader;
+ }
+ if (!p)
return 0;
from = mem_cgroup_from_task(p);
--
2.4.0
^ permalink raw reply related [flat|nested] 59+ messages in thread* [PATCH 4/7] cgroup, memcg, cpuset: implement cgroup_taskset_for_each_leader()
@ 2015-05-18 19:49 ` Tejun Heo
0 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-18 19:49 UTC (permalink / raw)
To: lizefan; +Cc: cgroups, hannes, mhocko, linux-mm, Tejun Heo
It wasn't explicitly documented but, when a process is being migrated,
cpuset and memcg depend on cgroup_taskset_first() returning the
threadgroup leader; however, this approach is somewhat ghetto and
would no longer work for the planned multi-process migration.
This patch introduces explicit cgroup_taskset_for_each_leader() which
iterates over only the threadgroup leaders and replaces
cgroup_taskset_first() usages for accessing the leader with it.
This prepares both memcg and cpuset for multi-process migration. This
patch also updates the documentation for cgroup_taskset_for_each() to
clarify the iteration rules and removes comments mentioning task
ordering in tasksets.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Li Zefan <lizefan@huawei.com>
---
include/linux/cgroup.h | 22 ++++++++++++++++++++++
kernel/cgroup.c | 11 -----------
kernel/cpuset.c | 9 ++++-----
mm/memcontrol.c | 16 +++++++++++++---
4 files changed, 39 insertions(+), 19 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 82319fb..788fab38 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -197,11 +197,33 @@ void css_task_iter_end(struct css_task_iter *it);
* cgroup_taskset_for_each - iterate cgroup_taskset
* @task: the loop cursor
* @tset: taskset to iterate
+ *
+ * @tset may contain multiple tasks and they may belong to multiple
+ * processes. When there are multiple tasks in @tset, if a task of a
+ * process is in @tset, all tasks of the process are in @tset. Also, all
+ * are guaranteed to share the same source and destination csses.
+ *
+ * Iteration is not in any specific order.
*/
#define cgroup_taskset_for_each(task, tset) \
for ((task) = cgroup_taskset_first((tset)); (task); \
(task) = cgroup_taskset_next((tset)))
+/**
+ * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
+ * @leader: the loop cursor
+ * @tset: takset to iterate
+ *
+ * Iterate threadgroup leaders of @tset. For single-task migrations, @tset
+ * may not contain any.
+ */
+#define cgroup_taskset_for_each_leader(leader, tset) \
+ for ((leader) = cgroup_taskset_first((tset)); (leader); \
+ (leader) = cgroup_taskset_next((tset))) \
+ if ((leader) != (leader)->group_leader) \
+ ; \
+ else
+
/*
* Inline functions.
*/
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index afe9a7e..da45ce9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2064,13 +2064,6 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp,
get_css_set(new_cset);
rcu_assign_pointer(tsk->cgroups, new_cset);
-
- /*
- * Use move_tail so that cgroup_taskset_first() still returns the
- * leader after migration. This works because cgroup_migrate()
- * ensures that the dst_cset of the leader is the first on the
- * tset's dst_csets list.
- */
list_move_tail(&tsk->cg_list, &new_cset->mg_tasks);
/*
@@ -2266,10 +2259,6 @@ static int cgroup_migrate(struct cgroup *cgrp, struct task_struct *leader,
if (!cset->mg_src_cgrp)
goto next;
- /*
- * cgroup_taskset_first() must always return the leader.
- * Take care to avoid disturbing the ordering.
- */
list_move_tail(&task->cg_list, &cset->mg_tasks);
if (list_empty(&cset->mg_node))
list_add_tail(&cset->mg_node, &tset.src_csets);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 43db5b7..54a2b99 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1485,7 +1485,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
/* static buf protected by cpuset_mutex */
static nodemask_t cpuset_attach_nodemask_to;
struct task_struct *task;
- struct task_struct *leader = cgroup_taskset_first(tset);
+ struct task_struct *leader;
struct cpuset *cs = css_cs(css);
struct cpuset *oldcs = cpuset_attach_old_cs;
@@ -1511,12 +1511,11 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
}
/*
- * Change mm, possibly for multiple threads in a threadgroup. This
- * is expensive and may sleep and should be moved outside migration
- * path proper.
+ * Change mm for all threadgroup leaders. This is expensive and may
+ * sleep and should be moved outside migration path proper.
*/
cpuset_attach_nodemask_to = cs->effective_mems;
- if (thread_group_leader(leader)) {
+ cgroup_taskset_for_each_leader(leader, tset) {
struct mm_struct *mm = get_task_mm(leader);
if (mm) {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 74fcea3..526ed58 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4999,7 +4999,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
struct mem_cgroup *from;
- struct task_struct *p;
+ struct task_struct *leader, *p;
struct mm_struct *mm;
unsigned long move_flags;
int ret = 0;
@@ -5013,8 +5013,18 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
if (!move_flags)
return 0;
- p = cgroup_taskset_first(tset);
- if (!thread_group_leader(p))
+ /*
+ * Multi-process migrations only happen on the default hierarchy
+ * where charge immigration is not used. Perform charge
+ * immigration if @tset contains a leader and whine if there are
+ * multiple.
+ */
+ p = NULL;
+ cgroup_taskset_for_each_leader(leader, tset) {
+ WARN_ON_ONCE(p);
+ p = leader;
+ }
+ if (!p)
return 0;
from = mem_cgroup_from_task(p);
--
2.4.0
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 59+ messages in thread
* [PATCH 5/7] reorder cgroup_migrate()'s parameters
2015-05-18 19:49 ` Tejun Heo
@ 2015-05-18 19:49 ` Tejun Heo
-1 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-18 19:49 UTC (permalink / raw)
To: lizefan-hv44wF8Li93QT0dZR+AlfA
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA, hannes-druUgvl0LCNAfugRpC6u6w,
mhocko-AlSwsSmVLrQ, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Tejun Heo
cgroup_migrate() has the destination cgroup as the first parameter
while cgroup_task_migrate() has the destination cset as the last.
Another migration function is scheduled to be added which can make the
discrepancy further stand out. Let's reorder cgroup_migrate()'s
parameters so that the destination cgroup is the last.
This doesn't cause any functional difference.
Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
kernel/cgroup.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index da45ce9..b36707b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2209,9 +2209,9 @@ err:
/**
* cgroup_migrate - migrate a process or task to a cgroup
- * @cgrp: the destination cgroup
* @leader: the leader of the process or the task to migrate
* @threadgroup: whether @leader points to the whole process or a single task
+ * @cgrp: the destination cgroup
*
* Migrate a process or task denoted by @leader to @cgrp. If migrating a
* process, the caller must be holding cgroup_threadgroup_rwsem. The
@@ -2225,8 +2225,8 @@ err:
* decided for all targets by invoking group_migrate_prepare_dst() before
* actually starting migrating.
*/
-static int cgroup_migrate(struct cgroup *cgrp, struct task_struct *leader,
- bool threadgroup)
+static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
+ struct cgroup *cgrp)
{
struct cgroup_taskset tset = {
.src_csets = LIST_HEAD_INIT(tset.src_csets),
@@ -2363,7 +2363,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
/* prepare dst csets and commit */
ret = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets);
if (!ret)
- ret = cgroup_migrate(dst_cgrp, leader, threadgroup);
+ ret = cgroup_migrate(leader, threadgroup, dst_cgrp);
cgroup_migrate_finish(&preloaded_csets);
return ret;
@@ -2640,7 +2640,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
goto out_finish;
last_task = task;
- ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
+ ret = cgroup_migrate(task, true, src_cset->dfl_cgrp);
put_task_struct(task);
@@ -3711,7 +3711,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
css_task_iter_end(&it);
if (task) {
- ret = cgroup_migrate(to, task, false);
+ ret = cgroup_migrate(task, false, to);
put_task_struct(task);
}
} while (task && !ret);
--
2.4.0
^ permalink raw reply related [flat|nested] 59+ messages in thread* [PATCH 5/7] reorder cgroup_migrate()'s parameters
@ 2015-05-18 19:49 ` Tejun Heo
0 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-18 19:49 UTC (permalink / raw)
To: lizefan; +Cc: cgroups, hannes, mhocko, linux-mm, Tejun Heo
cgroup_migrate() has the destination cgroup as the first parameter
while cgroup_task_migrate() has the destination cset as the last.
Another migration function is scheduled to be added which can make the
discrepancy further stand out. Let's reorder cgroup_migrate()'s
parameters so that the destination cgroup is the last.
This doesn't cause any functional difference.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
kernel/cgroup.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index da45ce9..b36707b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2209,9 +2209,9 @@ err:
/**
* cgroup_migrate - migrate a process or task to a cgroup
- * @cgrp: the destination cgroup
* @leader: the leader of the process or the task to migrate
* @threadgroup: whether @leader points to the whole process or a single task
+ * @cgrp: the destination cgroup
*
* Migrate a process or task denoted by @leader to @cgrp. If migrating a
* process, the caller must be holding cgroup_threadgroup_rwsem. The
@@ -2225,8 +2225,8 @@ err:
* decided for all targets by invoking group_migrate_prepare_dst() before
* actually starting migrating.
*/
-static int cgroup_migrate(struct cgroup *cgrp, struct task_struct *leader,
- bool threadgroup)
+static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
+ struct cgroup *cgrp)
{
struct cgroup_taskset tset = {
.src_csets = LIST_HEAD_INIT(tset.src_csets),
@@ -2363,7 +2363,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
/* prepare dst csets and commit */
ret = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets);
if (!ret)
- ret = cgroup_migrate(dst_cgrp, leader, threadgroup);
+ ret = cgroup_migrate(leader, threadgroup, dst_cgrp);
cgroup_migrate_finish(&preloaded_csets);
return ret;
@@ -2640,7 +2640,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
goto out_finish;
last_task = task;
- ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
+ ret = cgroup_migrate(task, true, src_cset->dfl_cgrp);
put_task_struct(task);
@@ -3711,7 +3711,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
css_task_iter_end(&it);
if (task) {
- ret = cgroup_migrate(to, task, false);
+ ret = cgroup_migrate(task, false, to);
put_task_struct(task);
}
} while (task && !ret);
--
2.4.0
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 59+ messages in thread
* [PATCH 6/7] cgroup: separate out taskset operations from cgroup_migrate()
2015-05-18 19:49 ` Tejun Heo
@ 2015-05-18 19:49 ` Tejun Heo
-1 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-18 19:49 UTC (permalink / raw)
To: lizefan-hv44wF8Li93QT0dZR+AlfA
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA, hannes-druUgvl0LCNAfugRpC6u6w,
mhocko-AlSwsSmVLrQ, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Tejun Heo
Currently, cgroup_migreate() implements large part of the migration
logic inline including building the target taskset and actually
migrating them. This patch separates out the following taskset
operations.
CGROUP_TASKSET_INIT() : taskset initializer
cgroup_taskset_add() : add a task to a taskset
cgroup_taskset_migrate() : migrate a taskset to the destination cgroup
This will be used to implement atomic multi-process migration in
cgroup_update_dfl_csses(). This is pure reorganization which doesn't
introduce any functional changes.
Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
kernel/cgroup.c | 211 +++++++++++++++++++++++++++++++++-----------------------
1 file changed, 125 insertions(+), 86 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b36707b..1d86d17 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1991,6 +1991,49 @@ struct cgroup_taskset {
struct task_struct *cur_task;
};
+#define CGROUP_TASKSET_INIT(tset) (struct cgroup_taskset){ \
+ .src_csets = LIST_HEAD_INIT(tset.src_csets), \
+ .dst_csets = LIST_HEAD_INIT(tset.dst_csets), \
+ .csets = &tset.src_csets, \
+}
+
+/**
+ * cgroup_taskset_add - try to add a migration target task to a taskset
+ * @task: target task
+ * @tset: target taskset
+ *
+ * Add @task, which is a migration target, to @tset. This function becomes
+ * noop if @task doesn't need to be migrated. @task's css_set should have
+ * been added as a migration source and @task->cg_list will be moved from
+ * the css_set's tasks list to mg_tasks one.
+ */
+static void cgroup_taskset_add(struct task_struct *task,
+ struct cgroup_taskset *tset)
+{
+ struct css_set *cset;
+
+ lockdep_assert_held(&css_set_rwsem);
+
+ /* @task either already exited or can't exit until the end */
+ if (task->flags & PF_EXITING)
+ return;
+
+ /* leave @task alone if post_fork() hasn't linked it yet */
+ if (list_empty(&task->cg_list))
+ return;
+
+ cset = task_css_set(task);
+ if (!cset->mg_src_cgrp)
+ return;
+
+ list_move_tail(&task->cg_list, &cset->mg_tasks);
+ if (list_empty(&cset->mg_node))
+ list_add_tail(&cset->mg_node, &tset->src_csets);
+ if (list_empty(&cset->mg_dst_cset->mg_node))
+ list_move_tail(&cset->mg_dst_cset->mg_node,
+ &tset->dst_csets);
+}
+
/**
* cgroup_taskset_first - reset taskset and return the first task
* @tset: taskset of interest
@@ -2075,6 +2118,84 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp,
}
/**
+ * cgroup_taskset_migrate - migrate a taskset to a cgroup
+ * @tset: taget taskset
+ * @dst_cgrp: destination cgroup
+ *
+ * Migrate tasks in @tset to @dst_cgrp. This function fails iff one of the
+ * ->can_attach callbacks fails and guarantees that either all or none of
+ * the tasks in @tset are migrated. @tset is consumed regardless of
+ * success.
+ */
+static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
+ struct cgroup *dst_cgrp)
+{
+ struct cgroup_subsys_state *css, *failed_css = NULL;
+ struct task_struct *task, *tmp_task;
+ struct css_set *cset, *tmp_cset;
+ int i, ret;
+
+ /* methods shouldn't be called if no task is actually migrating */
+ if (list_empty(&tset->src_csets))
+ return 0;
+
+ /* check that we can legitimately attach to the cgroup */
+ for_each_e_css(css, i, dst_cgrp) {
+ if (css->ss->can_attach) {
+ ret = css->ss->can_attach(css, tset);
+ if (ret) {
+ failed_css = css;
+ goto out_cancel_attach;
+ }
+ }
+ }
+
+ /*
+ * Now that we're guaranteed success, proceed to move all tasks to
+ * the new cgroup. There are no failure cases after here, so this
+ * is the commit point.
+ */
+ down_write(&css_set_rwsem);
+ list_for_each_entry(cset, &tset->src_csets, mg_node) {
+ list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list)
+ cgroup_task_migrate(cset->mg_src_cgrp, task,
+ cset->mg_dst_cset);
+ }
+ up_write(&css_set_rwsem);
+
+ /*
+ * Migration is committed, all target tasks are now on dst_csets.
+ * Nothing is sensitive to fork() after this point. Notify
+ * controllers that migration is complete.
+ */
+ tset->csets = &tset->dst_csets;
+
+ for_each_e_css(css, i, dst_cgrp)
+ if (css->ss->attach)
+ css->ss->attach(css, tset);
+
+ ret = 0;
+ goto out_release_tset;
+
+out_cancel_attach:
+ for_each_e_css(css, i, dst_cgrp) {
+ if (css == failed_css)
+ break;
+ if (css->ss->cancel_attach)
+ css->ss->cancel_attach(css, tset);
+ }
+out_release_tset:
+ down_write(&css_set_rwsem);
+ list_splice_init(&tset->dst_csets, &tset->src_csets);
+ list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
+ list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
+ list_del_init(&cset->mg_node);
+ }
+ up_write(&css_set_rwsem);
+ return ret;
+}
+
+/**
* cgroup_migrate_finish - cleanup after attach
* @preloaded_csets: list of preloaded css_sets
*
@@ -2228,15 +2349,8 @@ err:
static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
struct cgroup *cgrp)
{
- struct cgroup_taskset tset = {
- .src_csets = LIST_HEAD_INIT(tset.src_csets),
- .dst_csets = LIST_HEAD_INIT(tset.dst_csets),
- .csets = &tset.src_csets,
- };
- struct cgroup_subsys_state *css, *failed_css = NULL;
- struct css_set *cset, *tmp_cset;
- struct task_struct *task, *tmp_task;
- int i, ret;
+ struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset);
+ struct task_struct *task;
/*
* Prevent freeing of tasks while we take a snapshot. Tasks that are
@@ -2247,89 +2361,14 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
rcu_read_lock();
task = leader;
do {
- /* @task either already exited or can't exit until the end */
- if (task->flags & PF_EXITING)
- goto next;
-
- /* leave @task alone if post_fork() hasn't linked it yet */
- if (list_empty(&task->cg_list))
- goto next;
-
- cset = task_css_set(task);
- if (!cset->mg_src_cgrp)
- goto next;
-
- list_move_tail(&task->cg_list, &cset->mg_tasks);
- if (list_empty(&cset->mg_node))
- list_add_tail(&cset->mg_node, &tset.src_csets);
- if (list_empty(&cset->mg_dst_cset->mg_node))
- list_move_tail(&cset->mg_dst_cset->mg_node,
- &tset.dst_csets);
- next:
+ cgroup_taskset_add(task, &tset);
if (!threadgroup)
break;
} while_each_thread(leader, task);
rcu_read_unlock();
up_write(&css_set_rwsem);
- /* methods shouldn't be called if no task is actually migrating */
- if (list_empty(&tset.src_csets))
- return 0;
-
- /* check that we can legitimately attach to the cgroup */
- for_each_e_css(css, i, cgrp) {
- if (css->ss->can_attach) {
- ret = css->ss->can_attach(css, &tset);
- if (ret) {
- failed_css = css;
- goto out_cancel_attach;
- }
- }
- }
-
- /*
- * Now that we're guaranteed success, proceed to move all tasks to
- * the new cgroup. There are no failure cases after here, so this
- * is the commit point.
- */
- down_write(&css_set_rwsem);
- list_for_each_entry(cset, &tset.src_csets, mg_node) {
- list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list)
- cgroup_task_migrate(cset->mg_src_cgrp, task,
- cset->mg_dst_cset);
- }
- up_write(&css_set_rwsem);
-
- /*
- * Migration is committed, all target tasks are now on dst_csets.
- * Nothing is sensitive to fork() after this point. Notify
- * controllers that migration is complete.
- */
- tset.csets = &tset.dst_csets;
-
- for_each_e_css(css, i, cgrp)
- if (css->ss->attach)
- css->ss->attach(css, &tset);
-
- ret = 0;
- goto out_release_tset;
-
-out_cancel_attach:
- for_each_e_css(css, i, cgrp) {
- if (css == failed_css)
- break;
- if (css->ss->cancel_attach)
- css->ss->cancel_attach(css, &tset);
- }
-out_release_tset:
- down_write(&css_set_rwsem);
- list_splice_init(&tset.dst_csets, &tset.src_csets);
- list_for_each_entry_safe(cset, tmp_cset, &tset.src_csets, mg_node) {
- list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
- list_del_init(&cset->mg_node);
- }
- up_write(&css_set_rwsem);
- return ret;
+ return cgroup_taskset_migrate(&tset, cgrp);
}
/**
--
2.4.0
^ permalink raw reply related [flat|nested] 59+ messages in thread* [PATCH 6/7] cgroup: separate out taskset operations from cgroup_migrate()
@ 2015-05-18 19:49 ` Tejun Heo
0 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-18 19:49 UTC (permalink / raw)
To: lizefan; +Cc: cgroups, hannes, mhocko, linux-mm, Tejun Heo
Currently, cgroup_migreate() implements large part of the migration
logic inline including building the target taskset and actually
migrating them. This patch separates out the following taskset
operations.
CGROUP_TASKSET_INIT() : taskset initializer
cgroup_taskset_add() : add a task to a taskset
cgroup_taskset_migrate() : migrate a taskset to the destination cgroup
This will be used to implement atomic multi-process migration in
cgroup_update_dfl_csses(). This is pure reorganization which doesn't
introduce any functional changes.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
kernel/cgroup.c | 211 +++++++++++++++++++++++++++++++++-----------------------
1 file changed, 125 insertions(+), 86 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b36707b..1d86d17 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1991,6 +1991,49 @@ struct cgroup_taskset {
struct task_struct *cur_task;
};
+#define CGROUP_TASKSET_INIT(tset) (struct cgroup_taskset){ \
+ .src_csets = LIST_HEAD_INIT(tset.src_csets), \
+ .dst_csets = LIST_HEAD_INIT(tset.dst_csets), \
+ .csets = &tset.src_csets, \
+}
+
+/**
+ * cgroup_taskset_add - try to add a migration target task to a taskset
+ * @task: target task
+ * @tset: target taskset
+ *
+ * Add @task, which is a migration target, to @tset. This function becomes
+ * noop if @task doesn't need to be migrated. @task's css_set should have
+ * been added as a migration source and @task->cg_list will be moved from
+ * the css_set's tasks list to mg_tasks one.
+ */
+static void cgroup_taskset_add(struct task_struct *task,
+ struct cgroup_taskset *tset)
+{
+ struct css_set *cset;
+
+ lockdep_assert_held(&css_set_rwsem);
+
+ /* @task either already exited or can't exit until the end */
+ if (task->flags & PF_EXITING)
+ return;
+
+ /* leave @task alone if post_fork() hasn't linked it yet */
+ if (list_empty(&task->cg_list))
+ return;
+
+ cset = task_css_set(task);
+ if (!cset->mg_src_cgrp)
+ return;
+
+ list_move_tail(&task->cg_list, &cset->mg_tasks);
+ if (list_empty(&cset->mg_node))
+ list_add_tail(&cset->mg_node, &tset->src_csets);
+ if (list_empty(&cset->mg_dst_cset->mg_node))
+ list_move_tail(&cset->mg_dst_cset->mg_node,
+ &tset->dst_csets);
+}
+
/**
* cgroup_taskset_first - reset taskset and return the first task
* @tset: taskset of interest
@@ -2075,6 +2118,84 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp,
}
/**
+ * cgroup_taskset_migrate - migrate a taskset to a cgroup
+ * @tset: taget taskset
+ * @dst_cgrp: destination cgroup
+ *
+ * Migrate tasks in @tset to @dst_cgrp. This function fails iff one of the
+ * ->can_attach callbacks fails and guarantees that either all or none of
+ * the tasks in @tset are migrated. @tset is consumed regardless of
+ * success.
+ */
+static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
+ struct cgroup *dst_cgrp)
+{
+ struct cgroup_subsys_state *css, *failed_css = NULL;
+ struct task_struct *task, *tmp_task;
+ struct css_set *cset, *tmp_cset;
+ int i, ret;
+
+ /* methods shouldn't be called if no task is actually migrating */
+ if (list_empty(&tset->src_csets))
+ return 0;
+
+ /* check that we can legitimately attach to the cgroup */
+ for_each_e_css(css, i, dst_cgrp) {
+ if (css->ss->can_attach) {
+ ret = css->ss->can_attach(css, tset);
+ if (ret) {
+ failed_css = css;
+ goto out_cancel_attach;
+ }
+ }
+ }
+
+ /*
+ * Now that we're guaranteed success, proceed to move all tasks to
+ * the new cgroup. There are no failure cases after here, so this
+ * is the commit point.
+ */
+ down_write(&css_set_rwsem);
+ list_for_each_entry(cset, &tset->src_csets, mg_node) {
+ list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list)
+ cgroup_task_migrate(cset->mg_src_cgrp, task,
+ cset->mg_dst_cset);
+ }
+ up_write(&css_set_rwsem);
+
+ /*
+ * Migration is committed, all target tasks are now on dst_csets.
+ * Nothing is sensitive to fork() after this point. Notify
+ * controllers that migration is complete.
+ */
+ tset->csets = &tset->dst_csets;
+
+ for_each_e_css(css, i, dst_cgrp)
+ if (css->ss->attach)
+ css->ss->attach(css, tset);
+
+ ret = 0;
+ goto out_release_tset;
+
+out_cancel_attach:
+ for_each_e_css(css, i, dst_cgrp) {
+ if (css == failed_css)
+ break;
+ if (css->ss->cancel_attach)
+ css->ss->cancel_attach(css, tset);
+ }
+out_release_tset:
+ down_write(&css_set_rwsem);
+ list_splice_init(&tset->dst_csets, &tset->src_csets);
+ list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
+ list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
+ list_del_init(&cset->mg_node);
+ }
+ up_write(&css_set_rwsem);
+ return ret;
+}
+
+/**
* cgroup_migrate_finish - cleanup after attach
* @preloaded_csets: list of preloaded css_sets
*
@@ -2228,15 +2349,8 @@ err:
static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
struct cgroup *cgrp)
{
- struct cgroup_taskset tset = {
- .src_csets = LIST_HEAD_INIT(tset.src_csets),
- .dst_csets = LIST_HEAD_INIT(tset.dst_csets),
- .csets = &tset.src_csets,
- };
- struct cgroup_subsys_state *css, *failed_css = NULL;
- struct css_set *cset, *tmp_cset;
- struct task_struct *task, *tmp_task;
- int i, ret;
+ struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset);
+ struct task_struct *task;
/*
* Prevent freeing of tasks while we take a snapshot. Tasks that are
@@ -2247,89 +2361,14 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
rcu_read_lock();
task = leader;
do {
- /* @task either already exited or can't exit until the end */
- if (task->flags & PF_EXITING)
- goto next;
-
- /* leave @task alone if post_fork() hasn't linked it yet */
- if (list_empty(&task->cg_list))
- goto next;
-
- cset = task_css_set(task);
- if (!cset->mg_src_cgrp)
- goto next;
-
- list_move_tail(&task->cg_list, &cset->mg_tasks);
- if (list_empty(&cset->mg_node))
- list_add_tail(&cset->mg_node, &tset.src_csets);
- if (list_empty(&cset->mg_dst_cset->mg_node))
- list_move_tail(&cset->mg_dst_cset->mg_node,
- &tset.dst_csets);
- next:
+ cgroup_taskset_add(task, &tset);
if (!threadgroup)
break;
} while_each_thread(leader, task);
rcu_read_unlock();
up_write(&css_set_rwsem);
- /* methods shouldn't be called if no task is actually migrating */
- if (list_empty(&tset.src_csets))
- return 0;
-
- /* check that we can legitimately attach to the cgroup */
- for_each_e_css(css, i, cgrp) {
- if (css->ss->can_attach) {
- ret = css->ss->can_attach(css, &tset);
- if (ret) {
- failed_css = css;
- goto out_cancel_attach;
- }
- }
- }
-
- /*
- * Now that we're guaranteed success, proceed to move all tasks to
- * the new cgroup. There are no failure cases after here, so this
- * is the commit point.
- */
- down_write(&css_set_rwsem);
- list_for_each_entry(cset, &tset.src_csets, mg_node) {
- list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list)
- cgroup_task_migrate(cset->mg_src_cgrp, task,
- cset->mg_dst_cset);
- }
- up_write(&css_set_rwsem);
-
- /*
- * Migration is committed, all target tasks are now on dst_csets.
- * Nothing is sensitive to fork() after this point. Notify
- * controllers that migration is complete.
- */
- tset.csets = &tset.dst_csets;
-
- for_each_e_css(css, i, cgrp)
- if (css->ss->attach)
- css->ss->attach(css, &tset);
-
- ret = 0;
- goto out_release_tset;
-
-out_cancel_attach:
- for_each_e_css(css, i, cgrp) {
- if (css == failed_css)
- break;
- if (css->ss->cancel_attach)
- css->ss->cancel_attach(css, &tset);
- }
-out_release_tset:
- down_write(&css_set_rwsem);
- list_splice_init(&tset.dst_csets, &tset.src_csets);
- list_for_each_entry_safe(cset, tmp_cset, &tset.src_csets, mg_node) {
- list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
- list_del_init(&cset->mg_node);
- }
- up_write(&css_set_rwsem);
- return ret;
+ return cgroup_taskset_migrate(&tset, cgrp);
}
/**
--
2.4.0
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 59+ messages in thread
* [PATCH 7/7] cgroup: make cgroup_update_dfl_csses() migrate all target processes atomically
2015-05-18 19:49 ` Tejun Heo
@ 2015-05-18 19:49 ` Tejun Heo
-1 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-18 19:49 UTC (permalink / raw)
To: lizefan-hv44wF8Li93QT0dZR+AlfA
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA, hannes-druUgvl0LCNAfugRpC6u6w,
mhocko-AlSwsSmVLrQ, linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Tejun Heo
cgroup_update_dfl_csses() is responsible for migrating processes when
controllers are enabled or disabled on the default hierarchy. As the
css association changes for all the processes in the affected cgroups,
this involves migrating multiple processes.
Up until now, it was implemented by migrating process-by-process until
the source css_sets are empty; however, this means that if a process
fails to migrate after some succeed before it, the recovery is very
tricky. This was considered okay as subsystems weren't allowed to
reject process migration on the default hierarchy; unfortunately,
enforcing this policy turned out to be problematic for certain types
of resources - realtime slices for now.
As such, the default hierarchy is gonna allow restricted failures
during migration and to support that this patch makes
cgroup_update_dfl_csses() migrate all target processes atomically
rather than one-by-one. The preceding patches made subsystems ready
for multi-process migration and factored out taskset operations making
this almost trivial. All tasks of the target processes are put in the
same taskset and the migration operations are performed once which
either fails or succeeds for all.
Signed-off-by: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
kernel/cgroup.c | 44 ++++++++------------------------------------
1 file changed, 8 insertions(+), 36 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1d86d17..978d741 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2616,6 +2616,7 @@ static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
static int cgroup_update_dfl_csses(struct cgroup *cgrp)
{
LIST_HEAD(preloaded_csets);
+ struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset);
struct cgroup_subsys_state *css;
struct css_set *src_cset;
int ret;
@@ -2644,50 +2645,21 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
if (ret)
goto out_finish;
+ down_write(&css_set_rwsem);
list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
- struct task_struct *last_task = NULL, *task;
+ struct task_struct *task, *ntask;
/* src_csets precede dst_csets, break on the first dst_cset */
if (!src_cset->mg_src_cgrp)
break;
- /*
- * All tasks in src_cset need to be migrated to the
- * matching dst_cset. Empty it process by process. We
- * walk tasks but migrate processes. The leader might even
- * belong to a different cset but such src_cset would also
- * be among the target src_csets because the default
- * hierarchy enforces per-process membership.
- */
- while (true) {
- down_read(&css_set_rwsem);
- task = list_first_entry_or_null(&src_cset->tasks,
- struct task_struct, cg_list);
- if (task) {
- task = task->group_leader;
- WARN_ON_ONCE(!task_css_set(task)->mg_src_cgrp);
- get_task_struct(task);
- }
- up_read(&css_set_rwsem);
-
- if (!task)
- break;
-
- /* guard against possible infinite loop */
- if (WARN(last_task == task,
- "cgroup: update_dfl_csses failed to make progress, aborting in inconsistent state\n"))
- goto out_finish;
- last_task = task;
-
- ret = cgroup_migrate(task, true, src_cset->dfl_cgrp);
-
- put_task_struct(task);
-
- if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
- goto out_finish;
- }
+ /* all tasks in src_csets need to be migrated */
+ list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
+ cgroup_taskset_add(task, &tset);
}
+ up_write(&css_set_rwsem);
+ ret = cgroup_taskset_migrate(&tset, cgrp);
out_finish:
cgroup_migrate_finish(&preloaded_csets);
percpu_up_write(&cgroup_threadgroup_rwsem);
--
2.4.0
^ permalink raw reply related [flat|nested] 59+ messages in thread* [PATCH 7/7] cgroup: make cgroup_update_dfl_csses() migrate all target processes atomically
@ 2015-05-18 19:49 ` Tejun Heo
0 siblings, 0 replies; 59+ messages in thread
From: Tejun Heo @ 2015-05-18 19:49 UTC (permalink / raw)
To: lizefan; +Cc: cgroups, hannes, mhocko, linux-mm, Tejun Heo
cgroup_update_dfl_csses() is responsible for migrating processes when
controllers are enabled or disabled on the default hierarchy. As the
css association changes for all the processes in the affected cgroups,
this involves migrating multiple processes.
Up until now, it was implemented by migrating process-by-process until
the source css_sets are empty; however, this means that if a process
fails to migrate after some succeed before it, the recovery is very
tricky. This was considered okay as subsystems weren't allowed to
reject process migration on the default hierarchy; unfortunately,
enforcing this policy turned out to be problematic for certain types
of resources - realtime slices for now.
As such, the default hierarchy is gonna allow restricted failures
during migration and to support that this patch makes
cgroup_update_dfl_csses() migrate all target processes atomically
rather than one-by-one. The preceding patches made subsystems ready
for multi-process migration and factored out taskset operations making
this almost trivial. All tasks of the target processes are put in the
same taskset and the migration operations are performed once which
either fails or succeeds for all.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
kernel/cgroup.c | 44 ++++++++------------------------------------
1 file changed, 8 insertions(+), 36 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1d86d17..978d741 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2616,6 +2616,7 @@ static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
static int cgroup_update_dfl_csses(struct cgroup *cgrp)
{
LIST_HEAD(preloaded_csets);
+ struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset);
struct cgroup_subsys_state *css;
struct css_set *src_cset;
int ret;
@@ -2644,50 +2645,21 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
if (ret)
goto out_finish;
+ down_write(&css_set_rwsem);
list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
- struct task_struct *last_task = NULL, *task;
+ struct task_struct *task, *ntask;
/* src_csets precede dst_csets, break on the first dst_cset */
if (!src_cset->mg_src_cgrp)
break;
- /*
- * All tasks in src_cset need to be migrated to the
- * matching dst_cset. Empty it process by process. We
- * walk tasks but migrate processes. The leader might even
- * belong to a different cset but such src_cset would also
- * be among the target src_csets because the default
- * hierarchy enforces per-process membership.
- */
- while (true) {
- down_read(&css_set_rwsem);
- task = list_first_entry_or_null(&src_cset->tasks,
- struct task_struct, cg_list);
- if (task) {
- task = task->group_leader;
- WARN_ON_ONCE(!task_css_set(task)->mg_src_cgrp);
- get_task_struct(task);
- }
- up_read(&css_set_rwsem);
-
- if (!task)
- break;
-
- /* guard against possible infinite loop */
- if (WARN(last_task == task,
- "cgroup: update_dfl_csses failed to make progress, aborting in inconsistent state\n"))
- goto out_finish;
- last_task = task;
-
- ret = cgroup_migrate(task, true, src_cset->dfl_cgrp);
-
- put_task_struct(task);
-
- if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
- goto out_finish;
- }
+ /* all tasks in src_csets need to be migrated */
+ list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
+ cgroup_taskset_add(task, &tset);
}
+ up_write(&css_set_rwsem);
+ ret = cgroup_taskset_migrate(&tset, cgrp);
out_finish:
cgroup_migrate_finish(&preloaded_csets);
percpu_up_write(&cgroup_threadgroup_rwsem);
--
2.4.0
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 59+ messages in thread