* [PATCH] cpu_hotplug: don't affect current task's affinity
@ 2009-07-14 8:47 Lai Jiangshan
2009-07-22 22:02 ` Andrew Morton
0 siblings, 1 reply; 3+ messages in thread
From: Lai Jiangshan @ 2009-07-14 8:47 UTC (permalink / raw)
To: Andrew Morton, Rusty Russell, LKML
_cpu_down() changes current task's affinity and then
recovers it at the end.
It brings two defects:
1) The recovery will fail under some conditions.
# grep Cpus_allowed_list /proc/$$/status
Cpus_allowed_list: 0-3
# taskset -pc 2 $$
pid 29075's current affinity list: 0-3
pid 29075's new affinity list: 2
# grep Cpus_allowed_list /proc/$$/status
Cpus_allowed_list: 2
# echo 0 > /sys/devices/system/cpu/cpu2/online
# grep Cpus_allowed_list /proc/$$/status
Cpus_allowed_list: 0
In Linux, a task whose "Cpus_allowed_list" was originally "2"
normally ends up with "0-1,3" after cpu#2 is offlined.
This "Cpus_allowed_list: 0" is suspicious.
2) If the current task is a userspace task, the user may change
its cpu-affinity at the same time. The user may get an unexpected
result if _cpu_down() changes the current task's affinity.
Actually, we don't have to change the affinity.
We create a kernel thread to do the work instead.
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8ce1004..901caeb 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -162,15 +162,17 @@ static inline void check_for_tasks(int cpu)
write_unlock_irq(&tasklist_lock);
}
-struct take_cpu_down_param {
+struct cpu_down_param {
unsigned long mod;
- void *hcpu;
+ unsigned int cpu;
+ int ret;
+ struct completion done;
};
/* Take this CPU down. */
static int __ref take_cpu_down(void *_param)
{
- struct take_cpu_down_param *param = _param;
+ struct cpu_down_param *param = _param;
int err;
/* Ensure this CPU doesn't handle any more interrupts. */
@@ -179,7 +181,7 @@ static int __ref take_cpu_down(void *_param)
return err;
raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
- param->hcpu);
+ (void *)(long)param->cpu);
/* Force idle task to run as soon as we yield: it should
immediately notice cpu is offline and die quickly. */
@@ -187,26 +189,13 @@ static int __ref take_cpu_down(void *_param)
return 0;
}
-/* Requires cpu_add_remove_lock to be held */
-static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+static int __ref _cpu_down_thread(void *_param)
{
+ struct cpu_down_param *param = _param;
int err, nr_calls = 0;
- cpumask_var_t old_allowed;
+ unsigned long mod = param->mod;
+ unsigned int cpu = param->cpu;
void *hcpu = (void *)(long)cpu;
- unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
- struct take_cpu_down_param tcd_param = {
- .mod = mod,
- .hcpu = hcpu,
- };
-
- if (num_online_cpus() == 1)
- return -EBUSY;
-
- if (!cpu_online(cpu))
- return -EINVAL;
-
- if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
- return -ENOMEM;
cpu_hotplug_begin();
err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
@@ -222,18 +211,16 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
}
/* Ensure that we are not runnable on dying cpu */
- cpumask_copy(old_allowed, &current->cpus_allowed);
- set_cpus_allowed_ptr(current,
- cpumask_of(cpumask_any_but(cpu_online_mask, cpu)));
+ set_cpus_allowed_ptr(current, cpu_active_mask);
- err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+ err = __stop_machine(take_cpu_down, param, cpumask_of(cpu));
if (err) {
/* CPU didn't die: tell everyone. Can't complain. */
if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
hcpu) == NOTIFY_BAD)
BUG();
- goto out_allowed;
+ goto out_release;
}
BUG_ON(cpu_online(cpu));
@@ -251,8 +238,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
check_for_tasks(cpu);
-out_allowed:
- set_cpus_allowed_ptr(current, old_allowed);
out_release:
cpu_hotplug_done();
if (!err) {
@@ -260,8 +245,35 @@ out_release:
hcpu) == NOTIFY_BAD)
BUG();
}
- free_cpumask_var(old_allowed);
- return err;
+ param->ret = err;
+ complete(&param->done);
+
+ return 0;
+}
+
+/* Requires cpu_add_remove_lock to be held */
+static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+{
+ struct task_struct *k;
+ struct cpu_down_param param = {
+ .mod = tasks_frozen ? CPU_TASKS_FROZEN : 0,
+ .cpu = cpu,
+ .ret = 0,
+ };
+
+ if (num_online_cpus() == 1)
+ return -EBUSY;
+
+ if (!cpu_online(cpu))
+ return -EINVAL;
+
+ init_completion(&param.done);
+ k = kthread_run(_cpu_down_thread, &param, "kcpu_down");
+ if (IS_ERR(k))
+ return PTR_ERR(k);
+ wait_for_completion(&param.done);
+
+ return param.ret;
}
int __ref cpu_down(unsigned int cpu)
^ permalink raw reply related [flat|nested] 3+ messages in thread* Re: [PATCH] cpu_hotplug: don't affect current task's affinity
2009-07-14 8:47 [PATCH] cpu_hotplug: don't affect current task's affinity Lai Jiangshan
@ 2009-07-22 22:02 ` Andrew Morton
2009-07-23 2:06 ` Lai Jiangshan
0 siblings, 1 reply; 3+ messages in thread
From: Andrew Morton @ 2009-07-22 22:02 UTC (permalink / raw)
To: Lai Jiangshan; +Cc: rusty, linux-kernel, Ingo Molnar
On Tue, 14 Jul 2009 16:47:34 +0800
Lai Jiangshan <laijs@cn.fujitsu.com> wrote:
>
> _cpu_down() changes current task's affinity and then
> recovers it at the end.
>
> It brings two defects:
>
> 1) The recovering will failed in some condition.
>
> # grep Cpus_allowed_list /proc/$$/status
> Cpus_allowed_list: 0-3
>
> # taskset -pc 2 $$
> pid 29075's current affinity list: 0-3
> pid 29075's new affinity list: 2
>
> # grep Cpus_allowed_list /proc/$$/status
> Cpus_allowed_list: 2
>
> # echo 0 > /sys/devices/system/cpu/cpu2/online
>
> # grep Cpus_allowed_list /proc/$$/status
> Cpus_allowed_list: 0
>
> In linux, tasks' "Cpus_allowed_list" which are "2" originally
> will become "0-1,3" after the cpu#2 is offlined.
>
> This "Cpus_allowed_list: 0" is suspicionful.
>
> 2) current task is a userspace task, the user may change
> its cpu-affinity at the same time. The user may get unexpected
> result if _cpu_down() changes current task's affinity.
>
> Actually, we don't have to change the affinity.
> We create a kernel thread to do the works.
>
I've rewritten the description as below. Can you check it please?
: _cpu_down() changes the current task's affinity and then recovers it at the
: end.
:
: It has two problems:
:
: 1) The recovery of the current tasks's cpus_allowed will fail under
: some conditions.
:
: # grep Cpus_allowed_list /proc/$$/status
: Cpus_allowed_list: 0-3
:
: # taskset -pc 2 $$
: pid 29075's current affinity list: 0-3
: pid 29075's new affinity list: 2
:
: # grep Cpus_allowed_list /proc/$$/status
: Cpus_allowed_list: 2
:
: # echo 0 > /sys/devices/system/cpu/cpu2/online
:
: # grep Cpus_allowed_list /proc/$$/status
: Cpus_allowed_list: 0
:
: Here, the Cpus_allowed_list was originally "2" and has become
: "0-1,3" after cpu #2 is offlined.
:
: This "Cpus_allowed_list: 0" is incorrect.
:
: 2) If the current task is a userspace task, the user may change its
: cpu-affinity during the CPU hot-unplugging. This change can be
: overwritten when _cpu_down() changes the current task's affinity.
:
:
: Fix all this by not changing the current tasks's affinity. Instead we
: create a kernel thread to do the work.
I don't fully understand 1). You say that the tasks's cpus_allowed has
become "0-1,3". But it hasn't - it has become "0".
Are you saying that the mask is internally 0-1,3 and that the
/proc/$$/status file is incorrectly displaying it?
Or are you saying that the mask _should_ have been 0-1,3 but the kernel
incorrectly set it to "0"?
Also, it says "The recovery of the current tasks's cpus_allowed will
fail under some conditions". What are those conditions?
Thanks.
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] cpu_hotplug: don't affect current task's affinity
2009-07-22 22:02 ` Andrew Morton
@ 2009-07-23 2:06 ` Lai Jiangshan
0 siblings, 0 replies; 3+ messages in thread
From: Lai Jiangshan @ 2009-07-23 2:06 UTC (permalink / raw)
To: Andrew Morton; +Cc: rusty, linux-kernel, Ingo Molnar
Andrew Morton wrote:
>
> Or are you saying that the mask _should_ have been 0-1,3 but the kernel
> incorrectly set it to "0"?
>
Yes, the mask should become 0-1,3 when the original is 2, after cpu2 is offlined.
When cpu2 is offlined:
original mask after cpu2 is offlined
1 1
1-2 1
0-3 0-1,3
2 0-1,3
2 (the task that does the offline) 0
I think the last line is incorrect. This patch fixes it.
>
> Also, it says "The recovery of the current tasks's cpus_allowed will
> fail under some conditions". What are those conditions?
>
One condition is described in the changelog (the same as above).
An example of another condition: if the CPUSET of the task is changed
while this task is doing the offline, the final recovery may fail.
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2009-07-23 2:06 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-07-14 8:47 [PATCH] cpu_hotplug: don't affect current task's affinity Lai Jiangshan
2009-07-22 22:02 ` Andrew Morton
2009-07-23 2:06 ` Lai Jiangshan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox