From: Oleg Nesterov <oleg@redhat.com>
To: Andrew Morton <akpm@linux-foundation.org>,
Ingo Molnar <mingo@elte.hu>, Lai Jiangshan <laijs@cn.fujitsu.com>,
Rusty Russell <rusty@rustcorp.com.au>
Cc: linux-kernel@vger.kernel.org, Li Zefan <lizf@cn.fujitsu.com>,
Miao Xie <miaox@cn.fujitsu.com>, Paul Menage <menage@google.com>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Gautham R Shenoy <ego@in.ibm.com>
Subject: [PATCH 1/1] cpu_hotplug: don't play with current->cpus_allowed
Date: Wed, 29 Jul 2009 23:43:10 +0200 [thread overview]
Message-ID: <20090729214310.GB24631@redhat.com> (raw)
In-Reply-To: <20090729212125.GA16970@redhat.com>
_cpu_down() changes the current task's affinity and then recovers it at
the end. The problems are well known: we can't restore old_allowed if it
was bound to the now-dead-cpu, and we can race with the userspace which
can change cpu-affinity during unplug.
_cpu_down() should not play with current->cpus_allowed at all. Instead,
take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable()
removes the dying cpu from cpu_online_mask.
Reported-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
include/linux/sched.h | 1 +
kernel/sched.c | 2 +-
kernel/cpu.c | 19 ++++++-------------
3 files changed, 8 insertions(+), 14 deletions(-)
--- CPUHP/include/linux/sched.h~CPU_DOWN_AFF 2009-07-23 17:06:39.000000000 +0200
+++ CPUHP/include/linux/sched.h 2009-07-29 23:27:44.000000000 +0200
@@ -1794,6 +1794,7 @@ extern void sched_clock_idle_sleep_event
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
#ifdef CONFIG_HOTPLUG_CPU
+extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
extern void idle_task_exit(void);
#else
static inline void idle_task_exit(void) {}
--- CPUHP/kernel/sched.c~CPU_DOWN_AFF 2009-07-29 22:18:33.000000000 +0200
+++ CPUHP/kernel/sched.c 2009-07-29 23:27:44.000000000 +0200
@@ -7118,7 +7118,7 @@ static int __migrate_task_irq(struct tas
/*
* Figure out where task on dead CPU should go, use force if necessary.
*/
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
{
int dest_cpu;
const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
--- CPUHP/kernel/cpu.c~CPU_DOWN_AFF 2009-06-23 14:23:52.000000000 +0200
+++ CPUHP/kernel/cpu.c 2009-07-29 23:27:44.000000000 +0200
@@ -163,6 +163,7 @@ static inline void check_for_tasks(int c
}
struct take_cpu_down_param {
+ struct task_struct *caller;
unsigned long mod;
void *hcpu;
};
@@ -171,6 +172,7 @@ struct take_cpu_down_param {
static int __ref take_cpu_down(void *_param)
{
struct take_cpu_down_param *param = _param;
+ unsigned int cpu = (unsigned long)param->hcpu;
int err;
/* Ensure this CPU doesn't handle any more interrupts. */
@@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_pa
raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
param->hcpu);
+ if (task_cpu(param->caller) == cpu)
+ move_task_off_dead_cpu(cpu, param->caller);
/* Force idle task to run as soon as we yield: it should
immediately notice cpu is offline and die quickly. */
sched_idle_next();
@@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_pa
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
int err, nr_calls = 0;
- cpumask_var_t old_allowed;
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
struct take_cpu_down_param tcd_param = {
+ .caller = current,
.mod = mod,
.hcpu = hcpu,
};
@@ -205,9 +209,6 @@ static int __ref _cpu_down(unsigned int
if (!cpu_online(cpu))
return -EINVAL;
- if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
- return -ENOMEM;
-
cpu_hotplug_begin();
err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
hcpu, -1, &nr_calls);
@@ -221,11 +222,6 @@ static int __ref _cpu_down(unsigned int
goto out_release;
}
- /* Ensure that we are not runnable on dying cpu */
- cpumask_copy(old_allowed, ¤t->cpus_allowed);
- set_cpus_allowed_ptr(current,
- cpumask_of(cpumask_any_but(cpu_online_mask, cpu)));
-
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
if (err) {
/* CPU didn't die: tell everyone. Can't complain. */
@@ -233,7 +229,7 @@ static int __ref _cpu_down(unsigned int
hcpu) == NOTIFY_BAD)
BUG();
- goto out_allowed;
+ goto out_release;
}
BUG_ON(cpu_online(cpu));
@@ -251,8 +247,6 @@ static int __ref _cpu_down(unsigned int
check_for_tasks(cpu);
-out_allowed:
- set_cpus_allowed_ptr(current, old_allowed);
out_release:
cpu_hotplug_done();
if (!err) {
@@ -260,7 +254,6 @@ out_release:
hcpu) == NOTIFY_BAD)
BUG();
}
- free_cpumask_var(old_allowed);
return err;
}
next prev parent reply other threads:[~2009-07-29 21:47 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-07-29 2:33 + cpu_hotplug-dont-affect-current-tasks-affinity.patch added to -mm tree Oleg Nesterov
2009-07-29 21:21 ` Oleg Nesterov
2009-07-29 21:22 ` [PATCH] cpusets: fix deadlock with cpu_down()->cpuset_lock() Oleg Nesterov
2009-07-29 23:00 ` Oleg Nesterov
2009-07-30 1:53 ` Lai Jiangshan
2009-07-30 17:51 ` Oleg Nesterov
2009-07-31 2:23 ` Lai Jiangshan
2009-08-01 4:42 ` [PATCH] cpusets: rework guarantee_online_cpus() to fix deadlock with cpu_down() Oleg Nesterov
2009-08-01 5:34 ` Oleg Nesterov
2009-08-02 2:18 ` Lai Jiangshan
2009-08-02 6:55 ` Oleg Nesterov
2009-08-04 7:35 ` Lai Jiangshan
2009-08-04 16:36 ` Oleg Nesterov
2009-07-29 21:42 ` [PATCH 0/1] cpu_hotplug: don't play with current->cpus_allowed Oleg Nesterov
2009-07-29 21:43 ` Oleg Nesterov [this message]
2009-07-30 2:01 ` [PATCH 1/1] " Lai Jiangshan
2009-07-30 16:32 ` Oleg Nesterov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090729214310.GB24631@redhat.com \
--to=oleg@redhat.com \
--cc=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=ego@in.ibm.com \
--cc=laijs@cn.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=lizf@cn.fujitsu.com \
--cc=menage@google.com \
--cc=miaox@cn.fujitsu.com \
--cc=mingo@elte.hu \
--cc=rusty@rustcorp.com.au \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox