public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 5/6] _cpu_down: don't play with current->cpus_allowed
@ 2010-03-15  9:10 Oleg Nesterov
  2010-03-15 19:45 ` Rafael J. Wysocki
  2010-04-02 19:12 ` [tip:sched/core] sched: _cpu_down(): Don't " tip-bot for Oleg Nesterov
  0 siblings, 2 replies; 3+ messages in thread
From: Oleg Nesterov @ 2010-03-15  9:10 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar
  Cc: Ben Blum, Jiri Slaby, Lai Jiangshan, Li Zefan, Miao Xie,
	Paul Menage, Rafael J. Wysocki, Tejun Heo, linux-kernel

_cpu_down() changes the current task's affinity and then restores it at
the end. The problems are well known: we can't restore old_allowed if it
was bound to the now-dead cpu, and we can race with userspace, which
can change the cpu affinity during unplug.

_cpu_down() should not play with current->cpus_allowed at all. Instead,
take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable()
removes the dying cpu from cpu_online_mask.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---

 include/linux/sched.h |    1 +
 kernel/sched.c        |    2 +-
 kernel/cpu.c          |   18 ++++++------------
 3 files changed, 8 insertions(+), 13 deletions(-)

--- 34-rc1/include/linux/sched.h~4_CPU_DOWN_AFFINITY	2010-03-15 09:37:46.000000000 +0100
+++ 34-rc1/include/linux/sched.h	2010-03-15 09:41:51.000000000 +0100
@@ -1843,6 +1843,7 @@ extern void sched_clock_idle_sleep_event
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 
 #ifdef CONFIG_HOTPLUG_CPU
+extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
 extern void idle_task_exit(void);
 #else
 static inline void idle_task_exit(void) {}
--- 34-rc1/kernel/sched.c~4_CPU_DOWN_AFFINITY	2010-03-15 09:41:28.000000000 +0100
+++ 34-rc1/kernel/sched.c	2010-03-15 09:41:51.000000000 +0100
@@ -5503,7 +5503,7 @@ static int migration_thread(void *data)
 /*
  * Figure out where task on dead CPU should go, use force if necessary.
  */
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
 	struct rq *rq = cpu_rq(dead_cpu);
 	int needs_cpu, dest_cpu;
--- 34-rc1/kernel/cpu.c~4_CPU_DOWN_AFFINITY	2010-03-15 09:37:46.000000000 +0100
+++ 34-rc1/kernel/cpu.c	2010-03-15 09:41:51.000000000 +0100
@@ -163,6 +163,7 @@ static inline void check_for_tasks(int c
 }
 
 struct take_cpu_down_param {
+	struct task_struct *caller;
 	unsigned long mod;
 	void *hcpu;
 };
@@ -171,6 +172,7 @@ struct take_cpu_down_param {
 static int __ref take_cpu_down(void *_param)
 {
 	struct take_cpu_down_param *param = _param;
+	unsigned int cpu = (unsigned long)param->hcpu;
 	int err;
 
 	/* Ensure this CPU doesn't handle any more interrupts. */
@@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_pa
 	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
 				param->hcpu);
 
+	if (task_cpu(param->caller) == cpu)
+		move_task_off_dead_cpu(cpu, param->caller);
 	/* Force idle task to run as soon as we yield: it should
 	   immediately notice cpu is offline and die quickly. */
 	sched_idle_next();
@@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_pa
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 {
 	int err, nr_calls = 0;
-	cpumask_var_t old_allowed;
 	void *hcpu = (void *)(long)cpu;
 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 	struct take_cpu_down_param tcd_param = {
+		.caller = current,
 		.mod = mod,
 		.hcpu = hcpu,
 	};
@@ -205,9 +209,6 @@ static int __ref _cpu_down(unsigned int 
 	if (!cpu_online(cpu))
 		return -EINVAL;
 
-	if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
-		return -ENOMEM;
-
 	cpu_hotplug_begin();
 	set_cpu_active(cpu, false);
 	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
@@ -224,10 +225,6 @@ static int __ref _cpu_down(unsigned int 
 		goto out_release;
 	}
 
-	/* Ensure that we are not runnable on dying cpu */
-	cpumask_copy(old_allowed, &current->cpus_allowed);
-	set_cpus_allowed_ptr(current, cpu_active_mask);
-
 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 	if (err) {
 		set_cpu_active(cpu, true);
@@ -236,7 +233,7 @@ static int __ref _cpu_down(unsigned int 
 					    hcpu) == NOTIFY_BAD)
 			BUG();
 
-		goto out_allowed;
+		goto out_release;
 	}
 	BUG_ON(cpu_online(cpu));
 
@@ -254,8 +251,6 @@ static int __ref _cpu_down(unsigned int 
 
 	check_for_tasks(cpu);
 
-out_allowed:
-	set_cpus_allowed_ptr(current, old_allowed);
 out_release:
 	cpu_hotplug_done();
 	if (!err) {
@@ -263,7 +258,6 @@ out_release:
 					    hcpu) == NOTIFY_BAD)
 			BUG();
 	}
-	free_cpumask_var(old_allowed);
 	return err;
 }
 


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 5/6] _cpu_down: don't play with current->cpus_allowed
  2010-03-15  9:10 [PATCH 5/6] _cpu_down: don't play with current->cpus_allowed Oleg Nesterov
@ 2010-03-15 19:45 ` Rafael J. Wysocki
  2010-04-02 19:12 ` [tip:sched/core] sched: _cpu_down(): Don't " tip-bot for Oleg Nesterov
  1 sibling, 0 replies; 3+ messages in thread
From: Rafael J. Wysocki @ 2010-03-15 19:45 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Peter Zijlstra, Ingo Molnar, Ben Blum, Jiri Slaby, Lai Jiangshan,
	Li Zefan, Miao Xie, Paul Menage, Tejun Heo, linux-kernel

On Monday 15 March 2010, Oleg Nesterov wrote:
> _cpu_down() changes the current task's affinity and then recovers it at
> the end. The problems are well known: we can't restore old_allowed if it
> was bound to the now-dead-cpu, and we can race with the userspace which
> can change cpu-affinity during unplug.
> 
> _cpu_down() should not play with current->cpus_allowed at all. Instead,
> take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable()
> removes the dying cpu from cpu_online_mask.
> 
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>

Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
> ---
> 
>  include/linux/sched.h |    1 +
>  kernel/sched.c        |    2 +-
>  kernel/cpu.c          |   18 ++++++------------
>  3 files changed, 8 insertions(+), 13 deletions(-)
> 
> --- 34-rc1/include/linux/sched.h~4_CPU_DOWN_AFFINITY	2010-03-15 09:37:46.000000000 +0100
> +++ 34-rc1/include/linux/sched.h	2010-03-15 09:41:51.000000000 +0100
> @@ -1843,6 +1843,7 @@ extern void sched_clock_idle_sleep_event
>  extern void sched_clock_idle_wakeup_event(u64 delta_ns);
>  
>  #ifdef CONFIG_HOTPLUG_CPU
> +extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
>  extern void idle_task_exit(void);
>  #else
>  static inline void idle_task_exit(void) {}
> --- 34-rc1/kernel/sched.c~4_CPU_DOWN_AFFINITY	2010-03-15 09:41:28.000000000 +0100
> +++ 34-rc1/kernel/sched.c	2010-03-15 09:41:51.000000000 +0100
> @@ -5503,7 +5503,7 @@ static int migration_thread(void *data)
>  /*
>   * Figure out where task on dead CPU should go, use force if necessary.
>   */
> -static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
> +void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
>  {
>  	struct rq *rq = cpu_rq(dead_cpu);
>  	int needs_cpu, dest_cpu;
> --- 34-rc1/kernel/cpu.c~4_CPU_DOWN_AFFINITY	2010-03-15 09:37:46.000000000 +0100
> +++ 34-rc1/kernel/cpu.c	2010-03-15 09:41:51.000000000 +0100
> @@ -163,6 +163,7 @@ static inline void check_for_tasks(int c
>  }
>  
>  struct take_cpu_down_param {
> +	struct task_struct *caller;
>  	unsigned long mod;
>  	void *hcpu;
>  };
> @@ -171,6 +172,7 @@ struct take_cpu_down_param {
>  static int __ref take_cpu_down(void *_param)
>  {
>  	struct take_cpu_down_param *param = _param;
> +	unsigned int cpu = (unsigned long)param->hcpu;
>  	int err;
>  
>  	/* Ensure this CPU doesn't handle any more interrupts. */
> @@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_pa
>  	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
>  				param->hcpu);
>  
> +	if (task_cpu(param->caller) == cpu)
> +		move_task_off_dead_cpu(cpu, param->caller);
>  	/* Force idle task to run as soon as we yield: it should
>  	   immediately notice cpu is offline and die quickly. */
>  	sched_idle_next();
> @@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_pa
>  static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
>  {
>  	int err, nr_calls = 0;
> -	cpumask_var_t old_allowed;
>  	void *hcpu = (void *)(long)cpu;
>  	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
>  	struct take_cpu_down_param tcd_param = {
> +		.caller = current,
>  		.mod = mod,
>  		.hcpu = hcpu,
>  	};
> @@ -205,9 +209,6 @@ static int __ref _cpu_down(unsigned int 
>  	if (!cpu_online(cpu))
>  		return -EINVAL;
>  
> -	if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
> -		return -ENOMEM;
> -
>  	cpu_hotplug_begin();
>  	set_cpu_active(cpu, false);
>  	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
> @@ -224,10 +225,6 @@ static int __ref _cpu_down(unsigned int 
>  		goto out_release;
>  	}
>  
> -	/* Ensure that we are not runnable on dying cpu */
> -	cpumask_copy(old_allowed, &current->cpus_allowed);
> -	set_cpus_allowed_ptr(current, cpu_active_mask);
> -
>  	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
>  	if (err) {
>  		set_cpu_active(cpu, true);
> @@ -236,7 +233,7 @@ static int __ref _cpu_down(unsigned int 
>  					    hcpu) == NOTIFY_BAD)
>  			BUG();
>  
> -		goto out_allowed;
> +		goto out_release;
>  	}
>  	BUG_ON(cpu_online(cpu));
>  
> @@ -254,8 +251,6 @@ static int __ref _cpu_down(unsigned int 
>  
>  	check_for_tasks(cpu);
>  
> -out_allowed:
> -	set_cpus_allowed_ptr(current, old_allowed);
>  out_release:
>  	cpu_hotplug_done();
>  	if (!err) {
> @@ -263,7 +258,6 @@ out_release:
>  					    hcpu) == NOTIFY_BAD)
>  			BUG();
>  	}
> -	free_cpumask_var(old_allowed);
>  	return err;
>  }

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [tip:sched/core] sched: _cpu_down(): Don't play with current->cpus_allowed
  2010-03-15  9:10 [PATCH 5/6] _cpu_down: don't play with current->cpus_allowed Oleg Nesterov
  2010-03-15 19:45 ` Rafael J. Wysocki
@ 2010-04-02 19:12 ` tip-bot for Oleg Nesterov
  1 sibling, 0 replies; 3+ messages in thread
From: tip-bot for Oleg Nesterov @ 2010-04-02 19:12 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, a.p.zijlstra, oleg, tglx, rjw, mingo

Commit-ID:  6a1bdc1b577ebcb65f6603c57f8347309bc4ab13
Gitweb:     http://git.kernel.org/tip/6a1bdc1b577ebcb65f6603c57f8347309bc4ab13
Author:     Oleg Nesterov <oleg@redhat.com>
AuthorDate: Mon, 15 Mar 2010 10:10:23 +0100
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 2 Apr 2010 20:12:03 +0200

sched: _cpu_down(): Don't play with current->cpus_allowed

_cpu_down() changes the current task's affinity and then recovers it at
the end. The problems are well known: we can't restore old_allowed if it
was bound to the now-dead-cpu, and we can race with the userspace which
can change cpu-affinity during unplug.

_cpu_down() should not play with current->cpus_allowed at all. Instead,
take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable()
removes the dying cpu from cpu_online_mask.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100315091023.GA9148@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |    1 +
 kernel/cpu.c          |   18 ++++++------------
 kernel/sched.c        |    2 +-
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43c9451..8bea407 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1843,6 +1843,7 @@ extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 
 #ifdef CONFIG_HOTPLUG_CPU
+extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
 extern void idle_task_exit(void);
 #else
 static inline void idle_task_exit(void) {}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f8cced2..8d340fa 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -163,6 +163,7 @@ static inline void check_for_tasks(int cpu)
 }
 
 struct take_cpu_down_param {
+	struct task_struct *caller;
 	unsigned long mod;
 	void *hcpu;
 };
@@ -171,6 +172,7 @@ struct take_cpu_down_param {
 static int __ref take_cpu_down(void *_param)
 {
 	struct take_cpu_down_param *param = _param;
+	unsigned int cpu = (unsigned long)param->hcpu;
 	int err;
 
 	/* Ensure this CPU doesn't handle any more interrupts. */
@@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_param)
 	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
 				param->hcpu);
 
+	if (task_cpu(param->caller) == cpu)
+		move_task_off_dead_cpu(cpu, param->caller);
 	/* Force idle task to run as soon as we yield: it should
 	   immediately notice cpu is offline and die quickly. */
 	sched_idle_next();
@@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_param)
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 {
 	int err, nr_calls = 0;
-	cpumask_var_t old_allowed;
 	void *hcpu = (void *)(long)cpu;
 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 	struct take_cpu_down_param tcd_param = {
+		.caller = current,
 		.mod = mod,
 		.hcpu = hcpu,
 	};
@@ -205,9 +209,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	if (!cpu_online(cpu))
 		return -EINVAL;
 
-	if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
-		return -ENOMEM;
-
 	cpu_hotplug_begin();
 	set_cpu_active(cpu, false);
 	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
@@ -224,10 +225,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 		goto out_release;
 	}
 
-	/* Ensure that we are not runnable on dying cpu */
-	cpumask_copy(old_allowed, &current->cpus_allowed);
-	set_cpus_allowed_ptr(current, cpu_active_mask);
-
 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 	if (err) {
 		set_cpu_active(cpu, true);
@@ -236,7 +233,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 					    hcpu) == NOTIFY_BAD)
 			BUG();
 
-		goto out_allowed;
+		goto out_release;
 	}
 	BUG_ON(cpu_online(cpu));
 
@@ -254,8 +251,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 
 	check_for_tasks(cpu);
 
-out_allowed:
-	set_cpus_allowed_ptr(current, old_allowed);
 out_release:
 	cpu_hotplug_done();
 	if (!err) {
@@ -263,7 +258,6 @@ out_release:
 					    hcpu) == NOTIFY_BAD)
 			BUG();
 	}
-	free_cpumask_var(old_allowed);
 	return err;
 }
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 165b532..11119de 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5442,7 +5442,7 @@ static int migration_thread(void *data)
 /*
  * Figure out where task on dead CPU should go, use force if necessary.
  */
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
 	struct rq *rq = cpu_rq(dead_cpu);
 	int needs_cpu, uninitialized_var(dest_cpu);

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2010-04-02 19:13 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-03-15  9:10 [PATCH 5/6] _cpu_down: don't play with current->cpus_allowed Oleg Nesterov
2010-03-15 19:45 ` Rafael J. Wysocki
2010-04-02 19:12 ` [tip:sched/core] sched: _cpu_down(): Don't " tip-bot for Oleg Nesterov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox