All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: "Liu, Chuansheng" <chuansheng.liu@intel.com>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	Yanmin Zhang <yanmin_zhang@linux.intel.com>,
	"tglx@linutronix.de" <tglx@linutronix.de>,
	"Rafael J. Wysocki" <rjw@sisk.pl>,
	Linux PM mailing list <linux-pm@vger.kernel.org>
Subject: Re: [PATCH] Fix the race between smp_call_function and CPU booting
Date: Tue, 20 Mar 2012 18:44:34 +0530	[thread overview]
Message-ID: <4F6882BA.2040000@linux.vnet.ibm.com> (raw)
In-Reply-To: <1332245842.18960.413.camel@twins>


[ Adding a few more Cc's since this affects suspend/resume. ]

On 03/20/2012 05:47 PM, Peter Zijlstra wrote:

> On Tue, 2012-03-20 at 00:22 +0000, Liu, Chuansheng wrote:
>> Thanks to give some time to have a look.
>>
> Does the below on top of current tip/master work?


I don't think this patch would change anything, atleast it wouldn't get
rid of the warning that Liu reported. Because, he is running his stress
tests on a machine which has only 2 CPUs. So effectively, we are hotplugging
only CPU1 (since CPU0 can't be taken offline, on Intel boxes).

Also, CPU1 is removed from cpu_active_mask during CPU_DOWN_PREPARE time itself,
and migrate_tasks() comes much later (during CPU_DYING). And in any case,
dest_cpu will never be CPU1, because it is the CPU that is going down. So it
*has* to be CPU0 anyway.

So, I don't think changes to select_fallback_rq() to make it more careful is
going to make any difference in the particular scenario that Liu is testing.

That said, even I don't know what the root cause of the warning is.. :-(

Regards,
Srivatsa S. Bhat

>>
> ---
>  include/linux/cpuset.h |    6 +---
>  kernel/cpuset.c        |   20 +++------------
>  kernel/sched/core.c    |   62 +++++++++++++++++++++++++++++++++++------------
>  3 files changed, 52 insertions(+), 36 deletions(-)
> 
> diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
> index e9eaec5..e0ffaf0 100644
> --- a/include/linux/cpuset.h
> +++ b/include/linux/cpuset.h
> @@ -22,7 +22,7 @@ extern int cpuset_init(void);
>  extern void cpuset_init_smp(void);
>  extern void cpuset_update_active_cpus(void);
>  extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
> -extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
> +extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
>  extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
>  #define cpuset_current_mems_allowed (current->mems_allowed)
>  void cpuset_init_current_mems_allowed(void);
> @@ -144,10 +144,8 @@ static inline void cpuset_cpus_allowed(struct task_struct *p,
>  	cpumask_copy(mask, cpu_possible_mask);
>  }
> 
> -static inline int cpuset_cpus_allowed_fallback(struct task_struct *p)
> +static inline void cpuset_cpus_allowed_fallback(struct task_struct *p)
>  {
> -	do_set_cpus_allowed(p, cpu_possible_mask);
> -	return cpumask_any(cpu_active_mask);
>  }
> 
>  static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
> diff --git a/kernel/cpuset.c b/kernel/cpuset.c
> index a09ac2b..c9837b7 100644
> --- a/kernel/cpuset.c
> +++ b/kernel/cpuset.c
> @@ -2195,7 +2195,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
>  	mutex_unlock(&callback_mutex);
>  }
> 
> -int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
> +void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
>  {
>  	const struct cpuset *cs;
>  	int cpu;
> @@ -2219,22 +2219,10 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
>  	 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
>  	 * set any mask even if it is not right from task_cs() pov,
>  	 * the pending set_cpus_allowed_ptr() will fix things.
> +	 *
> +	 * select_fallback_rq() will fix things ups and set cpu_possible_mask
> +	 * if required.
>  	 */
> -
> -	cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
> -	if (cpu >= nr_cpu_ids) {
> -		/*
> -		 * Either tsk->cpus_allowed is wrong (see above) or it
> -		 * is actually empty. The latter case is only possible
> -		 * if we are racing with remove_tasks_in_empty_cpuset().
> -		 * Like above we can temporary set any mask and rely on
> -		 * set_cpus_allowed_ptr() as synchronization point.
> -		 */
> -		do_set_cpus_allowed(tsk, cpu_possible_mask);
> -		cpu = cpumask_any(cpu_active_mask);
> -	}
> -
> -	return cpu;
>  }
> 
>  void cpuset_init_current_mems_allowed(void)
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index e37c9af..04d3f7f 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1263,29 +1263,59 @@ EXPORT_SYMBOL_GPL(kick_process);
>   */
>  static int select_fallback_rq(int cpu, struct task_struct *p)
>  {
> -	int dest_cpu;
>  	const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
> +	enum { cpuset, possible, fail } state = cpuset;
> +	int dest_cpu;
> 
>  	/* Look for allowed, online CPU in same node. */
> -	for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
> +	for_each_cpu_mask(dest_cpu, *nodemask) {
> +		if (!cpu_online(dest_cpu))
> +			continue;
> +		if (!cpu_active(dest_cpu))
> +			continue;
>  		if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
>  			return dest_cpu;
> +	}
> 
> -	/* Any allowed, online CPU? */
> -	dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask);
> -	if (dest_cpu < nr_cpu_ids)
> -		return dest_cpu;
> +	for (;;) {
> +		/* Any allowed, online CPU? */
> +		for_each_cpu_mask(dest_cpu, *tsk_cpus_allowed(p)) {
> +			if (!cpu_online(dest_cpu))
> +				continue;
> +			if (!cpu_active(dest_cpu))
> +				continue;
> +			goto out;
> +		}
> 
> -	/* No more Mr. Nice Guy. */
> -	dest_cpu = cpuset_cpus_allowed_fallback(p);
> -	/*
> -	 * Don't tell them about moving exiting tasks or
> -	 * kernel threads (both mm NULL), since they never
> -	 * leave kernel.
> -	 */
> -	if (p->mm && printk_ratelimit()) {
> -		printk_sched("process %d (%s) no longer affine to cpu%d\n",
> -				task_pid_nr(p), p->comm, cpu);
> +		switch (state) {
> +		case cpuset:
> +			/* No more Mr. Nice Guy. */
> +			cpuset_cpus_allowed_fallback(p);
> +			state = possible;
> +			break;
> +
> +		case possible:
> +			do_set_cpus_allowed(p, cpu_possible_mask);
> +			state = fail;
> +			break;
> +
> +		case fail:
> +			BUG();
> +			break;
> +		}
> +	}
> +
> +out:
> +	if (state != cpuset) {
> +		/*
> +		 * Don't tell them about moving exiting tasks or
> +		 * kernel threads (both mm NULL), since they never
> +		 * leave kernel.
> +		 */
> +		if (p->mm && printk_ratelimit()) {
> +			printk_sched("process %d (%s) no longer affine to cpu%d\n",
> +					task_pid_nr(p), p->comm, cpu);
> +		}
>  	}
> 
>  	return dest_cpu;
> 



  reply	other threads:[~2012-03-20 13:33 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-03-12  9:27 [PATCH] Fix the race between smp_call_function and CPU booting Liu, Chuansheng
2012-03-12  9:32 ` Peter Zijlstra
2012-03-13  6:43   ` Liu, Chuansheng
2012-03-12  9:58 ` Peter Zijlstra
2012-03-13  6:46   ` Liu, Chuansheng
2012-03-13 15:57     ` Peter Zijlstra
2012-03-14  6:27       ` Liu, Chuansheng
2012-03-14  9:43         ` Peter Zijlstra
2012-03-15  0:11           ` Liu, Chuansheng
2012-03-15 10:46             ` Peter Zijlstra
2012-03-16  6:24               ` Liu, Chuansheng
2012-03-16  9:49                 ` Peter Zijlstra
2012-03-19  0:58                   ` Liu, Chuansheng
2012-03-19 10:03                     ` Peter Zijlstra
2012-03-20  0:22                       ` Liu, Chuansheng
2012-03-20 12:17                         ` Peter Zijlstra
2012-03-20 13:14                           ` Srivatsa S. Bhat [this message]
2012-03-20 13:41                             ` Peter Zijlstra
2012-03-20 15:02                               ` Srivatsa S. Bhat
2012-03-21  5:59                           ` Liu, Chuansheng
2012-03-21  9:12                             ` Peter Zijlstra
2012-03-21 12:25                             ` Peter Zijlstra
2012-03-22  0:59                               ` Liu, Chuansheng
2012-03-23 10:25                                 ` Peter Zijlstra
2012-03-23 11:32                                   ` Liu, Chuansheng
2012-03-23 12:02                                     ` Peter Zijlstra
2012-03-23 12:06                                       ` Liu, Chuansheng
2012-03-23 12:13                                         ` Peter Zijlstra
2012-03-23 12:21                                           ` Liu, Chuansheng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4F6882BA.2040000@linux.vnet.ibm.com \
    --to=srivatsa.bhat@linux.vnet.ibm.com \
    --cc=chuansheng.liu@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=rjw@sisk.pl \
    --cc=tglx@linutronix.de \
    --cc=yanmin_zhang@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.