All of lore.kernel.org
 help / color / mirror / Atom feed
From: Gautham R Shenoy <ego@in.ibm.com>
To: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Yi Yang <yi.y.yang@intel.com>, Ingo Molnar <mingo@elte.hu>,
	akpm@linux-foundation.org, linux-kernel@vger.kernel.org,
	"Rafael J. Wysocki" <rjw@sisk.pl>,
	Thomas Gleixner <tglx@linutronix.de>
Subject: Re: [BUG 2.6.25-rc3] scheduler/hotplug: some processes are dealocked when cpu is set to offline
Date: Wed, 5 Mar 2008 15:35:09 +0530	[thread overview]
Message-ID: <20080305100509.GA9265@in.ibm.com> (raw)
In-Reply-To: <20080304150107.GA564@tv-sign.ru>

On Tue, Mar 04, 2008 at 06:01:07PM +0300, Oleg Nesterov wrote:
> On 03/04, Gautham R Shenoy wrote:
> >
> > So at times the callback thread was blocked on kthread_stop(k) in
> > softlockup.c, while at other times it was blocked in
> > cleanup_workqueue_threads() in workqueue.c.
> 
> From another message:
> >
> > However, it remains in R< state
> 
> What about cwq->thread? Was it TASK_RUNNING too?

No, it was in TASK_UNINTERRUPTIBLE state. The last thing it ever
executed was the wait_for_completion() call in flush_cpu_workqueue().

> 
> Perhaps, for some reason the task can't get CPU after migrating from
> the now dead CPU.
> 
> I can't reproduce this problem on my one cpu P4-ht, perhaps you can
> try something like the untested/uncompiled patch below?
> 
> Oleg.
> 
> --- include/linux/sched.h	2008-02-15 16:59:17.000000000 +0300
> +++ include/linux/sched.h	2008-03-04 17:44:53.136738605 +0300
> @@ -1121,6 +1121,7 @@ struct task_struct {
>  /* hung task detection */
>  	unsigned long last_switch_timestamp;
>  	unsigned long last_switch_count;
> +	unsigned long xxx;
>  #endif
>  /* CPU-specific state of this task */
>  	struct thread_struct thread;
> --- kernel/fork.c	2008-02-15 16:59:17.000000000 +0300
> +++ kernel/fork.c	2008-03-04 17:45:14.773033839 +0300
> @@ -1097,6 +1097,7 @@ static struct task_struct *copy_process(
>  #ifdef CONFIG_DETECT_SOFTLOCKUP
>  	p->last_switch_count = 0;
>  	p->last_switch_timestamp = 0;
> +	p->xxx = 0;
>  #endif
> 
>  #ifdef CONFIG_TASK_XACCT
> --- kernel/sched.c	2008-02-15 16:59:17.000000000 +0300
> +++ kernel/sched.c	2008-03-04 17:48:42.308798646 +0300
> @@ -1291,6 +1291,7 @@ static void enqueue_task(struct rq *rq, 
>  	sched_info_queued(p);
>  	p->sched_class->enqueue_task(rq, p, wakeup);
>  	p->se.on_rq = 1;
> +	p->xxx = jiffies | 1;
>  }
> 
>  static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
> @@ -3944,6 +3945,8 @@ need_resched_nonpreemptible:
>  	preempt_enable_no_resched();
>  	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
>  		goto need_resched;
> +
> +	current->xxx = 0;
>  }
>  EXPORT_SYMBOL(schedule);
> 
> --- kernel/softlockup.c	2008-02-15 16:59:17.000000000 +0300
> +++ kernel/softlockup.c	2008-03-04 17:49:05.584414763 +0300
> @@ -174,6 +174,27 @@ static void check_hung_task(struct task_
>  	touch_nmi_watchdog();
>  }
> 
> +static void check_running_task(struct task_struct *t, unsigned long now)
> +{
> +	if (!sysctl_hung_task_timeout_secs)
> +		return;
> +
> +	if (time_before(now, t->xxx + HZ * sysctl_hung_task_timeout_secs)
> +		return;
> +
> +	printk(KERN_ERR "INFO: task %s:%d can't get CPU for more than "
> +			"%ld seconds.\n", t->comm, t->pid,
> +			sysctl_hung_task_timeout_secs);
> +
> +	if (!cpus_intersects(t->cpus_allowed, cpu_online_map))
> +		printk(KERN_ERR "bad ->cpus_allowed\n");
> +	if (!cpu_online(task_cpu(t)))
> +		printk(KERN_ERR "bad ->cpu\n");
> +
> +	sched_show_task(t);
> +	touch_nmi_watchdog();
> +}
> +
>  /*
>   * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
>   * a really long time (120 seconds). If that happens, print out
> @@ -183,6 +204,7 @@ static void check_hung_uninterruptible_t
>  {
>  	int max_count = sysctl_hung_task_check_count;
>  	unsigned long now = get_timestamp(this_cpu);
> +	unsigned long jiff = jiffies;
>  	struct task_struct *g, *t;
> 
>  	/*
> @@ -192,15 +214,17 @@ static void check_hung_uninterruptible_t
>  	if ((tainted & TAINT_DIE) || did_panic)
>  		return;
> 
> -	read_lock(&tasklist_lock);
> +	rcu_read_lock();
>  	do_each_thread(g, t) {
>  		if (!--max_count)
>  			goto unlock;
>  		if (t->state & TASK_UNINTERRUPTIBLE)
>  			check_hung_task(t, now);
> +		if (!t->xxx)
> +			check_running_task(t, jiff);
>  	} while_each_thread(g, t);
>   unlock:
> -	read_unlock(&tasklist_lock);
> +	rcu_read_unlock();
>  }
> 
>  /*

-- 
Thanks and Regards
gautham

  parent reply	other threads:[~2008-03-05 10:05 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-03-02 18:42 [BUG 2.6.25-rc3] scheduler/hotplug: some processes are dealocked when cpu is set to offline Yi Yang
2008-03-03 11:54 ` Dmitry Adamushko
2008-03-03 11:56   ` Ingo Molnar
2008-03-03 12:02     ` Dmitry Adamushko
2008-03-03 14:53       ` Yi Yang
2008-03-03 17:37         ` Yi Yang
2008-03-03 15:31 ` Gautham R Shenoy
2008-03-03 14:45   ` Yi Yang
2008-03-04  5:26     ` Gautham R Shenoy
2008-03-04  9:09       ` Gautham R Shenoy
2008-03-03 21:56         ` Yi Yang
2008-03-04 15:01       ` Oleg Nesterov
2008-03-04 14:37         ` Yi Yang
2008-03-06 20:05           ` Yi Yang
2008-03-05 10:05         ` Gautham R Shenoy [this message]
2008-03-05 13:53           ` Oleg Nesterov
2008-03-06 11:15             ` Gautham R Shenoy
2008-03-06 12:22               ` Gautham R Shenoy
2008-03-06 13:44         ` Gautham R Shenoy
2008-03-07  2:54           ` Oleg Nesterov
2008-03-07  9:10             ` Gautham R Shenoy
2008-03-07 10:51               ` Gautham R Shenoy
2008-03-06 23:20                 ` Yi Yang
2008-03-07 13:02                 ` Dmitry Adamushko
2008-03-07 13:55                   ` Gautham R Shenoy
2008-03-07 15:50                     ` Gautham R Shenoy
2008-03-07 19:14                       ` [BUG 2.6.25-rc3] scheduler/hotplug: some processes aredealocked " Suresh Siddha
2008-03-07 20:18                   ` [BUG 2.6.25-rc3] scheduler/hotplug: some processes are dealocked " Andrew Morton
2008-03-07 21:36                     ` Rafael J. Wysocki
2008-03-07 23:01                       ` Suresh Siddha
2008-03-07 23:29                         ` Andrew Morton
2008-03-07 23:43                           ` Rafael J. Wysocki
2008-03-08  1:50                             ` Suresh Siddha
2008-03-08  2:09                               ` Andrew Morton
2008-03-08  5:10                               ` [PATCH] adjust root-domain->online span in response to hotplug event Gregory Haskins
2008-03-08  8:41                                 ` Ingo Molnar
2008-03-08 17:50                                   ` [PATCH] adjust root-domain->online span in response to hotplugevent Gregory Haskins
2008-03-09  0:31                                     ` Dmitry Adamushko
2008-03-10 14:12                                       ` Gregory Haskins
2008-03-09  2:35                                 ` [PATCH] adjust root-domain->online span in response to hotplug event Suresh Siddha
2008-03-10 12:41                                   ` Gregory Haskins
2008-03-10  8:14                                 ` Gautham R Shenoy
2008-03-10 13:13                                   ` [PATCH] cpu-hotplug: Register update_sched_domains() notifier with higher prio Gautham R Shenoy
2008-03-10 22:25                                     ` Andrew Morton
2008-03-10 13:39                                   ` [PATCH] keep rd->online and cpu_online_map in sync Gregory Haskins
2008-03-10 14:21                                     ` Gautham R Shenoy
2008-03-10 18:12                                     ` Suresh Siddha
2008-03-10 22:03                                       ` Rafael J. Wysocki
2008-03-10 22:00                                         ` Gregory Haskins
2008-03-10 22:10                                           ` Suresh Siddha
2008-03-10 21:59                                             ` [PATCH v2] " Gregory Haskins
2008-03-10 23:36                                               ` Andrew Morton
2008-03-11  1:34                                                 ` Suresh Siddha
2008-03-11  4:39                                                   ` Gautham R Shenoy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080305100509.GA9265@in.ibm.com \
    --to=ego@in.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=oleg@tv-sign.ru \
    --cc=rjw@sisk.pl \
    --cc=tglx@linutronix.de \
    --cc=yi.y.yang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.