From: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
To: CAI Qian <caiqian-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Cc: kexec <kexec-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org>,
linux-next-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
torvalds-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org,
linux-kernel
<linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>
Subject: Re: kdump regression compared to v2.6.35
Date: Mon, 30 Aug 2010 14:50:32 +0200 [thread overview]
Message-ID: <4C7BA918.60707@kernel.org> (raw)
In-Reply-To: <1633441528.1498131283163868227.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
On 08/30/2010 12:24 PM, CAI Qian wrote:
> Can't see any difference with hangcheck timer enabled.
Hmm, odd. So, here's the debug patch I mentioned. It periodically
checks all work items and reports any that have been delayed for too long.
If the max wait goes over 30secs, it will dump all task states and
disable itself. Can you please apply the patch on top of rc2 +
wq#for-linus and report the output? It should tell us who's stuck
where.
Thanks.
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f11100f..282322c 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -83,6 +83,8 @@ struct work_struct {
#ifdef CONFIG_LOCKDEP
struct lockdep_map lockdep_map;
#endif
+ unsigned long queued_on;
+ unsigned long activated_on;
};
#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a2dccfc..9f95169 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -913,6 +913,8 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
{
struct global_cwq *gcwq = cwq->gcwq;
+ work->queued_on = work->activated_on = jiffies;
+
/* we own @work, set data and link */
set_work_cwq(work, cwq, extra_flags);
@@ -996,13 +998,14 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
if (likely(cwq->nr_active < cwq->max_active)) {
cwq->nr_active++;
worklist = gcwq_determine_ins_pos(gcwq, cwq);
+ insert_work(cwq, work, worklist, work_flags);
} else {
work_flags |= WORK_STRUCT_DELAYED;
worklist = &cwq->delayed_works;
+ insert_work(cwq, work, worklist, work_flags);
+ work->activated_on--;
}
- insert_work(cwq, work, worklist, work_flags);
-
spin_unlock_irqrestore(&gcwq->lock, flags);
}
@@ -1669,6 +1672,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
struct work_struct, entry);
struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq);
+ work->activated_on = jiffies;
move_linked_works(work, pos, NULL);
__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
cwq->nr_active++;
@@ -2810,7 +2814,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
* list. Grab it, set max_active accordingly and add the new
* workqueue to workqueues list.
*/
- spin_lock(&workqueue_lock);
+ spin_lock_irq(&workqueue_lock);
if (workqueue_freezing && wq->flags & WQ_FREEZEABLE)
for_each_cwq_cpu(cpu, wq)
@@ -2818,7 +2822,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
list_add(&wq->list, &workqueues);
- spin_unlock(&workqueue_lock);
+ spin_unlock_irq(&workqueue_lock);
return wq;
err:
@@ -2849,9 +2853,9 @@ void destroy_workqueue(struct workqueue_struct *wq)
* wq list is used to freeze wq, remove from list after
* flushing is complete in case freeze races us.
*/
- spin_lock(&workqueue_lock);
+ spin_lock_irq(&workqueue_lock);
list_del(&wq->list);
- spin_unlock(&workqueue_lock);
+ spin_unlock_irq(&workqueue_lock);
/* sanity check */
for_each_cwq_cpu(cpu, wq) {
@@ -2891,23 +2895,23 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
- spin_lock(&workqueue_lock);
+ spin_lock_irq(&workqueue_lock);
wq->saved_max_active = max_active;
for_each_cwq_cpu(cpu, wq) {
struct global_cwq *gcwq = get_gcwq(cpu);
- spin_lock_irq(&gcwq->lock);
+ spin_lock(&gcwq->lock);
if (!(wq->flags & WQ_FREEZEABLE) ||
!(gcwq->flags & GCWQ_FREEZING))
get_cwq(gcwq->cpu, wq)->max_active = max_active;
- spin_unlock_irq(&gcwq->lock);
+ spin_unlock(&gcwq->lock);
}
- spin_unlock(&workqueue_lock);
+ spin_unlock_irq(&workqueue_lock);
}
EXPORT_SYMBOL_GPL(workqueue_set_max_active);
@@ -3419,7 +3423,7 @@ void freeze_workqueues_begin(void)
{
unsigned int cpu;
- spin_lock(&workqueue_lock);
+ spin_lock_irq(&workqueue_lock);
BUG_ON(workqueue_freezing);
workqueue_freezing = true;
@@ -3428,7 +3432,7 @@ void freeze_workqueues_begin(void)
struct global_cwq *gcwq = get_gcwq(cpu);
struct workqueue_struct *wq;
- spin_lock_irq(&gcwq->lock);
+ spin_lock(&gcwq->lock);
BUG_ON(gcwq->flags & GCWQ_FREEZING);
gcwq->flags |= GCWQ_FREEZING;
@@ -3440,10 +3444,10 @@ void freeze_workqueues_begin(void)
cwq->max_active = 0;
}
- spin_unlock_irq(&gcwq->lock);
+ spin_unlock(&gcwq->lock);
}
- spin_unlock(&workqueue_lock);
+ spin_unlock_irq(&workqueue_lock);
}
/**
@@ -3464,7 +3468,7 @@ bool freeze_workqueues_busy(void)
unsigned int cpu;
bool busy = false;
- spin_lock(&workqueue_lock);
+ spin_lock_irq(&workqueue_lock);
BUG_ON(!workqueue_freezing);
@@ -3488,7 +3492,7 @@ bool freeze_workqueues_busy(void)
}
}
out_unlock:
- spin_unlock(&workqueue_lock);
+ spin_unlock_irq(&workqueue_lock);
return busy;
}
@@ -3505,7 +3509,7 @@ void thaw_workqueues(void)
{
unsigned int cpu;
- spin_lock(&workqueue_lock);
+ spin_lock_irq(&workqueue_lock);
if (!workqueue_freezing)
goto out_unlock;
@@ -3514,7 +3518,7 @@ void thaw_workqueues(void)
struct global_cwq *gcwq = get_gcwq(cpu);
struct workqueue_struct *wq;
- spin_lock_irq(&gcwq->lock);
+ spin_lock(&gcwq->lock);
BUG_ON(!(gcwq->flags & GCWQ_FREEZING));
gcwq->flags &= ~GCWQ_FREEZING;
@@ -3535,15 +3539,82 @@ void thaw_workqueues(void)
wake_up_worker(gcwq);
- spin_unlock_irq(&gcwq->lock);
+ spin_unlock(&gcwq->lock);
}
workqueue_freezing = false;
out_unlock:
- spin_unlock(&workqueue_lock);
+ spin_unlock_irq(&workqueue_lock);
}
#endif /* CONFIG_FREEZER */
+/* Minimum wait, in jiffies, before a work item is reported. */
+#define WQ_CHECK_INTERVAL	(10 * HZ)
+static void workqueue_check_timer_fn(unsigned long data);
+static DEFINE_TIMER(workqueue_check_timer, workqueue_check_timer_fn, 0, 0);
+
+/**
+ * workqueue_check_timer_fn - debug timer: report work items waiting too long
+ * @data: unused
+ *
+ * For each gcwq, walks the delayed_works list of every workqueue's cwq
+ * and then the gcwq's worklist, holding workqueue_lock and gcwq->lock.
+ * Every work item that has waited at least WQ_CHECK_INTERVAL is printed;
+ * delayed items are measured from ->queued_on, pending items from
+ * ->activated_on.
+ *
+ * If the longest wait seen exceeds 30 seconds, all task states are dumped
+ * with show_state() and the timer is not re-armed.  Otherwise the timer
+ * is re-armed for WQ_CHECK_INTERVAL / 2 after the jiffies value sampled
+ * on entry.
+ */
+static void workqueue_check_timer_fn(unsigned long data)
+{
+	unsigned long now = jiffies;
+	unsigned long wait, max_wait = 0;
+	unsigned int cpu;
+	unsigned long flags;
+
+	spin_lock_irqsave(&workqueue_lock, flags);
+
+	for_each_gcwq_cpu(cpu) {
+		struct global_cwq *gcwq = get_gcwq(cpu);
+		struct workqueue_struct *wq;
+		struct work_struct *work;
+
+		spin_lock(&gcwq->lock);
+
+		/* works still parked on a cwq's delayed list */
+		list_for_each_entry(wq, &workqueues, list) {
+			struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+
+			if (!cwq)
+				continue;
+
+			list_for_each_entry(work, &cwq->delayed_works, entry) {
+				/*
+				 * __queue_work() decrements ->activated_on
+				 * for delayed works, so it must still be
+				 * before ->queued_on here.
+				 */
+				WARN_ON_ONCE(!time_before(work->activated_on,
+							  work->queued_on));
+				wait = now - work->queued_on;
+				if (wait < WQ_CHECK_INTERVAL)
+					continue;
+				max_wait = max(max_wait, wait);
+				printk("XXX %s/%d %p:%pf delayed for %ums\n",
+				       wq->name,
+				       gcwq->cpu != WORK_CPU_UNBOUND ? gcwq->cpu : -1,
+				       work, work->func, jiffies_to_msecs(wait));
+			}
+		}
+
+		/* works on the gcwq worklist: activated but not yet run */
+		list_for_each_entry(work, &gcwq->worklist, entry) {
+			WARN_ON_ONCE(time_before(work->activated_on,
+						 work->queued_on));
+			wait = now - work->activated_on;
+			if (wait < WQ_CHECK_INTERVAL)
+				continue;
+			max_wait = max(max_wait, wait);
+			printk("XXX %s/%d %p:%pf pending for %ums after delayed %ums\n",
+			       get_work_cwq(work)->wq->name,
+			       gcwq->cpu != WORK_CPU_UNBOUND ? gcwq->cpu : -1,
+			       work, work->func,
+			       jiffies_to_msecs(wait),
+			       jiffies_to_msecs(work->activated_on - work->queued_on));
+		}
+
+		spin_unlock(&gcwq->lock);
+	}
+
+	spin_unlock_irqrestore(&workqueue_lock, flags);
+
+	/*
+	 * Threshold matches the message below: dump once and stop
+	 * re-arming the timer when some work waited more than 30 seconds.
+	 */
+	if (max_wait > 30 * HZ) {
+		printk("XXX max_wait over 30secs, dumping tasks\n");
+		show_state();
+	} else
+		mod_timer(&workqueue_check_timer, now + WQ_CHECK_INTERVAL / 2);
+}
+
static int __init init_workqueues(void)
{
unsigned int cpu;
@@ -3596,6 +3667,7 @@ static int __init init_workqueues(void)
system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
WQ_UNBOUND_MAX_ACTIVE);
BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq);
+ mod_timer(&workqueue_check_timer, jiffies + WQ_CHECK_INTERVAL / 2);
return 0;
}
early_initcall(init_workqueues);
next prev parent reply other threads:[~2010-08-30 12:50 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <123671462.1479561283081998014.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com>
[not found] ` <123671462.1479561283081998014.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-29 11:41 ` kdump regression compared to v2.6.35 caiqian-H+wXaHxf7aLQT0dZR+AlfA
[not found] ` <171172387.1479581283082093912.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-29 11:56 ` CAI Qian
[not found] ` <1236896997.1479691283083005518.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-29 11:52 ` Tejun Heo
2010-08-29 12:03 ` CAI Qian
[not found] ` <779893521.1479771283083393771.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-29 12:36 ` Tejun Heo
2010-08-30 3:42 ` CAI Qian
[not found] ` <1888320510.1487031283139773505.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-30 8:29 ` Tejun Heo
2010-08-30 10:24 ` CAI Qian
[not found] ` <1633441528.1498131283163868227.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-30 12:50 ` Tejun Heo [this message]
2010-08-30 14:02 ` CAI Qian
[not found] ` <1141332926.1524871283176937097.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-30 14:21 ` Tejun Heo
2010-08-30 14:47 ` CAI Qian
[not found] ` <331762715.1536681283179646594.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-30 14:51 ` CAI Qian
[not found] ` <1706089082.1537331283179884183.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-30 14:55 ` Tejun Heo
[not found] <71887879.1606161283215975799.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com>
2010-08-31 0:53 ` caiqian
[not found] ` <2044609874.1606211283216015254.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-31 9:22 ` Tejun Heo
[not found] <373987879.1541191283181021800.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com>
2010-08-30 15:10 ` caiqian
[not found] ` <1331313838.1541221283181038073.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-30 16:38 ` Tejun Heo
[not found] ` <4C7BDE6E.8030107-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2010-08-30 17:31 ` Tejun Heo
[not found] <2142316909.1477341283065016062.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com>
[not found] ` <2142316909.1477341283065016062.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-29 7:01 ` caiqian-H+wXaHxf7aLQT0dZR+AlfA
[not found] ` <181596874.1477361283065264575.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-29 8:57 ` Tejun Heo
[not found] ` <4C7A20F6.5070802-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2010-08-29 11:24 ` CAI Qian
[not found] ` <633505726.1479321283081093502.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-29 11:21 ` Tejun Heo
[not found] <229468156.1475641283020469212.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com>
[not found] ` <229468156.1475641283020469212.JavaMail.root-k5qu2F3t005+R5eDjrG6zsCp5Q1pQRjfhaY/URYTgi6ny3qCrzbmXA@public.gmane.org>
2010-08-28 18:36 ` caiqian-H+wXaHxf7aLQT0dZR+AlfA
[not found] <2082161789.1474781283008521258.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com>
2010-08-28 15:19 ` caiqian
2010-08-27 12:35 CAI Qian
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4C7BA918.60707@kernel.org \
--to=tj-dgejt+ai2ygdnm+yrofe0a@public.gmane.org \
--cc=caiqian-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
--cc=kexec-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org \
--cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=linux-next-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=torvalds-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).