Soft lockups in __cancel_work_timer()

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Rabin Vincent <rabin.vincent@axis.com>
To: Tejun Heo <tj@kernel.org>
Cc: Jesper Nilsson <jespern@axis.com>, linux-kernel@vger.kernel.org
Subject: Soft lockups in __cancel_work_timer()
Date: Fri, 6 Feb 2015 18:11:56 +0100	[thread overview]
Message-ID: <20150206171156.GA8942@axis.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 2524 bytes --]

Hi Tejun,

The comment at the top of the try_to_grab_pending() says that the ENOENT
state "may persist for arbitrarily long".

If two threads call cancel_delayed_work_sync() at the same time, one of
them can end up looping in the loop in __cancel_work_timer() without
waiting in the flush_work() which is inside it.

The looping thread will see -ENOENT from try_to_grab_pending() because
the work is being cancelled, and it will see a false return from
flush_work()/start_flush_work() because there is no worker executing the
work.

task1                           task2                           worker

                                                                add to busy hash
                                                                clear pending
                                                                exec work

__cancel_work_timer()
 try_to_grab_pending()
   get pending, return 0
 set work cancelling
 flush_work()
   wait_for_completion()

                                                                remove from busy_hash

                                __cancel_work_timer()
                                while (forever) {
                                  try_to_grab_pending()
                                    return -ENOENT due to cancelling
                                  flush_work
                                    return false due to already gone
                                }


On kernels with CONFIG_PREEMPT disabled, this causes a permanent soft
lockup of the system.

Adding a cond_resched() to the loop in cancel_delayed_work_sync(), like
below, helps the simple !PREEMPT case, but does not completely fix the
problem, because the problem can still be seen if the thread which sees
the ENOENT has for example SCHED_FIFO scheduling class, both on systems
with CONFIG_PREEMPT enabled and on !PREEMPT with the cond_resched().

We've seen this problem with the cancel_delayed_work() call in
jffs2_sync_fs(), but I've attached a testing patch which forces the
problem on current mainline without the need for jffs2.

Suggestions?

Thanks.

/Rabin

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index beeeac9..904289f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2741,6 +2741,9 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 		 */
 		if (unlikely(ret == -ENOENT))
 			flush_work(work);
+
+		if (ret < 0)
+			cond_resched();
 	} while (unlikely(ret < 0));
 
 	/* tell other tasks trying to grab @work to back off */

[-- Attachment #2: force-wq-lockup.patch --]
[-- Type: text/x-diff, Size: 3667 bytes --]

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index beeeac9..c161255 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -49,6 +49,15 @@
 #include <linux/moduleparam.h>
 #include <linux/uaccess.h>
 
+extern void hack_set_state(int newstate);
+extern void hack_wait_for_state(int state);
+
+static bool want(struct work_struct *work)
+{
+	extern void hackfunc(struct work_struct *work);
+	return work->func == hackfunc;
+}
+
 #include "workqueue_internal.h"
 
 enum {
@@ -2006,6 +2015,9 @@ __acquires(&pool->lock)
 	 */
 	set_work_pool_and_clear_pending(work, pool->id);
 
+	if (want(work))
+		hack_set_state(20);
+
 	spin_unlock_irq(&pool->lock);
 
 	lock_map_acquire_read(&pwq->wq->lockdep_map);
@@ -2039,8 +2051,14 @@ __acquires(&pool->lock)
 	 */
 	cond_resched_rcu_qs();
 
+	if (want(work))
+		hack_wait_for_state(30);
+
 	spin_lock_irq(&pool->lock);
 
+	if (want(work))
+		hack_set_state(40);
+
 	/* clear cpu intensive status */
 	if (unlikely(cpu_intensive))
 		worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
@@ -2682,6 +2700,9 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
 	insert_wq_barrier(pwq, barr, work, worker);
 	spin_unlock_irq(&pool->lock);
 
+	if (want(work) && !strcmp(current->comm, "thread1"))
+		hack_set_state(30);
+
 	/*
 	 * If @max_active is 1 or rescuer is in use, flushing another work
 	 * item on the same workqueue may lead to deadlock.  Make sure the
@@ -2733,6 +2754,12 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 	unsigned long flags;
 	int ret;
 
+	if (want(work) && !strcmp(current->comm, "thread1"))
+		hack_wait_for_state(20);
+
+	if (want(work) && !strcmp(current->comm, "thread2"))
+		hack_wait_for_state(40);
+
 	do {
 		ret = try_to_grab_pending(work, is_dwork, &flags);
 		/*
@@ -2741,6 +2768,9 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 		 */
 		if (unlikely(ret == -ENOENT))
 			flush_work(work);
+
+		if (want(work) && !strcmp(current->comm, "thread2"))
+			hack_set_state(50);
 	} while (unlikely(ret < 0));
 
 	/* tell other tasks trying to grab @work to back off */
@@ -2748,6 +2778,8 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 	local_irq_restore(flags);
 
 	flush_work(work);
+	if (want(work) && !strcmp(current->comm, "thread1"))
+		hack_wait_for_state(50);
 	clear_work_data(work);
 	return ret;
 }
@@ -4910,3 +4942,59 @@ static int __init init_workqueues(void)
 	return 0;
 }
 early_initcall(init_workqueues);
+
+static struct completion completions[100];
+
+void hack_set_state(int newstate)
+{
+	trace_printk("caller %pF sets state to %d\n", (void *)_RET_IP_, newstate);
+	complete(&completions[newstate]);
+}
+
+void hack_wait_for_state(int state)
+{
+	trace_printk("caller %pF wants state %d\n", (void *)_RET_IP_, state);
+	wait_for_completion(&completions[state]);
+	trace_printk("caller %pF got state %d\n", (void *)_RET_IP_, state);
+}
+
+void hackfunc(struct work_struct *work)
+{
+	printk("%s\n", __func__);
+}
+
+static DECLARE_DELAYED_WORK(hackwork, hackfunc);
+
+static int thread1(void *data)
+{
+	cancel_delayed_work_sync(&hackwork);
+
+	printk("%s: done\n", __func__);
+
+	return 0;
+}
+
+static int thread2(void *data)
+{
+	cancel_delayed_work_sync(&hackwork);
+
+	printk("%s: done\n", __func__);
+
+	return 0;
+}
+
+static int hack_init(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(completions); i++)
+		init_completion(&completions[i]);
+
+	kthread_run(thread1, NULL, "thread1");
+	kthread_run(thread2, NULL, "thread2");
+
+	queue_delayed_work(system_long_wq, &hackwork, msecs_to_jiffies(5));
+
+	return 0;
+}
+late_initcall(hack_init);

next             reply	other threads:[~2015-02-06 17:12 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-02-06 17:11 Rabin Vincent [this message]
2015-02-06 18:01 ` Soft lockups in __cancel_work_timer() Tejun Heo
2015-02-09 16:15 ` [PATCH for-3.20-fixes] workqueue: fix hang involving racing cancel[_delayed]_work_sync()'s for PREEMPT_NONE Tejun Heo
2015-02-09 16:29   ` Jesper Nilsson
2015-02-10  8:35     ` Jesper Nilsson
2015-02-10  9:33   ` Rabin Vincent
2015-03-02 12:26   ` Jesper Nilsson
2015-03-02 16:21     ` Tejun Heo
2015-03-03  7:52       ` Jesper Nilsson
2015-03-03 10:00       ` Tomeu Vizoso
2015-03-03 13:21         ` Tejun Heo
2015-03-03 13:45           ` [PATCH wq/for-4.0-fixes v2] " Tejun Heo
2015-03-03 13:57             ` Tomeu Vizoso
2015-03-05  9:24               ` Tomeu Vizoso
2015-03-05  9:36                 ` Tejun Heo
2015-03-05 11:46                   ` Tejun Heo
2015-03-05 13:03                 ` [PATCH v3 wq/for-4.0-fixes] " Tejun Heo

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:beeeac9 dfblob:904289f dfblob:beeeac9 dfblob:c161255 )
 OR (
bs:"Soft lockups in __cancel_work_timer()" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150206171156.GA8942@axis.com \
    --to=rabin.vincent@axis.com \
    --cc=jespern@axis.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.