public inbox for linux-kernel@vger.kernel.org
* [PATCH 0/4] workqueue: Destroy workers in idle_cull_fn().
@ 2024-06-21  7:32 Lai Jiangshan
  2024-06-21  7:32 ` [PATCH 1/4] workqueue: Reap workers via kthread_stop() and remove detach_completion Lai Jiangshan
                   ` (4 more replies)
  0 siblings, 5 replies; 15+ messages in thread
From: Lai Jiangshan @ 2024-06-21  7:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: Lai Jiangshan, Tejun Heo, Valentin Schneider

From: Lai Jiangshan <jiangshan.ljs@antgroup.com>

After commit e02b93124855 ("workqueue: Unbind kworkers before sending
them to exit()"), the code that kicks off the destruction of workers
runs in process context (idle_cull_fn()), so more of the destruction
code can be moved into that process context to simplify the teardown.

Cc: Tejun Heo <tj@kernel.org>
Cc: Valentin Schneider <vschneid@redhat.com>

Lai Jiangshan (4):
  workqueue: Reap workers via kthread_stop() and remove
    detach_completion
  workqueue: Don't bind the rescuer in the last working cpu
  workqueue: Detach workers directly in idle_cull_fn()
  workqueue: Remove useless pool->dying_workers

 kernel/workqueue.c | 87 +++++++++++++++++++++-------------------------
 1 file changed, 40 insertions(+), 47 deletions(-)

-- 
2.19.1.6.gb485710b


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/4] workqueue: Reap workers via kthread_stop() and remove detach_completion
  2024-06-21  7:32 [PATCH 0/4] workqueue: Destroy workers in idle_cull_fn() Lai Jiangshan
@ 2024-06-21  7:32 ` Lai Jiangshan
  2024-07-23 16:19   ` Marc Hartmayer
  2024-06-21  7:32 ` [PATCH 2/4] workqueue: Don't bind the rescuer in the last working cpu Lai Jiangshan
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 15+ messages in thread
From: Lai Jiangshan @ 2024-06-21  7:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: Lai Jiangshan, Valentin Schneider, Tejun Heo, Lai Jiangshan

From: Lai Jiangshan <jiangshan.ljs@antgroup.com>

The code that kicks off the destruction of workers now runs in process
context (idle_cull_fn()), so kthread_stop() can be used there to replace
the work of pool->detach_completion.

The wakeup in wake_dying_workers() is unneeded after this change, but it
is harmless; just keep it here until the next patch renames
wake_dying_workers(), rather than renaming the function twice.
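
As a minimal, self-contained sketch of the lifetime rule this relies on
(the demo_* names are made up for illustration and are not part of this
patch): kthread_stop() sleeps until the target thread has exited, and
kthread_stop_put() additionally drops a task reference, so the creator
no longer needs the dying thread to signal a completion.

	#include <linux/err.h>
	#include <linux/kthread.h>
	#include <linux/sched.h>
	#include <linux/sched/task.h>	/* get_task_struct() */

	/* hypothetical thread body: idles until kthread_stop() is called */
	static int demo_thread_fn(void *data)
	{
		set_current_state(TASK_INTERRUPTIBLE);
		while (!kthread_should_stop()) {
			schedule();
			set_current_state(TASK_INTERRUPTIBLE);
		}
		__set_current_state(TASK_RUNNING);
		return 0;	/* collected by kthread_stop() */
	}

	static void demo_reap(void)
	{
		struct task_struct *t = kthread_run(demo_thread_fn, NULL, "demo");

		if (IS_ERR(t))
			return;
		get_task_struct(t);	/* mirrors set_worker_dying() below */
		/* waits for demo_thread_fn() to return, then drops the ref */
		kthread_stop_put(t);
	}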

Cc: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
 kernel/workqueue.c | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 25a7d9a1a7ae..a0fb2f60e938 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -216,7 +216,6 @@ struct worker_pool {
 	struct worker		*manager;	/* L: purely informational */
 	struct list_head	workers;	/* A: attached workers */
 	struct list_head        dying_workers;  /* A: workers about to die */
-	struct completion	*detach_completion; /* all workers detached */
 
 	struct ida		worker_ida;	/* worker IDs for task name */
 
@@ -2696,7 +2695,6 @@ static void worker_attach_to_pool(struct worker *worker,
 static void worker_detach_from_pool(struct worker *worker)
 {
 	struct worker_pool *pool = worker->pool;
-	struct completion *detach_completion = NULL;
 
 	/* there is one permanent BH worker per CPU which should never detach */
 	WARN_ON_ONCE(pool->flags & POOL_BH);
@@ -2707,15 +2705,10 @@ static void worker_detach_from_pool(struct worker *worker)
 	list_del(&worker->node);
 	worker->pool = NULL;
 
-	if (list_empty(&pool->workers) && list_empty(&pool->dying_workers))
-		detach_completion = pool->detach_completion;
 	mutex_unlock(&wq_pool_attach_mutex);
 
 	/* clear leftover flags without pool->lock after it is detached */
 	worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
-
-	if (detach_completion)
-		complete(detach_completion);
 }
 
 /**
@@ -2816,10 +2809,9 @@ static void unbind_worker(struct worker *worker)
 
 static void wake_dying_workers(struct list_head *cull_list)
 {
-	struct worker *worker, *tmp;
+	struct worker *worker;
 
-	list_for_each_entry_safe(worker, tmp, cull_list, entry) {
-		list_del_init(&worker->entry);
+	list_for_each_entry(worker, cull_list, entry) {
 		unbind_worker(worker);
 		/*
 		 * If the worker was somehow already running, then it had to be
@@ -2835,6 +2827,17 @@ static void wake_dying_workers(struct list_head *cull_list)
 	}
 }
 
+static void reap_dying_workers(struct list_head *cull_list)
+{
+	struct worker *worker, *tmp;
+
+	list_for_each_entry_safe(worker, tmp, cull_list, entry) {
+		list_del_init(&worker->entry);
+		kthread_stop_put(worker->task);
+		kfree(worker);
+	}
+}
+
 /**
  * set_worker_dying - Tag a worker for destruction
  * @worker: worker to be destroyed
@@ -2866,6 +2869,9 @@ static void set_worker_dying(struct worker *worker, struct list_head *list)
 
 	list_move(&worker->entry, list);
 	list_move(&worker->node, &pool->dying_workers);
+
+	/* get an extra task struct reference for later kthread_stop_put() */
+	get_task_struct(worker->task);
 }
 
 /**
@@ -2949,6 +2955,8 @@ static void idle_cull_fn(struct work_struct *work)
 	raw_spin_unlock_irq(&pool->lock);
 	wake_dying_workers(&cull_list);
 	mutex_unlock(&wq_pool_attach_mutex);
+
+	reap_dying_workers(&cull_list);
 }
 
 static void send_mayday(struct work_struct *work)
@@ -3330,7 +3338,6 @@ static int worker_thread(void *__worker)
 		ida_free(&pool->worker_ida, worker->id);
 		worker_detach_from_pool(worker);
 		WARN_ON_ONCE(!list_empty(&worker->entry));
-		kfree(worker);
 		return 0;
 	}
 
@@ -4863,7 +4870,6 @@ static void rcu_free_pool(struct rcu_head *rcu)
  */
 static void put_unbound_pool(struct worker_pool *pool)
 {
-	DECLARE_COMPLETION_ONSTACK(detach_completion);
 	struct worker *worker;
 	LIST_HEAD(cull_list);
 
@@ -4917,12 +4923,9 @@ static void put_unbound_pool(struct worker_pool *pool)
 
 	wake_dying_workers(&cull_list);
 
-	if (!list_empty(&pool->workers) || !list_empty(&pool->dying_workers))
-		pool->detach_completion = &detach_completion;
 	mutex_unlock(&wq_pool_attach_mutex);
 
-	if (pool->detach_completion)
-		wait_for_completion(pool->detach_completion);
+	reap_dying_workers(&cull_list);
 
 	/* shut down the timers */
 	del_timer_sync(&pool->idle_timer);
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/4] workqueue: Don't bind the rescuer in the last working cpu
  2024-06-21  7:32 [PATCH 0/4] workqueue: Destroy workers in idle_cull_fn() Lai Jiangshan
  2024-06-21  7:32 ` [PATCH 1/4] workqueue: Reap workers via kthread_stop() and remove detach_completion Lai Jiangshan
@ 2024-06-21  7:32 ` Lai Jiangshan
  2024-06-21  7:32 ` [PATCH 3/4] workqueue: Detach workers directly in idle_cull_fn() Lai Jiangshan
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 15+ messages in thread
From: Lai Jiangshan @ 2024-06-21  7:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: Lai Jiangshan, Valentin Schneider, Tejun Heo, Lai Jiangshan

From: Lai Jiangshan <jiangshan.ljs@antgroup.com>

So that when the rescuer is next woken up, it will not interrupt the
last working CPU, which might be busy with other crucial work that has
nothing to do with the rescuer's incoming work.
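
For illustration, the mask choice that unbind_worker() applies boils
down to the following sketch (demo_pick_mask() is a made-up name;
wq_unbound_cpumask is workqueue-internal state):

	#include <linux/cpumask.h>

	static const struct cpumask *demo_pick_mask(void)
	{
		/*
		 * Prefer the unbound-workqueue mask so a woken rescuer lands
		 * on CPUs meant for unbound work; fall back to all possible
		 * CPUs only when none of those CPUs is currently active.
		 */
		if (cpumask_intersects(wq_unbound_cpumask, cpu_active_mask))
			return wq_unbound_cpumask;
		return cpu_possible_mask;
	}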

Cc: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
 kernel/workqueue.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a0fb2f60e938..93b87ca63a7e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2684,6 +2684,17 @@ static void worker_attach_to_pool(struct worker *worker,
 	mutex_unlock(&wq_pool_attach_mutex);
 }
 
+static void unbind_worker(struct worker *worker)
+{
+	lockdep_assert_held(&wq_pool_attach_mutex);
+
+	kthread_set_per_cpu(worker->task, -1);
+	if (cpumask_intersects(wq_unbound_cpumask, cpu_active_mask))
+		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < 0);
+	else
+		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
+}
+
 /**
  * worker_detach_from_pool() - detach a worker from its pool
  * @worker: worker which is attached to its pool
@@ -2701,7 +2712,7 @@ static void worker_detach_from_pool(struct worker *worker)
 
 	mutex_lock(&wq_pool_attach_mutex);
 
-	kthread_set_per_cpu(worker->task, -1);
+	unbind_worker(worker);
 	list_del(&worker->node);
 	worker->pool = NULL;
 
@@ -2796,17 +2807,6 @@ static struct worker *create_worker(struct worker_pool *pool)
 	return NULL;
 }
 
-static void unbind_worker(struct worker *worker)
-{
-	lockdep_assert_held(&wq_pool_attach_mutex);
-
-	kthread_set_per_cpu(worker->task, -1);
-	if (cpumask_intersects(wq_unbound_cpumask, cpu_active_mask))
-		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < 0);
-	else
-		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
-}
-
 static void wake_dying_workers(struct list_head *cull_list)
 {
 	struct worker *worker;
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/4] workqueue: Detach workers directly in idle_cull_fn()
  2024-06-21  7:32 [PATCH 0/4] workqueue: Destroy workers in idle_cull_fn() Lai Jiangshan
  2024-06-21  7:32 ` [PATCH 1/4] workqueue: Reap workers via kthread_stop() and remove detach_completion Lai Jiangshan
  2024-06-21  7:32 ` [PATCH 2/4] workqueue: Don't bind the rescuer in the last working cpu Lai Jiangshan
@ 2024-06-21  7:32 ` Lai Jiangshan
  2024-06-21  7:32 ` [PATCH 4/4] workqueue: Remove useless pool->dying_workers Lai Jiangshan
  2024-06-21 22:34 ` [PATCH 0/4] workqueue: Destroy workers in idle_cull_fn() Tejun Heo
  4 siblings, 0 replies; 15+ messages in thread
From: Lai Jiangshan @ 2024-06-21  7:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: Lai Jiangshan, Valentin Schneider, Tejun Heo, Lai Jiangshan

From: Lai Jiangshan <jiangshan.ljs@antgroup.com>

The code that kicks off the destruction of workers now runs in process
context (idle_cull_fn()), and detaching a worker no longer has to happen
inside the worker thread itself, so do the detaching directly in
idle_cull_fn().

wake_dying_workers() is renamed to detach_dying_workers(), and the
now-unneeded wakeup in it is removed.
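
Condensed from the hunks below, the resulting idle_cull_fn() flow is (a
sketch, not new code):

	mutex_lock(&wq_pool_attach_mutex);	/* fences the self-destruct path */
	raw_spin_lock_irq(&pool->lock);
	/* set_worker_dying() gathers too-idle workers onto cull_list */
	raw_spin_unlock_irq(&pool->lock);
	detach_dying_workers(&cull_list);	/* unbind, unlink, worker->pool = NULL */
	mutex_unlock(&wq_pool_attach_mutex);
	reap_dying_workers(&cull_list);		/* kthread_stop_put() + kfree() */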

Cc: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
 kernel/workqueue.c | 45 +++++++++++++++++++--------------------------
 1 file changed, 19 insertions(+), 26 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 93b87ca63a7e..e9ca42ef425a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2695,6 +2695,16 @@ static void unbind_worker(struct worker *worker)
 		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
 }
 
+
+static void detach_worker(struct worker *worker)
+{
+	lockdep_assert_held(&wq_pool_attach_mutex);
+
+	unbind_worker(worker);
+	list_del(&worker->node);
+	worker->pool = NULL;
+}
+
 /**
  * worker_detach_from_pool() - detach a worker from its pool
  * @worker: worker which is attached to its pool
@@ -2711,11 +2721,7 @@ static void worker_detach_from_pool(struct worker *worker)
 	WARN_ON_ONCE(pool->flags & POOL_BH);
 
 	mutex_lock(&wq_pool_attach_mutex);
-
-	unbind_worker(worker);
-	list_del(&worker->node);
-	worker->pool = NULL;
-
+	detach_worker(worker);
 	mutex_unlock(&wq_pool_attach_mutex);
 
 	/* clear leftover flags without pool->lock after it is detached */
@@ -2807,24 +2813,12 @@ static struct worker *create_worker(struct worker_pool *pool)
 	return NULL;
 }
 
-static void wake_dying_workers(struct list_head *cull_list)
+static void detach_dying_workers(struct list_head *cull_list)
 {
 	struct worker *worker;
 
-	list_for_each_entry(worker, cull_list, entry) {
-		unbind_worker(worker);
-		/*
-		 * If the worker was somehow already running, then it had to be
-		 * in pool->idle_list when set_worker_dying() happened or we
-		 * wouldn't have gotten here.
-		 *
-		 * Thus, the worker must either have observed the WORKER_DIE
-		 * flag, or have set its state to TASK_IDLE. Either way, the
-		 * below will be observed by the worker and is safe to do
-		 * outside of pool->lock.
-		 */
-		wake_up_process(worker->task);
-	}
+	list_for_each_entry(worker, cull_list, entry)
+		detach_worker(worker);
 }
 
 static void reap_dying_workers(struct list_head *cull_list)
@@ -2930,9 +2924,9 @@ static void idle_cull_fn(struct work_struct *work)
 
 	/*
 	 * Grabbing wq_pool_attach_mutex here ensures an already-running worker
-	 * cannot proceed beyong worker_detach_from_pool() in its self-destruct
-	 * path. This is required as a previously-preempted worker could run after
-	 * set_worker_dying() has happened but before wake_dying_workers() did.
+	 * cannot proceed beyond set_pf_worker() in its self-destruct path.
+	 * This is required as a previously-preempted worker could run after
+	 * set_worker_dying() has happened but before detach_dying_workers() did.
 	 */
 	mutex_lock(&wq_pool_attach_mutex);
 	raw_spin_lock_irq(&pool->lock);
@@ -2953,7 +2947,7 @@ static void idle_cull_fn(struct work_struct *work)
 	}
 
 	raw_spin_unlock_irq(&pool->lock);
-	wake_dying_workers(&cull_list);
+	detach_dying_workers(&cull_list);
 	mutex_unlock(&wq_pool_attach_mutex);
 
 	reap_dying_workers(&cull_list);
@@ -3336,7 +3330,6 @@ static int worker_thread(void *__worker)
 
 		set_task_comm(worker->task, "kworker/dying");
 		ida_free(&pool->worker_ida, worker->id);
-		worker_detach_from_pool(worker);
 		WARN_ON_ONCE(!list_empty(&worker->entry));
 		return 0;
 	}
@@ -4921,7 +4914,7 @@ static void put_unbound_pool(struct worker_pool *pool)
 	WARN_ON(pool->nr_workers || pool->nr_idle);
 	raw_spin_unlock_irq(&pool->lock);
 
-	wake_dying_workers(&cull_list);
+	detach_dying_workers(&cull_list);
 
 	mutex_unlock(&wq_pool_attach_mutex);
 
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 4/4] workqueue: Remove useless pool->dying_workers
  2024-06-21  7:32 [PATCH 0/4] workqueue: Destroy workers in idle_cull_fn() Lai Jiangshan
                   ` (2 preceding siblings ...)
  2024-06-21  7:32 ` [PATCH 3/4] workqueue: Detach workers directly in idle_cull_fn() Lai Jiangshan
@ 2024-06-21  7:32 ` Lai Jiangshan
  2024-06-21 22:34 ` [PATCH 0/4] workqueue: Destroy workers in idle_cull_fn() Tejun Heo
  4 siblings, 0 replies; 15+ messages in thread
From: Lai Jiangshan @ 2024-06-21  7:32 UTC (permalink / raw)
  To: linux-kernel; +Cc: Lai Jiangshan, Valentin Schneider, Tejun Heo, Lai Jiangshan

From: Lai Jiangshan <jiangshan.ljs@antgroup.com>

A dying worker is first moved from pool->workers to pool->dying_workers
in set_worker_dying() and then removed from pool->dying_workers in
detach_dying_workers().  The whole procedure takes place within the same
wq_pool_attach_mutex critical section.

So pool->dying_workers is useless: just remove it, keep the dying worker
on pool->workers after set_worker_dying(), and remove it in
detach_dying_workers() with wq_pool_attach_mutex held.
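
As a comments-only sketch of why the separate list buys nothing:

	/*
	 * idle_cull_fn(), one wq_pool_attach_mutex critical section:
	 *
	 *   set_worker_dying():              detach_dying_workers():
	 *     worker->flags |= WORKER_DIE      detach_worker():
	 *     worker->entry -> cull_list         list_del(&worker->node)
	 *     get_task_struct(worker->task)      worker->pool = NULL
	 *
	 * pool->workers is protected by the same mutex, so nothing can
	 * observe it between the two steps; the dying worker can simply
	 * stay on it until detach_worker() unlinks it.
	 */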

Cc: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
 kernel/workqueue.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e9ca42ef425a..dc9acf8ecd0c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -215,7 +215,6 @@ struct worker_pool {
 
 	struct worker		*manager;	/* L: purely informational */
 	struct list_head	workers;	/* A: attached workers */
-	struct list_head        dying_workers;  /* A: workers about to die */
 
 	struct ida		worker_ida;	/* worker IDs for task name */
 
@@ -2862,7 +2861,6 @@ static void set_worker_dying(struct worker *worker, struct list_head *list)
 	worker->flags |= WORKER_DIE;
 
 	list_move(&worker->entry, list);
-	list_move(&worker->node, &pool->dying_workers);
 
 	/* get an extra task struct reference for later kthread_stop_put() */
 	get_task_struct(worker->task);
@@ -4721,7 +4719,6 @@ static int init_worker_pool(struct worker_pool *pool)
 	timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);
 
 	INIT_LIST_HEAD(&pool->workers);
-	INIT_LIST_HEAD(&pool->dying_workers);
 
 	ida_init(&pool->worker_ida);
 	INIT_HLIST_NODE(&pool->hash_node);
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/4] workqueue: Destroy workers in idle_cull_fn().
  2024-06-21  7:32 [PATCH 0/4] workqueue: Destroy workers in idle_cull_fn() Lai Jiangshan
                   ` (3 preceding siblings ...)
  2024-06-21  7:32 ` [PATCH 4/4] workqueue: Remove useless pool->dying_workers Lai Jiangshan
@ 2024-06-21 22:34 ` Tejun Heo
  4 siblings, 0 replies; 15+ messages in thread
From: Tejun Heo @ 2024-06-21 22:34 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: linux-kernel, Lai Jiangshan, Valentin Schneider

On Fri, Jun 21, 2024 at 03:32:21PM +0800, Lai Jiangshan wrote:
> From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
> 
> After commit e02b93124855 ("workqueue: Unbind kworkers before sending
> them to exit()"), the code that kicks off the destruction of workers
> runs in process context (idle_cull_fn()), so more of the destruction
> code can be moved into that process context to simplify the teardown.

Applied to wq/for-6.11.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/4] workqueue: Reap workers via kthread_stop() and remove detach_completion
  2024-06-21  7:32 ` [PATCH 1/4] workqueue: Reap workers via kthread_stop() and remove detach_completion Lai Jiangshan
@ 2024-07-23 16:19   ` Marc Hartmayer
  2024-07-25  0:11     ` Lai Jiangshan
  2024-09-10  9:45     ` Marc Hartmayer
  0 siblings, 2 replies; 15+ messages in thread
From: Marc Hartmayer @ 2024-07-23 16:19 UTC (permalink / raw)
  To: Lai Jiangshan, linux-kernel
  Cc: Lai Jiangshan, Valentin Schneider, Tejun Heo, Lai Jiangshan,
	Heiko Carstens, Sven Schnelle, Mete Durlu

On Fri, Jun 21, 2024 at 03:32 PM +0800, Lai Jiangshan <jiangshanlai@gmail.com> wrote:
> From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
>
> The code that kicks off the destruction of workers now runs in process
> context (idle_cull_fn()), so kthread_stop() can be used there to replace
> the work of pool->detach_completion.
>
> The wakeup in wake_dying_workers() is unneeded after this change, but it
> is harmless; just keep it here until the next patch renames
> wake_dying_workers(), rather than renaming the function twice.
>
> Cc: Valentin Schneider <vschneid@redhat.com>
> Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
> ---
>  kernel/workqueue.c | 35 +++++++++++++++++++----------------
>  1 file changed, 19 insertions(+), 16 deletions(-)
>

Hi Lai,

a bisect of a regression in our CI on s390x led to this patch. The bug
is pretty easy to reproduce (currently I have only tested it on s390x;
I will try to test it on x86 as well):

1. Start a Linux QEMU/KVM guest with 2 cores using this patch and enable
   `panic_on_warn=1` for the guest kernel.
2. Run the following command in the KVM guest:

  $  dd if=/dev/zero of=/dev/null & while : ; do chcpu -d 1; chcpu -e 1; done

3. Wait for the crash. e.g.:

2024/07/23 18:01:21 [M83LP63]: [  157.267727] ------------[ cut here ]------------
2024/07/23 18:01:21 [M83LP63]: [  157.267735] WARNING: CPU: 21 PID: 725 at kernel/workqueue.c:3340 worker_thread+0x54e/0x558
2024/07/23 18:01:21 [M83LP63]: [  157.267746] Modules linked in: binfmt_misc nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables sunrpc dm_service_time s390_trng vfio_ccw mdev vfio_iommu_type1 vfio sch_fq_codel
2024/07/23 18:01:21 [M83LP63]: loop dm_multipath configfs nfnetlink lcs ctcm fsm zfcp scsi_transport_fc ghash_s390 prng chacha_s390 libchacha aes_s390 des_s390 libdes sha3_512_s390 sha3_256_s390 sha512_s390 sha256_s390 sha1_s390 sha_common scm_block eadm_sch scsi_dh_rdac scsi_dh_emc scsi_dh_alua pkey zcrypt rng_core autofs4
2024/07/23 18:01:21 [M83LP63]: [  157.267792] CPU: 21 PID: 725 Comm: kworker/dying Not tainted 6.10.0-rc2-00239-g68f83057b913 #95
2024/07/23 18:01:21 [M83LP63]: [  157.267796] Hardware name: IBM 3906 M04 704 (LPAR)
2024/07/23 18:01:21 [M83LP63]: [  157.267802]            R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3
2024/07/23 18:01:21 [M83LP63]: [  157.267797] Krnl PSW : 0704d00180000000 000003d600fcd9fa (worker_thread+0x552/0x558)
2024/07/23 18:01:21 [M83LP63]: [  157.267806] Krnl GPRS: 6479696e6700776f 000002c901b62780 000003d602493ec8 000002c914954600
2024/07/23 18:01:21 [M83LP63]: [  157.267809]            0000000000000000 0000000000000008 000002c901a85400 000002c90719e840
2024/07/23 18:01:21 [M83LP63]: [  157.267811]            000002c90719e880 000002c901a85420 000002c91127adf0 000002c901a85400
2024/07/23 18:01:21 [M83LP63]: [  157.267813]            000002c914954600 0000000000000000 000003d600fcd772 000003560452bd98
2024/07/23 18:01:21 [M83LP63]: [  157.267822] Krnl Code: 000003d600fcd9ec: c0e500674262        brasl   %r14,000003d601cb5eb0
2024/07/23 18:01:21 [M83LP63]: [  157.267822]            000003d600fcd9f2: a7f4ffc8            brc     15,000003d600fcd982
2024/07/23 18:01:21 [M83LP63]: [  157.267822]           #000003d600fcd9f6: af000000            mc      0,0
2024/07/23 18:01:21 [M83LP63]: [  157.267822]           >000003d600fcd9fa: a7f4fec2            brc     15,000003d600fcd77e
2024/07/23 18:01:21 [M83LP63]: [  157.267822]            000003d600fcd9fe: 0707                bcr     0,%r7
2024/07/23 18:01:21 [M83LP63]: [  157.267822]            000003d600fcda00: c00400682e10        brcl    0,000003d601cd3620
2024/07/23 18:01:21 [M83LP63]: [  157.267822]            000003d600fcda06: eb7ff0500024        stmg    %r7,%r15,80(%r15)
2024/07/23 18:01:21 [M83LP63]: [  157.267822]            000003d600fcda0c: b90400ef            lgr     %r14,%r15
2024/07/23 18:01:21 [M83LP63]: [  157.267853] Call Trace:
2024/07/23 18:01:21 [M83LP63]: [  157.267855]  [<000003d600fcd9fa>] worker_thread+0x552/0x558
2024/07/23 18:01:21 [M83LP63]: [  157.267859] ([<000003d600fcd772>] worker_thread+0x2ca/0x558)
2024/07/23 18:01:21 [M83LP63]: [  157.267862]  [<000003d600fd6c80>] kthread+0x120/0x128
2024/07/23 18:01:21 [M83LP63]: [  157.267865]  [<000003d600f5305c>] __ret_from_fork+0x3c/0x58
2024/07/23 18:01:21 [M83LP63]: [  157.267868]  [<000003d601cc746a>] ret_from_fork+0xa/0x30
2024/07/23 18:01:21 [M83LP63]: [  157.267873] Last Breaking-Event-Address:
2024/07/23 18:01:21 [M83LP63]: [  157.267874]  [<000003d600fcd778>] worker_thread+0x2d0/0x558
2024/07/23 18:01:21 [M83LP63]: [  157.267879] Kernel panic - not syncing: kernel: panic_on_warn set ...
2024/07/23 18:01:22 [M83LP63]: [  157.267881] CPU: 21 PID: 725 Comm: kworker/dying Not tainted 6.10.0-rc2-00239-g68f83057b913 #95
2024/07/23 18:01:22 [M83LP63]: [  157.267884] Hardware name: IBM 3906 M04 704 (LPAR)
2024/07/23 18:01:22 [M83LP63]: [  157.267885] Call Trace:
2024/07/23 18:01:22 [M83LP63]: [  157.267886]  [<000003d600fa7f8c>] panic+0x1ec/0x308
2024/07/23 18:01:22 [M83LP63]: [  157.267892]  [<000003d600fa822c>] check_panic_on_warn+0x84/0x88
2024/07/23 18:01:22 [M83LP63]: [  157.267896]  [<000003d600fa846e>] __warn+0xa6/0x160
2024/07/23 18:01:22 [M83LP63]: [  157.267899]  [<000003d601c8ac7e>] report_bug+0x146/0x1c0
2024/07/23 18:01:22 [M83LP63]: [  157.267903]  [<000003d600f50e64>] monitor_event_exception+0x44/0x80
2024/07/23 18:01:22 [M83LP63]: [  157.267905]  [<000003d601cb67e0>] __do_pgm_check+0xf0/0x1b0
2024/07/23 18:01:22 [M83LP63]: [  157.267911]  [<000003d601cc75a8>] pgm_check_handler+0x118/0x168
2024/07/23 18:01:22 [M83LP63]: [  157.267914]  [<000003d600fcd9fa>] worker_thread+0x552/0x558
2024/07/23 18:01:22 [M83LP63]: [  157.267917] ([<000003d600fcd772>] worker_thread+0x2ca/0x558)
2024/07/23 18:01:22 [M83LP63]: [  157.267920]  [<000003d600fd6c80>] kthread+0x120/0x128
2024/07/23 18:01:22 [M83LP63]: [  157.267923]  [<000003d600f5305c>] __ret_from_fork+0x3c/0x58
2024/07/23 18:01:22 [M83LP63]: [  157.267926]  [<000003d601cc746a>] ret_from_fork+0xa/0x30

-- 
Kind regards / Beste Grüße
   Marc Hartmayer

IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/4] workqueue: Reap workers via kthread_stop() and remove detach_completion
  2024-07-23 16:19   ` Marc Hartmayer
@ 2024-07-25  0:11     ` Lai Jiangshan
  2024-07-29  1:49       ` Lai Jiangshan
  2024-09-10  9:45     ` Marc Hartmayer
  1 sibling, 1 reply; 15+ messages in thread
From: Lai Jiangshan @ 2024-07-25  0:11 UTC (permalink / raw)
  To: Marc Hartmayer
  Cc: linux-kernel, Lai Jiangshan, Valentin Schneider, Tejun Heo,
	Heiko Carstens, Sven Schnelle, Mete Durlu

Hello Marc

Thank you for the report.

On Wed, Jul 24, 2024 at 12:19 AM Marc Hartmayer <mhartmay@linux.ibm.com> wrote:

> Hi Lai,
>
> a bisect of a regression in our CI on s390x led to this patch. The bug
> is pretty easy to reproduce (currently I have only tested it on s390x;
> I will try to test it on x86 as well):

I can't reproduce it on x86 after testing for only 30 minutes, but it
can definitely happen on x86 in theory.

>
> 1. Start a Linux QEMU/KVM guest with 2 cores using this patch and enable
>    `panic_on_warn=1` for the guest kernel.
> 2. Run the following command in the KVM guest:
>
>   $  dd if=/dev/zero of=/dev/null & while : ; do chcpu -d 1; chcpu -e 1; done
>
> 3. Wait for the crash. e.g.:
>
> 2024/07/23 18:01:21 [M83LP63]: [  157.267727] ------------[ cut here ]------------
> 2024/07/23 18:01:21 [M83LP63]: [  157.267735] WARNING: CPU: 21 PID: 725 at kernel/workqueue.c:3340 worker_thread+0x54e/0x558


> @@ -3330,7 +3338,6 @@ static int worker_thread(void *__worker)
>                 ida_free(&pool->worker_ida, worker->id);
>                 worker_detach_from_pool(worker);
>                 WARN_ON_ONCE(!list_empty(&worker->entry));
> -               kfree(worker);
>                 return 0;
>         }

After this change, the condition "!list_empty(&worker->entry)" can be
true when the worker is still on the cull_list, waiting to be reaped by
reap_dying_workers().

I will remove the WARN_ON_ONCE().
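
As a comments-only interleaving sketch, condensed from the code in this
patch:

	/*
	 * manager (idle_cull_fn)            dying worker
	 * ----------------------            ------------
	 * set_worker_dying():
	 *   worker->entry -> cull_list
	 * wake_dying_workers()              worker_thread():
	 *                                     sees WORKER_DIE
	 *                                     WARN_ON_ONCE(!list_empty(
	 *                                       &worker->entry)) <- fires:
	 *                                     entry is still on cull_list
	 * reap_dying_workers():
	 *   list_del_init(&worker->entry)
	 *   kthread_stop_put(worker->task)
	 */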

Thanks
Lai

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/4] workqueue: Reap workers via kthread_stop() and remove detach_completion
  2024-07-25  0:11     ` Lai Jiangshan
@ 2024-07-29  1:49       ` Lai Jiangshan
  0 siblings, 0 replies; 15+ messages in thread
From: Lai Jiangshan @ 2024-07-29  1:49 UTC (permalink / raw)
  To: Marc Hartmayer
  Cc: linux-kernel, Lai Jiangshan, Valentin Schneider, Tejun Heo,
	Heiko Carstens, Sven Schnelle, Mete Durlu

Hello Marc

On Thu, Jul 25, 2024 at 8:11 AM Lai Jiangshan <jiangshanlai@gmail.com> wrote:
> > 2024/07/23 18:01:21 [M83LP63]: [  157.267727] ------------[ cut here ]------------
> > 2024/07/23 18:01:21 [M83LP63]: [  157.267735] WARNING: CPU: 21 PID: 725 at kernel/workqueue.c:3340 worker_thread+0x54e/0x558
>
>
> > @@ -3330,7 +3338,6 @@ static int worker_thread(void *__worker)
> >                 ida_free(&pool->worker_ida, worker->id);
> >                 worker_detach_from_pool(worker);
> >                 WARN_ON_ONCE(!list_empty(&worker->entry));
> > -               kfree(worker);
> >                 return 0;
> >         }
>
> After this change, the condition "!list_empty(&worker->entry)" can be
> true when the worker is still on the cull_list, waiting to be reaped by
> reap_dying_workers().
>
> I will remove the WARN_ON_ONCE().
>

Here is the fix:
https://lore.kernel.org/lkml/20240725010437.694727-1-jiangshanlai@gmail.com/

Could you take a look please.

Thank you for the report,
Lai

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/4] workqueue: Reap workers via kthread_stop() and remove detach_completion
  2024-07-23 16:19   ` Marc Hartmayer
  2024-07-25  0:11     ` Lai Jiangshan
@ 2024-09-10  9:45     ` Marc Hartmayer
  2024-09-10 16:29       ` Marc Hartmayer
  1 sibling, 1 reply; 15+ messages in thread
From: Marc Hartmayer @ 2024-09-10  9:45 UTC (permalink / raw)
  To: Lai Jiangshan, linux-kernel
  Cc: Lai Jiangshan, Valentin Schneider, Tejun Heo, Lai Jiangshan,
	Heiko Carstens, Sven Schnelle, Mete Durlu, Christian Borntraeger

On Tue, Jul 23, 2024 at 06:19 PM +0200, "Marc Hartmayer" <mhartmay@linux.ibm.com> wrote:
> On Fri, Jun 21, 2024 at 03:32 PM +0800, Lai Jiangshan <jiangshanlai@gmail.com> wrote:
>> From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
>>
>> The code that kicks off the destruction of workers now runs in process
>> context (idle_cull_fn()), so kthread_stop() can be used there to replace
>> the work of pool->detach_completion.
>>
>> The wakeup in wake_dying_workers() is unneeded after this change, but it
>> is harmless; just keep it here until the next patch renames
>> wake_dying_workers(), rather than renaming the function twice.
>>
>> Cc: Valentin Schneider <vschneid@redhat.com>
>> Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
>> ---
>>  kernel/workqueue.c | 35 +++++++++++++++++++----------------
>>  1 file changed, 19 insertions(+), 16 deletions(-)
>>
>

Hi Lai,

I’m not sure whether this NULL-pointer crash is related to this patch
series, but it is triggered by the same test that also triggered the
other problem I reported.

        [   23.133876] Unable to handle kernel pointer dereference in virtual kernel address space
        [   23.133950] Failing address: 0000000000000000 TEID: 0000000000000483
        [   23.133954] Fault in home space mode while using kernel ASCE.
        [   23.133957] AS:000000001b8f0007 R3:0000000056cf4007 S:0000000056cf3800 P:000000000000003d 
        [   23.134207] Oops: 0004 ilc:2 [#1] SMP 
        [   23.134273] Modules linked in: essiv authenc dm_crypt encrypted_keys loop pkey zcrypt s390_trng rng_core ghash_s390 prng chacha_s390 libchacha aes_s390 des_s390 virtio_console libdes vmw_vsock_virtio_transport vmw_vsock_virtio_transport_common sha3_512_s390 vsock sha3_256_s390 sha512_s390 sha256_s390 sha1_s390 sha_common vfio_ccw mdev vfio_iommu_type1 vfio sch_fq_codel drm i2c_core drm_panel_orientation_quirks configfs autofs4
        [   23.134386] CPU: 0 UID: 0 PID: 376 Comm: kworker/u10:2 Not tainted 6.11.0-20240902.rc6.git1.67784a74e258.300.fc40.s390x+git #1
        [   23.134394] Hardware name: IBM 8561 T01 703 (KVM/Linux)
        [   23.134406] Workqueue:  0x0 ()
        [   23.134440] Krnl PSW : 0404c00180000000 0000024e326caf28 (worker_thread+0x48/0x430)
        [   23.134471]            R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3
        [   23.134474] Krnl GPRS: 0000000058778000 0000000000000000 0000024e00000001 0000000058778000
        [   23.134476]            0000000000000000 0000000058778000 0000000057b8e240 0000000000000002
        [   23.134480]            0000000000000000 0000000000000028 0000000000000000 0000000057b8e240
        [   23.134481]            0000000058778000 0000000058778000 0000024e326caf18 000001ce32953d88
        [   23.134499] Krnl Code: 0000024e326caf1c: acfcf0c8		stnsm	200(%r15),252
        [   23.134499]            0000024e326caf20: a7180000		lhi	%r1,0
        [   23.134499]           #0000024e326caf24: 582083ac		l	%r2,940(%r8)
        [   23.134499]           >0000024e326caf28: ba12a000		cs	%r1,%r2,0(%r10)
        [   23.134499]            0000024e326caf2c: a77400cf		brc	7,0000024e326cb0ca
        [   23.134499]            0000024e326caf30: 5800b078		l	%r0,120(%r11)
        [   23.134499]            0000024e326caf34: a7010002		tmll	%r0,2
        [   23.134499]            0000024e326caf38: a77400d4		brc	7,0000024e326cb0e0
        [   23.134516] Call Trace:
        [   23.134520]  [<0000024e326caf28>] worker_thread+0x48/0x430 
        [   23.134525] ([<0000024e326caf18>] worker_thread+0x38/0x430)
        [   23.134528]  [<0000024e326d3a3e>] kthread+0x11e/0x130 
        [   23.134533]  [<0000024e3264b0dc>] __ret_from_fork+0x3c/0x60 
        [   23.134536]  [<0000024e333fb37a>] ret_from_fork+0xa/0x38 
        [   23.134552] Last Breaking-Event-Address:
        [   23.134553]  [<0000024e333f4c04>] mutex_unlock+0x24/0x30
        [   23.134562] Kernel panic - not syncing: Fatal exception: panic_on_oops

This happened with Linux
6.11.0-20240902.rc6.git1.67784a74e258.300.fc40.s390x (using defconfig),
but also with an older build,
6.11.0-20240719.rc0.git15.720261cfc732.300.fc40.s390x, on s390x (both
kernels contain your patches). I have not bisected/debugged the problem
yet, but you may already have an idea. I will try to reproduce the
problem and give you more debug information.

Thanks!

[…snip]

-- 
Kind regards / Beste Grüße
   Marc Hartmayer

IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/4] workqueue: Reap workers via kthread_stop() and remove detach_completion
  2024-09-10  9:45     ` Marc Hartmayer
@ 2024-09-10 16:29       ` Marc Hartmayer
  2024-09-11  3:23         ` Lai Jiangshan
  0 siblings, 1 reply; 15+ messages in thread
From: Marc Hartmayer @ 2024-09-10 16:29 UTC (permalink / raw)
  To: Lai Jiangshan, linux-kernel
  Cc: Lai Jiangshan, Valentin Schneider, Tejun Heo, Lai Jiangshan,
	Heiko Carstens, Sven Schnelle, Mete Durlu, Christian Borntraeger

On Tue, Sep 10, 2024 at 11:45 AM +0200, "Marc Hartmayer" <mhartmay@linux.ibm.com> wrote:
> On Tue, Jul 23, 2024 at 06:19 PM +0200, "Marc Hartmayer" <mhartmay@linux.ibm.com> wrote:
>> On Fri, Jun 21, 2024 at 03:32 PM +0800, Lai Jiangshan <jiangshanlai@gmail.com> wrote:
>>> From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
>>>
>>> The code that kicks off the destruction of workers now runs in process
>>> context (idle_cull_fn()), so kthread_stop() can be used there to replace
>>> the work of pool->detach_completion.
>>>
>>> The wakeup in wake_dying_workers() is unneeded after this change, but it
>>> is harmless; just keep it here until the next patch renames
>>> wake_dying_workers(), rather than renaming the function twice.
>>>
>>> Cc: Valentin Schneider <vschneid@redhat.com>
>>> Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
>>> ---
>>>  kernel/workqueue.c | 35 +++++++++++++++++++----------------
>>>  1 file changed, 19 insertions(+), 16 deletions(-)
>>>
>>

[…snip…]

Hi Lai,

I’ve reproduced the issue with Linux commit bc83b4d1f086. Here is
the “beautified” stacktrace (the output of
`$LINUX/scripts/decode_stacktrace.sh vmlinux auto < dmesg.txt`).

...
[   14.271265] Unable to handle kernel pointer dereference in virtual kernel address space
[   14.271314] Failing address: 0000000000000000 TEID: 0000000000000483
[   14.271317] Fault in home space mode while using kernel ASCE.
[   14.271320] AS:000000001df84007 R3:0000000064888007 S:0000000064887800 P:000000000000003d
[   14.271519] Oops: 0004 ilc:2 [#1] SMP
[   14.271570] Modules linked in: essiv authenc dm_crypt encrypted_keys loop vmw_vsock_virtio_transport vmw_vsock_virtio_transport_common pkey vsock zcrypt s390_trng rng_core ghash_s390 prng chacha_s390 libchacha virtio_console aes_s390 des_s390 libdes sha3_512_s390 sha3_256_s390 sha512_s390 sha256_s390 sha1_s390 sha_common vfio_ccw mdev vfio_iommu_type1 vfio sch_fq_codel drm i2c_core drm_panel_orientation_quirks configfs autofs4
[   14.271661] CPU: 0 UID: 0 PID: 324 Comm: kworker/u10:2 Not tainted 6.11.0-20240909.rc7.git8.bc83b4d1f086.300.fc40.s390x+git #1
[   14.271667] Hardware name: IBM 8561 T01 701 (KVM/Linux)
[   14.271677] Workqueue: . 0x0 ()
[   14.271702] Krnl PSW : 0404c00180000000 000002d8c205ef28 worker_thread (./arch/s390/include/asm/atomic_ops.h:198 ./arch/s390/include/asm/spinlock.h:61 ./arch/s390/include/asm/spinlock.h:66 ./include/linux/spinlock.h:187 ./include/linux/spinlock_api_smp.h:120 kernel/workqueue.c:3346)
[   14.271728]            R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3
[   14.271730] Krnl GPRS: 00000000673f0000 0000000000000000 000002d800000001 00000000673f0000
[   14.271732]            0000000000000000 00000000673f0000 0000000067ac5b40 0000000000000002
[   14.271735]            0000000000000000 0000000000000028 0000000000000000 0000000067ac5b40
[   14.271736]            00000000673f0000 00000000673f0000 000002d8c205ef18 00000258c22a7d88
[ 14.271752] Krnl Code: 000002d8c205ef1c: acfcf0c8 stnsm 200(%r15),252
objdump: '/tmp/tmp.yRzOQQynJL.o': No such file
objdump: '/tmp/tmp.yRzOQQynJL.o': No such file
All code
========

Code starting with the faulting instruction
===========================================
000002d8c205ef20: a7180000            lhi     %r1,0
#000002d8c205ef24: 582083ac            l       %r2,940(%r8)
>000002d8c205ef28: ba12a000            cs      %r1,%r2,0(%r10)
000002d8c205ef2c: a77400cf            brc     7,000002d8c205f0ca
000002d8c205ef30: 5800b078            l       %r0,120(%r11)
000002d8c205ef34: a7010002            tmll    %r0,2
000002d8c205ef38: a77400d4            brc     7,000002d8c205f0e0
[   14.271766] Call Trace:
[   14.271769] worker_thread (./arch/s390/include/asm/atomic_ops.h:198 ./arch/s390/include/asm/spinlock.h:61 ./arch/s390/include/asm/spinlock.h:66 ./include/linux/spinlock.h:187 ./include/linux/spinlock_api_smp.h:120 kernel/workqueue.c:3346)
[   14.271774] worker_thread (./arch/s390/include/asm/lowcore.h:226 ./arch/s390/include/asm/spinlock.h:61 ./arch/s390/include/asm/spinlock.h:66 ./include/linux/spinlock.h:187 ./include/linux/spinlock_api_smp.h:120 kernel/workqueue.c:3346)
[   14.271777] kthread (kernel/kthread.c:389)
[   14.271781] __ret_from_fork (arch/s390/kernel/process.c:62)
[   14.271784] ret_from_fork (arch/s390/kernel/entry.S:309)
[   14.271806] Last Breaking-Event-Address:
[   14.271807] mutex_unlock (kernel/locking/mutex.c:549)

So it seems to me that `worker->pool` is NULL in the
`workqueue.c:worker_thread` function and this leads to the crash.

My next step is to try to bisect the bug.

-- 
Kind regards / Beste Grüße
   Marc Hartmayer

IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/4] workqueue: Reap workers via kthread_stop() and remove detach_completion
  2024-09-10 16:29       ` Marc Hartmayer
@ 2024-09-11  3:23         ` Lai Jiangshan
  2024-09-11  3:32           ` Lai Jiangshan
  0 siblings, 1 reply; 15+ messages in thread
From: Lai Jiangshan @ 2024-09-11  3:23 UTC (permalink / raw)
  To: Marc Hartmayer
  Cc: linux-kernel, Lai Jiangshan, Valentin Schneider, Tejun Heo,
	Heiko Carstens, Sven Schnelle, Mete Durlu, Christian Borntraeger

Hello, Marc

On Wed, Sep 11, 2024 at 12:29 AM Marc Hartmayer <mhartmay@linux.ibm.com>
> Code starting with the faulting instruction
> ===========================================
> 000002d8c205ef20: a7180000            lhi     %r1,0
> #000002d8c205ef24: 582083ac            l       %r2,940(%r8)
> >000002d8c205ef28: ba12a000            cs      %r1,%r2,0(%r10)
> 000002d8c205ef2c: a77400cf            brc     7,000002d8c205f0ca
> 000002d8c205ef30: 5800b078            l       %r0,120(%r11)
> 000002d8c205ef34: a7010002            tmll    %r0,2
> 000002d8c205ef38: a77400d4            brc     7,000002d8c205f0e0
> [   14.271766] Call Trace:
> [   14.271769] worker_thread (./arch/s390/include/asm/atomic_ops.h:198 ./arch/s390/include/asm/spinlock.h:61 ./arch/s390/include/asm/spinlock.h:66 ./include/linux/spinlock.h:187 ./include/linux/spinlock_api_smp.h:120 kernel/workqueue.c:3346)
> [   14.271774] worker_thread (./arch/s390/include/asm/lowcore.h:226 ./arch/s390/include/asm/spinlock.h:61 ./arch/s390/include/asm/spinlock.h:66 ./include/linux/spinlock.h:187 ./include/linux/spinlock_api_smp.h:120 kernel/workqueue.c:3346)
> [   14.271777] kthread (kernel/kthread.c:389)
> [   14.271781] __ret_from_fork (arch/s390/kernel/process.c:62)
> [   14.271784] ret_from_fork (arch/s390/kernel/entry.S:309)
> [   14.271806] Last Breaking-Event-Address:
> [   14.271807] mutex_unlock (kernel/locking/mutex.c:549)
>
> So it seems to me that `worker->pool` is NULL in the
> `workqueue.c:worker_thread` function and this leads to the crash.
>

I'm not familiar with s390 asm code, but it might be the case that
`worker->pool` is NULL in worker_thread(), since detach_worker() resets
worker->pool to NULL.

If that is the case, READ_ONCE(worker->pool) should be used in
worker_thread() to fix the problem.

(It would be strange for worker->pool to be read more than once in
worker_thread(), given how many times it is used, but since READ_ONCE()
is not used, the compiler may do so.)
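
A minimal sketch of the two behaviors (assuming the compiler chooses to
reload the plain load):

	/* plain load: the compiler may legally re-read worker->pool at any
	 * later use of "pool", and such a reload can observe the NULL store
	 * made by detach_worker() */
	struct worker_pool *pool = worker->pool;

	/* single snapshot: "pool" cannot change underneath us afterwards */
	struct worker_pool *pool = READ_ONCE(worker->pool);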

Thanks
Lai

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/4] workqueue: Reap workers via kthread_stop() and remove detach_completion
  2024-09-11  3:23         ` Lai Jiangshan
@ 2024-09-11  3:32           ` Lai Jiangshan
  2024-09-11  8:27             ` Marc Hartmayer
  0 siblings, 1 reply; 15+ messages in thread
From: Lai Jiangshan @ 2024-09-11  3:32 UTC (permalink / raw)
  To: Marc Hartmayer
  Cc: linux-kernel, Lai Jiangshan, Valentin Schneider, Tejun Heo,
	Heiko Carstens, Sven Schnelle, Mete Durlu, Christian Borntraeger

On Wed, Sep 11, 2024 at 11:23 AM Lai Jiangshan <jiangshanlai@gmail.com> wrote:
>
> Hello, Marc
>
> On Wed, Sep 11, 2024 at 12:29 AM Marc Hartmayer <mhartmay@linux.ibm.com>
> > Code starting with the faulting instruction
> > ===========================================
> > 000002d8c205ef20: a7180000            lhi     %r1,0
> > #000002d8c205ef24: 582083ac            l       %r2,940(%r8)
> > >000002d8c205ef28: ba12a000            cs      %r1,%r2,0(%r10)
> > 000002d8c205ef2c: a77400cf            brc     7,000002d8c205f0ca
> > 000002d8c205ef30: 5800b078            l       %r0,120(%r11)
> > 000002d8c205ef34: a7010002            tmll    %r0,2
> > 000002d8c205ef38: a77400d4            brc     7,000002d8c205f0e0
> > [   14.271766] Call Trace:
> > [   14.271769] worker_thread (./arch/s390/include/asm/atomic_ops.h:198 ./arch/s390/include/asm/spinlock.h:61 ./arch/s390/include/asm/spinlock.h:66 ./include/linux/spinlock.h:187 ./include/linux/spinlock_api_smp.h:120 kernel/workqueue.c:3346)
> > [   14.271774] worker_thread (./arch/s390/include/asm/lowcore.h:226 ./arch/s390/include/asm/spinlock.h:61 ./arch/s390/include/asm/spinlock.h:66 ./include/linux/spinlock.h:187 ./include/linux/spinlock_api_smp.h:120 kernel/workqueue.c:3346)
> > [   14.271777] kthread (kernel/kthread.c:389)
> > [   14.271781] __ret_from_fork (arch/s390/kernel/process.c:62)
> > [   14.271784] ret_from_fork (arch/s390/kernel/entry.S:309)
> > [   14.271806] Last Breaking-Event-Address:
> > [   14.271807] mutex_unlock (kernel/locking/mutex.c:549)
> >
> > So it seems to me that `worker->pool` is NULL in the
> > `workqueue.c:worker_thread` function and this leads to the crash.
> >
>
> I'm not familiar with s390 asm code, but it might be the case that
> `worker->pool` is NULL in the in worker_thread() since detach_worker()
> resets worker->pool to NULL.
>
> If it is the case, READ_ONCE(worker->pool) should be used in worker_thread()
> to fix the problem.
>
> (It is weird to me if worker->pool is read multi-time in worker_thread()
> since it is used many times, but since READ_ONCE() is not used, it can
> be possible).

Oh, it is possible that the worker is created and then destroyed before
ever being woken up. If that is the case, READ_ONCE() won't help. I'm
going to explore whether "worker->pool = NULL;" can be moved out of
detach_worker().
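
A comments-only sketch of that window:

	/*
	 * create_worker()                   manager (idle_cull_fn)
	 * ---------------                   ----------------------
	 * worker->pool = pool;
	 * kthread created, not yet run
	 *                                   set_worker_dying()
	 *                                   detach_worker():
	 *                                     worker->pool = NULL;
	 * worker_thread() runs first time:
	 *   even the very first read of
	 *   worker->pool sees NULL -> oops
	 */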

Thanks
Lai

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/4] workqueue: Reap workers via kthread_stop() and remove detach_completion
  2024-09-11  3:32           ` Lai Jiangshan
@ 2024-09-11  8:27             ` Marc Hartmayer
  2024-09-11  9:37               ` Marc Hartmayer
  0 siblings, 1 reply; 15+ messages in thread
From: Marc Hartmayer @ 2024-09-11  8:27 UTC (permalink / raw)
  To: Lai Jiangshan
  Cc: linux-kernel, Lai Jiangshan, Valentin Schneider, Tejun Heo,
	Heiko Carstens, Sven Schnelle, Mete Durlu, Christian Borntraeger

On Wed, Sep 11, 2024 at 11:32 AM +0800, Lai Jiangshan <jiangshanlai@gmail.com> wrote:
> On Wed, Sep 11, 2024 at 11:23 AM Lai Jiangshan <jiangshanlai@gmail.com> wrote:
>>
>> Hello, Marc
>>

Hi Lai,

[…snip…]

>>
>> I'm not familiar with s390 asm code, but it might be the case that
>> `worker->pool` is NULL in worker_thread(), since detach_worker() resets
>> worker->pool to NULL.
>>
>> If that is the case, READ_ONCE(worker->pool) should be used in
>> worker_thread() to fix the problem.
>>
>> (It would be strange for worker->pool to be read more than once in
>> worker_thread(), given how many times it is used, but since READ_ONCE()
>> is not used, the compiler may do so.)
>
> Oh, it is possible that the worker is created and then destroyed before
> ever being woken up. If that is the case, READ_ONCE() won't help. I'm
> going to explore whether "worker->pool = NULL;" can be moved out of
> detach_worker().

I’ll double-check whether my assumption (worker->pool == NULL) is true.
It may well be that my assumption is wrong.

Thanks for having a look!

>
> Thanks
> Lai
-- 
Kind regards / Beste Grüße
   Marc Hartmayer

IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/4] workqueue: Reap workers via kthread_stop() and remove detach_completion
  2024-09-11  8:27             ` Marc Hartmayer
@ 2024-09-11  9:37               ` Marc Hartmayer
  0 siblings, 0 replies; 15+ messages in thread
From: Marc Hartmayer @ 2024-09-11  9:37 UTC (permalink / raw)
  To: Lai Jiangshan
  Cc: linux-kernel, Lai Jiangshan, Valentin Schneider, Tejun Heo,
	Heiko Carstens, Sven Schnelle, Mete Durlu, Christian Borntraeger

On Wed, Sep 11, 2024 at 10:27 AM +0200, "Marc Hartmayer" <mhartmay@linux.ibm.com> wrote:
> On Wed, Sep 11, 2024 at 11:32 AM +0800, Lai Jiangshan <jiangshanlai@gmail.com> wrote:
>> On Wed, Sep 11, 2024 at 11:23 AM Lai Jiangshan <jiangshanlai@gmail.com> wrote:
>>>
>>> Hello, Marc
>>>
>
> Hi Lai,
>
> […snip…]
>
>>>
>>> I'm not familiar with s390 asm code, but it might be the case that
>>> `worker->pool` is NULL in worker_thread(), since detach_worker() resets
>>> worker->pool to NULL.
>>>
>>> If that is the case, READ_ONCE(worker->pool) should be used in
>>> worker_thread() to fix the problem.
>>>
>>> (It would be strange for worker->pool to be read more than once in
>>> worker_thread(), given how many times it is used, but since READ_ONCE()
>>> is not used, the compiler may do so.)
>>
>> Oh, it is possible that the worker is created and then destroyed before
>> ever being woken up. If that is the case, READ_ONCE() won't help. I'm
>> going to explore whether "worker->pool = NULL;" can be moved out of
>> detach_worker().
>
> I’ll double-check whether my assumption (worker->pool == NULL) is true.
> It may well be that my assumption is wrong.

I applied the following patch on top of commit bc83b4d1f086 ("Merge tag
'bcachefs-2024-09-09' of git://evilpiepirate.org/bcachefs")

From 9cd804f8e3183422b05a1b36e2544d1175736519 Mon Sep 17 00:00:00 2001
From: Marc Hartmayer <mhartmay@linux.ibm.com>
Date: Wed, 11 Sep 2024 09:11:41 +0000
Subject: [PATCH] Add printk-debug statements

Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
---
 kernel/workqueue.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e7b005ff3750..d4c5c68457f7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3338,11 +3338,16 @@ static void set_pf_worker(bool val)
 static int worker_thread(void *__worker)
 {
        struct worker *worker = __worker;
-       struct worker_pool *pool = worker->pool;
+       if (unlikely(!worker))
+               printk(KERN_ERR "OOOOOHHHH NOOOOOOO, WE DO NOT HAVE A WORKER.\n");
+
+       struct worker_pool *pool = READ_ONCE(worker->pool);

        /* tell the scheduler that this is a workqueue worker */
        set_pf_worker(true);
 woke_up:
+       if (unlikely(!pool))
+               printk(KERN_ERR "OOOOOHHHH NOOOOOOO, WE DO NOT HAVE A POOL.\n");
        raw_spin_lock_irq(&pool->lock);

        /* am I supposed to die? */
--
2.43.0

And it shows that pool is NULL in the crashing case. Hope this helps.

>
> Thanks for having a look!
>
>>
>> Thanks
>> Lai
> -- 
> Kind regards / Beste Grüße
>    Marc Hartmayer
>
> IBM Deutschland Research & Development GmbH
> Vorsitzender des Aufsichtsrats: Wolfgang Wendt
> Geschäftsführung: David Faller
> Sitz der Gesellschaft: Böblingen
> Registergericht: Amtsgericht Stuttgart, HRB 243294
-- 
Kind regards / Beste Grüße
   Marc Hartmayer

IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294

^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2024-09-11  9:37 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-06-21  7:32 [PATCH 0/4] workqueue: Destroy workers in idle_cull_fn() Lai Jiangshan
2024-06-21  7:32 ` [PATCH 1/4] workqueue: Reap workers via kthread_stop() and remove detach_completion Lai Jiangshan
2024-07-23 16:19   ` Marc Hartmayer
2024-07-25  0:11     ` Lai Jiangshan
2024-07-29  1:49       ` Lai Jiangshan
2024-09-10  9:45     ` Marc Hartmayer
2024-09-10 16:29       ` Marc Hartmayer
2024-09-11  3:23         ` Lai Jiangshan
2024-09-11  3:32           ` Lai Jiangshan
2024-09-11  8:27             ` Marc Hartmayer
2024-09-11  9:37               ` Marc Hartmayer
2024-06-21  7:32 ` [PATCH 2/4] workqueue: Don't bind the rescuer in the last working cpu Lai Jiangshan
2024-06-21  7:32 ` [PATCH 3/4] workqueue: Detach workers directly in idle_cull_fn() Lai Jiangshan
2024-06-21  7:32 ` [PATCH 4/4] workqueue: Remove useless pool->dying_workers Lai Jiangshan
2024-06-21 22:34 ` [PATCH 0/4] workqueue: Destroy workers in idle_cull_fn() Tejun Heo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox