public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org
Subject: [PATCH wq/for-6.9] workqueue: Fix queue_work_on() with BH workqueues
Date: Wed, 14 Feb 2024 08:39:09 -1000	[thread overview]
Message-ID: <Zc0IzeM6tAvm1NTl@mtj.duckdns.org> (raw)

From 2f34d7337d98f3eae7bd3d1270efaf9d8a17cfc6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Feb 2024 08:33:55 -1000

When queue_work_on() is used to queue a BH work item on a remote CPU, the
work item is queued on that CPU but kick_pool() raises softirq on the local
CPU. This leads to stalls as the work item won't be executed until something
else on the remote CPU schedules a BH work item or tasklet locally.

Fix it by bouncing raising softirq to the target CPU using per-cpu irq_work.

Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: 4cb1ef64609f ("workqueue: Implement BH workqueues to eventually replace tasklets")
---
Oops, I just found out that I was raising softirq on the wrong CPU when BH
work items are queued on remote CPUs. This fixes it. Applied to wq/for-6.9.

Thanks.

 kernel/workqueue.c | 41 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4950bfc2cdcc..04e35dbe6799 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -54,6 +54,7 @@
 #include <linux/nmi.h>
 #include <linux/kvm_para.h>
 #include <linux/delay.h>
+#include <linux/irq_work.h>
 
 #include "workqueue_internal.h"
 
@@ -457,6 +458,10 @@ static bool wq_debug_force_rr_cpu = false;
 #endif
 module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
 
+/* to raise softirq for the BH worker pools on other CPUs */
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_work [NR_STD_WORKER_POOLS],
+				     bh_pool_irq_works);
+
 /* the BH worker pools */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
 				     bh_worker_pools);
@@ -1197,6 +1202,13 @@ static bool assign_work(struct work_struct *work, struct worker *worker,
 	return true;
 }
 
+static struct irq_work *bh_pool_irq_work(struct worker_pool *pool)
+{
+	int high = pool->attrs->nice == HIGHPRI_NICE_LEVEL ? 1 : 0;
+
+	return &per_cpu(bh_pool_irq_works, pool->cpu)[high];
+}
+
 /**
  * kick_pool - wake up an idle worker if necessary
  * @pool: pool to kick
@@ -1215,10 +1227,15 @@ static bool kick_pool(struct worker_pool *pool)
 		return false;
 
 	if (pool->flags & POOL_BH) {
-		if (pool->attrs->nice == HIGHPRI_NICE_LEVEL)
-			raise_softirq_irqoff(HI_SOFTIRQ);
-		else
-			raise_softirq_irqoff(TASKLET_SOFTIRQ);
+		if (likely(pool->cpu == smp_processor_id())) {
+			if (pool->attrs->nice == HIGHPRI_NICE_LEVEL)
+				raise_softirq_irqoff(HI_SOFTIRQ);
+			else
+				raise_softirq_irqoff(TASKLET_SOFTIRQ);
+		} else {
+			irq_work_queue_on(bh_pool_irq_work(pool), pool->cpu);
+		}
+
 		return true;
 	}
 
@@ -7367,6 +7384,16 @@ static inline void wq_watchdog_init(void) { }
 
 #endif	/* CONFIG_WQ_WATCHDOG */
 
+static void bh_pool_kick_normal(struct irq_work *irq_work)
+{
+	raise_softirq_irqoff(TASKLET_SOFTIRQ);
+}
+
+static void bh_pool_kick_highpri(struct irq_work *irq_work)
+{
+	raise_softirq_irqoff(HI_SOFTIRQ);
+}
+
 static void __init restrict_unbound_cpumask(const char *name, const struct cpumask *mask)
 {
 	if (!cpumask_intersects(wq_unbound_cpumask, mask)) {
@@ -7408,6 +7435,8 @@ void __init workqueue_init_early(void)
 {
 	struct wq_pod_type *pt = &wq_pod_types[WQ_AFFN_SYSTEM];
 	int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
+	void (*irq_work_fns[2])(struct irq_work *) = { bh_pool_kick_normal,
+						       bh_pool_kick_highpri };
 	int i, cpu;
 
 	BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
@@ -7455,8 +7484,10 @@ void __init workqueue_init_early(void)
 
 		i = 0;
 		for_each_bh_worker_pool(pool, cpu) {
-			init_cpu_worker_pool(pool, cpu, std_nice[i++]);
+			init_cpu_worker_pool(pool, cpu, std_nice[i]);
 			pool->flags |= POOL_BH;
+			init_irq_work(bh_pool_irq_work(pool), irq_work_fns[i]);
+			i++;
 		}
 
 		i = 0;
-- 
2.43.0


             reply	other threads:[~2024-02-14 18:39 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-14 18:39 Tejun Heo [this message]
2024-02-14 19:03 ` [PATCH wq/for-6.9] workqueue: Fix queue_work_on() with BH workqueues Linus Torvalds
2024-02-14 19:16   ` Tejun Heo
2024-02-16  1:23 ` [PATCH wq/for-6.9] workqueue, irq_work: Build fix for !CONFIG_IRQ_WORK Tejun Heo
2024-02-16  5:10   ` [PATCH v2 " Tejun Heo
2024-02-16 10:24     ` Anders Roxell
2024-02-16 16:35       ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Zc0IzeM6tAvm1NTl@mtj.duckdns.org \
    --to=tj@kernel.org \
    --cc=jiangshanlai@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox