[PATCH 13/24] workqueue: Initialize unbound CPU pods later in the boot

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Tejun Heo <tj@kernel.org>
To: jiangshanlai@gmail.com
Cc: torvalds@linux-foundation.org, peterz@infradead.org,
	linux-kernel@vger.kernel.org, kernel-team@meta.com,
	joshdon@google.com, brho@google.com, briannorris@chromium.org,
	nhuck@google.com, agk@redhat.com, snitzer@kernel.org,
	void@manifault.com, Tejun Heo <tj@kernel.org>
Subject: [PATCH 13/24] workqueue: Initialize unbound CPU pods later in the boot
Date: Thu, 18 May 2023 14:16:58 -1000	[thread overview]
Message-ID: <20230519001709.2563-14-tj@kernel.org> (raw)
In-Reply-To: <20230519001709.2563-1-tj@kernel.org>

During boot, to initialize unbound CPU pods, wq_pod_init() was called from
workqueue_init(). This is early enough for NUMA nodes to be set up but
before SMP is brought up and CPU topology information is populated.

Workqueue is in the process of improving CPU locality for unbound workqueues
and will need access to topology information during pod init. This adds a
new init function workqueue_init_topology() which is called after CPU
topology information is available and replaces wq_pod_init().

As unbound CPU pods are now initialized after workqueues are activated, we
need to revisit the workqueues to apply the pod configuration. Workqueues
which are created before workqueue_init_topology() are set up so that they
always use the default worker pool. After pods are set up in
workqueue_init_topology(), wq_update_pod() is called on all existing
workqueues to update the pool associations accordingly.

Note that wq_update_pod_attrs_buf allocation is moved to
workqueue_init_early(). This isn't necessary right now but enables further
generalization of pod handling in the future.

This patch changes the initialization sequence but the end result should be
the same.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |  1 +
 init/main.c               |  1 +
 kernel/workqueue.c        | 68 +++++++++++++++++++++++----------------
 3 files changed, 43 insertions(+), 27 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 8cc9b86d3256..b8961c8ea5b3 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -709,5 +709,6 @@ int workqueue_offline_cpu(unsigned int cpu);
 
 void __init workqueue_init_early(void);
 void __init workqueue_init(void);
+void __init workqueue_init_topology(void);
 
 #endif
diff --git a/init/main.c b/init/main.c
index af50044deed5..6bd5fffce2e6 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1565,6 +1565,7 @@ static noinline void __init kernel_init_freeable(void)
 	smp_init();
 	sched_init_smp();
 
+	workqueue_init_topology();
 	padata_init();
 	page_alloc_init_late();
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 914a69f83d59..add6f5fc799b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -6222,17 +6222,15 @@ static inline void wq_watchdog_init(void) { }
 
 #endif	/* CONFIG_WQ_WATCHDOG */
 
-static void wq_pod_init(void);
-
 /**
  * workqueue_init_early - early init for workqueue subsystem
  *
- * This is the first half of two-staged workqueue subsystem initialization
- * and invoked as soon as the bare basics - memory allocation, cpumasks and
- * idr are up.  It sets up all the data structures and system workqueues
- * and allows early boot code to create workqueues and queue/cancel work
- * items.  Actual work item execution starts only after kthreads can be
- * created and scheduled right before early initcalls.
+ * This is the first step of three-staged workqueue subsystem initialization and
+ * invoked as soon as the bare basics - memory allocation, cpumasks and idr are
+ * up. It sets up all the data structures and system workqueues and allows early
+ * boot code to create workqueues and queue/cancel work items. Actual work item
+ * execution starts only after kthreads can be created and scheduled right
+ * before early initcalls.
  */
 void __init workqueue_init_early(void)
 {
@@ -6247,6 +6245,9 @@ void __init workqueue_init_early(void)
 
 	pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
 
+	wq_update_pod_attrs_buf = alloc_workqueue_attrs();
+	BUG_ON(!wq_update_pod_attrs_buf);
+
 	/* initialize CPU pools */
 	for_each_possible_cpu(cpu) {
 		struct worker_pool *pool;
@@ -6305,11 +6306,11 @@ void __init workqueue_init_early(void)
 /**
  * workqueue_init - bring workqueue subsystem fully online
  *
- * This is the latter half of two-staged workqueue subsystem initialization
- * and invoked as soon as kthreads can be created and scheduled.
- * Workqueues have been created and work items queued on them, but there
- * are no kworkers executing the work items yet.  Populate the worker pools
- * with the initial workers and enable future kworker creations.
+ * This is the second step of three-staged workqueue subsystem initialization
+ * and invoked as soon as kthreads can be created and scheduled. Workqueues have
+ * been created and work items queued on them, but there are no kworkers
+ * executing the work items yet. Populate the worker pools with the initial
+ * workers and enable future kworker creations.
  */
 void __init workqueue_init(void)
 {
@@ -6320,18 +6321,12 @@ void __init workqueue_init(void)
 	pwq_release_worker = kthread_create_worker(0, "pool_workqueue_release");
 	BUG_ON(IS_ERR(pwq_release_worker));
 
-	/*
-	 * It'd be simpler to initialize pods in workqueue_init_early() but CPU
-	 * to node mapping may not be available that early on some archs such as
-	 * power and arm64. As per-cpu pools created previously could be missing
-	 * node hint and unbound pool pod affinity, fix them up.
-	 *
-	 * Also, while iterating workqueues, create rescuers if requested.
-	 */
-	wq_pod_init();
-
 	mutex_lock(&wq_pool_mutex);
 
+	/*
+	 * Per-cpu pools created earlier could be missing node hint. Fix them
+	 * up. Also, create a rescuer for workqueues that requested it.
+	 */
 	for_each_possible_cpu(cpu) {
 		for_each_cpu_worker_pool(pool, cpu) {
 			pool->node = cpu_to_node(cpu);
@@ -6339,7 +6334,6 @@ void __init workqueue_init(void)
 	}
 
 	list_for_each_entry(wq, &workqueues, list) {
-		wq_update_pod(wq, smp_processor_id(), true);
 		WARN(init_rescuer(wq),
 		     "workqueue: failed to create early rescuer for %s",
 		     wq->name);
@@ -6362,8 +6356,16 @@ void __init workqueue_init(void)
 	wq_watchdog_init();
 }
 
-static void __init wq_pod_init(void)
+/**
+ * workqueue_init_topology - initialize CPU pods for unbound workqueues
+ *
+ * This is the third step of there-staged workqueue subsystem initialization and
+ * invoked after SMP and topology information are fully initialized. It
+ * initializes the unbound CPU pods accordingly.
+ */
+void __init workqueue_init_topology(void)
 {
+	struct workqueue_struct *wq;
 	cpumask_var_t *tbl;
 	int node, cpu;
 
@@ -6377,8 +6379,7 @@ static void __init wq_pod_init(void)
 		}
 	}
 
-	wq_update_pod_attrs_buf = alloc_workqueue_attrs();
-	BUG_ON(!wq_update_pod_attrs_buf);
+	mutex_lock(&wq_pool_mutex);
 
 	/*
 	 * We want masks of possible CPUs of each node which isn't readily
@@ -6399,6 +6400,19 @@ static void __init wq_pod_init(void)
 
 	wq_pod_cpus = tbl;
 	wq_pod_enabled = true;
+
+	/*
+	 * Workqueues allocated earlier would have all CPUs sharing the default
+	 * worker pool. Explicitly call wq_update_pod() on all workqueue and CPU
+	 * combinations to apply per-pod sharing.
+	 */
+	list_for_each_entry(wq, &workqueues, list) {
+		for_each_online_cpu(cpu) {
+			wq_update_pod(wq, cpu, true);
+		}
+	}
+
+	mutex_unlock(&wq_pool_mutex);
 }
 
 /*
-- 
2.40.1

next prev parent reply	other threads:[~2023-05-19  0:18 UTC|newest]

Thread overview: 73+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-19  0:16 [PATCHSET v1 wq/for-6.5] workqueue: Improve unbound workqueue execution locality Tejun Heo
2023-05-19  0:16 ` [PATCH 01/24] workqueue: Drop the special locking rule for worker->flags and worker_pool->flags Tejun Heo
2023-05-19  0:16 ` [PATCH 02/24] workqueue: Cleanups around process_scheduled_works() Tejun Heo
2023-05-19  0:16 ` [PATCH 03/24] workqueue: Not all work insertion needs to wake up a worker Tejun Heo
2023-05-23  9:54   ` Lai Jiangshan
2023-05-23 21:37     ` Tejun Heo
2023-08-08  1:15   ` [PATCH v2 " Tejun Heo
2023-05-19  0:16 ` [PATCH 04/24] workqueue: Rename wq->cpu_pwqs to wq->cpu_pwq Tejun Heo
2023-05-19  0:16 ` [PATCH 05/24] workqueue: Relocate worker and work management functions Tejun Heo
2023-05-19  0:16 ` [PATCH 06/24] workqueue: Remove module param disable_numa and sysfs knobs pool_ids and numa Tejun Heo
2023-05-19  0:16 ` [PATCH 07/24] workqueue: Use a kthread_worker to release pool_workqueues Tejun Heo
2023-05-19  0:16 ` [PATCH 08/24] workqueue: Make per-cpu pool_workqueues allocated and released like unbound ones Tejun Heo
2023-05-19  0:16 ` [PATCH 09/24] workqueue: Make unbound workqueues to use per-cpu pool_workqueues Tejun Heo
2023-05-22  6:41   ` Leon Romanovsky
2023-05-22 12:27     ` Dennis Dalessandro
2023-05-19  0:16 ` [PATCH 10/24] workqueue: Rename workqueue_attrs->no_numa to ->ordered Tejun Heo
2023-05-19  0:16 ` [PATCH 11/24] workqueue: Rename NUMA related names to use pod instead Tejun Heo
2023-05-19  0:16 ` [PATCH 12/24] workqueue: Move wq_pod_init() below workqueue_init() Tejun Heo
2023-05-19  0:16 ` Tejun Heo [this message]
2023-05-19  0:16 ` [PATCH 14/24] workqueue: Generalize unbound CPU pods Tejun Heo
2023-05-30  8:06   ` K Prateek Nayak
2023-06-07  1:50     ` Tejun Heo
2023-05-30 21:18   ` Sandeep Dhavale
2023-05-31 12:14     ` K Prateek Nayak
2023-06-07 22:13       ` Tejun Heo
2023-06-08  3:01         ` K Prateek Nayak
2023-06-08 22:50           ` Tejun Heo
2023-06-09  3:43             ` K Prateek Nayak
2023-06-14 18:49               ` Sandeep Dhavale
2023-06-21 20:14                 ` Tejun Heo
2023-06-19  4:30             ` Swapnil Sapkal
2023-06-21 20:38               ` Tejun Heo
2023-07-05  7:04             ` K Prateek Nayak
2023-07-05 18:39               ` Tejun Heo
2023-07-11  3:02                 ` K Prateek Nayak
2023-07-31 23:52                   ` Tejun Heo
2023-08-08  1:08                     ` Tejun Heo
2023-05-19  0:17 ` [PATCH 15/24] workqueue: Add tools/workqueue/wq_dump.py which prints out workqueue configuration Tejun Heo
2023-05-19  0:17 ` [PATCH 16/24] workqueue: Modularize wq_pod_type initialization Tejun Heo
2023-05-19  0:17 ` [PATCH 17/24] workqueue: Add multiple affinity scopes and interface to select them Tejun Heo
2023-05-19  0:17 ` [PATCH 18/24] workqueue: Factor out work to worker assignment and collision handling Tejun Heo
2023-05-19  0:17 ` [PATCH 19/24] workqueue: Factor out need_more_worker() check and worker wake-up Tejun Heo
2023-05-19  0:17 ` [PATCH 20/24] workqueue: Add workqueue_attrs->__pod_cpumask Tejun Heo
2023-05-19  0:17 ` [PATCH 21/24] workqueue: Implement non-strict affinity scope for unbound workqueues Tejun Heo
2023-05-19  0:17 ` [PATCH 22/24] workqueue: Add "Affinity Scopes and Performance" section to documentation Tejun Heo
2023-05-19  0:17 ` [PATCH 23/24] workqueue: Add pool_workqueue->cpu Tejun Heo
2023-05-19  0:17 ` [PATCH 24/24] workqueue: Implement localize-to-issuing-CPU for unbound workqueues Tejun Heo
2023-05-19  0:41 ` [PATCHSET v1 wq/for-6.5] workqueue: Improve unbound workqueue execution locality Linus Torvalds
2023-05-19 22:35   ` Tejun Heo
2023-05-19 23:03     ` Tejun Heo
2023-05-23  1:51       ` Linus Torvalds
2023-05-23 17:59         ` Linus Torvalds
2023-05-23 20:08           ` Rik van Riel
2023-05-23 21:36           ` Sandeep Dhavale
2023-05-23 11:18 ` Peter Zijlstra
2023-05-23 16:12   ` Vincent Guittot
2023-05-24  7:34     ` Peter Zijlstra
2023-05-24 13:15       ` Vincent Guittot
2023-06-05  4:46     ` Gautham R. Shenoy
2023-06-07 14:42       ` Libo Chen
2023-05-26  1:12   ` Tejun Heo
2023-05-30 11:32     ` Peter Zijlstra
2023-06-12 23:56 ` Brian Norris
2023-06-13  2:48   ` Tejun Heo
2023-06-13  9:26     ` Pin-yen Lin
2023-06-21 19:16       ` Tejun Heo
2023-06-21 19:31         ` Linus Torvalds
2023-06-29  9:49           ` Pin-yen Lin
2023-07-03 21:47             ` Tejun Heo
2023-08-08  1:22 ` Tejun Heo
2023-08-08  2:58   ` K Prateek Nayak
2023-08-08  7:59     ` Tejun Heo
2023-08-18  4:05     ` K Prateek Nayak

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:8cc9b86d325 dfblob:b8961c8ea5b dfblob:af50044deed
dfblob:6bd5fffce2e dfblob:914a69f83d5 dfblob:add6f5fc799 )
 OR (
bs:"[PATCH 13/24] workqueue: Initialize unbound CPU pods later in the boot" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230519001709.2563-14-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=agk@redhat.com \
    --cc=brho@google.com \
    --cc=briannorris@chromium.org \
    --cc=jiangshanlai@gmail.com \
    --cc=joshdon@google.com \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nhuck@google.com \
    --cc=peterz@infradead.org \
    --cc=snitzer@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=void@manifault.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.