linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: jiangshanlai@gmail.com
Cc: torvalds@linux-foundation.org, peterz@infradead.org,
	linux-kernel@vger.kernel.org, kernel-team@meta.com,
	joshdon@google.com, brho@google.com, briannorris@chromium.org,
	nhuck@google.com, agk@redhat.com, snitzer@kernel.org,
	void@manifault.com, Tejun Heo <tj@kernel.org>
Subject: [PATCH 08/24] workqueue: Make per-cpu pool_workqueues allocated and released like unbound ones
Date: Thu, 18 May 2023 14:16:53 -1000	[thread overview]
Message-ID: <20230519001709.2563-9-tj@kernel.org> (raw)
In-Reply-To: <20230519001709.2563-1-tj@kernel.org>

Currently, all per-cpu pwq's (pool_workqueue's) are allocated directly
through a per-cpu allocation and thus, unlike unbound workqueues, not
reference counted. This difference in lifetime management between the two
types is a bit confusing.

Unbound workqueues are currently accessed through wq->numa_pwq_tbl[] which
isn't suitiable for the planned CPU locality related improvements. The plan
is to unify pwq handling across per-cpu and unbound workqueues so that
they're always accessed through wq->cpu_pwq.

In preparation, this patch makes per-cpu pwq's to be allocated, reference
counted and released the same way as unbound pwq's. wq->cpu_pwq now holds
pointers to pwq's instead of containing them directly.

pwq_unbound_release_workfn() is renamed to pwq_release_workfn() as it's now
also used for per-cpu work items.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 74 +++++++++++++++++++++++++---------------------
 1 file changed, 40 insertions(+), 34 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 7addda9b37b9..574d2e12417d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -257,11 +257,11 @@ struct pool_workqueue {
 
 	/*
 	 * Release of unbound pwq is punted to a kthread_worker. See put_pwq()
-	 * and pwq_unbound_release_workfn() for details. pool_workqueue itself
-	 * is also RCU protected so that the first pwq can be determined without
+	 * and pwq_release_workfn() for details. pool_workqueue itself is also
+	 * RCU protected so that the first pwq can be determined without
 	 * grabbing wq->mutex.
 	 */
-	struct kthread_work	unbound_release_work;
+	struct kthread_work	release_work;
 	struct rcu_head		rcu;
 } __aligned(1 << WORK_STRUCT_FLAG_BITS);
 
@@ -320,7 +320,7 @@ struct workqueue_struct {
 
 	/* hot fields used during command issue, aligned to cacheline */
 	unsigned int		flags ____cacheline_aligned; /* WQ: WQ_* flags */
-	struct pool_workqueue __percpu *cpu_pwq; /* I: per-cpu pwqs */
+	struct pool_workqueue __percpu **cpu_pwq; /* I: per-cpu pwqs */
 	struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */
 };
 
@@ -1351,13 +1351,11 @@ static void put_pwq(struct pool_workqueue *pwq)
 	lockdep_assert_held(&pwq->pool->lock);
 	if (likely(--pwq->refcnt))
 		return;
-	if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
-		return;
 	/*
 	 * @pwq can't be released under pool->lock, bounce to a dedicated
 	 * kthread_worker to avoid A-A deadlocks.
 	 */
-	kthread_queue_work(pwq_release_worker, &pwq->unbound_release_work);
+	kthread_queue_work(pwq_release_worker, &pwq->release_work);
 }
 
 /**
@@ -1666,7 +1664,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 	} else {
 		if (req_cpu == WORK_CPU_UNBOUND)
 			cpu = raw_smp_processor_id();
-		pwq = per_cpu_ptr(wq->cpu_pwq, cpu);
+		pwq = *per_cpu_ptr(wq->cpu_pwq, cpu);
 	}
 
 	pool = pwq->pool;
@@ -3987,31 +3985,30 @@ static void rcu_free_pwq(struct rcu_head *rcu)
  * Scheduled on pwq_release_worker by put_pwq() when an unbound pwq hits zero
  * refcnt and needs to be destroyed.
  */
-static void pwq_unbound_release_workfn(struct kthread_work *work)
+static void pwq_release_workfn(struct kthread_work *work)
 {
 	struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
-						  unbound_release_work);
+						  release_work);
 	struct workqueue_struct *wq = pwq->wq;
 	struct worker_pool *pool = pwq->pool;
 	bool is_last = false;
 
 	/*
-	 * when @pwq is not linked, it doesn't hold any reference to the
+	 * When @pwq is not linked, it doesn't hold any reference to the
 	 * @wq, and @wq is invalid to access.
 	 */
 	if (!list_empty(&pwq->pwqs_node)) {
-		if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
-			return;
-
 		mutex_lock(&wq->mutex);
 		list_del_rcu(&pwq->pwqs_node);
 		is_last = list_empty(&wq->pwqs);
 		mutex_unlock(&wq->mutex);
 	}
 
-	mutex_lock(&wq_pool_mutex);
-	put_unbound_pool(pool);
-	mutex_unlock(&wq_pool_mutex);
+	if (wq->flags & WQ_UNBOUND) {
+		mutex_lock(&wq_pool_mutex);
+		put_unbound_pool(pool);
+		mutex_unlock(&wq_pool_mutex);
+	}
 
 	call_rcu(&pwq->rcu, rcu_free_pwq);
 
@@ -4095,8 +4092,7 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
 	INIT_LIST_HEAD(&pwq->inactive_works);
 	INIT_LIST_HEAD(&pwq->pwqs_node);
 	INIT_LIST_HEAD(&pwq->mayday_node);
-	kthread_init_work(&pwq->unbound_release_work,
-			  pwq_unbound_release_workfn);
+	kthread_init_work(&pwq->release_work, pwq_release_workfn);
 }
 
 /* sync @pwq with the current state of its associated wq and link it */
@@ -4497,20 +4493,25 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
 	int cpu, ret;
 
 	if (!(wq->flags & WQ_UNBOUND)) {
-		wq->cpu_pwq = alloc_percpu(struct pool_workqueue);
+		wq->cpu_pwq = alloc_percpu(struct pool_workqueue *);
 		if (!wq->cpu_pwq)
-			return -ENOMEM;
+			goto enomem;
 
 		for_each_possible_cpu(cpu) {
-			struct pool_workqueue *pwq =
+			struct pool_workqueue **pwq_p =
 				per_cpu_ptr(wq->cpu_pwq, cpu);
-			struct worker_pool *cpu_pools =
-				per_cpu(cpu_worker_pools, cpu);
+			struct worker_pool *pool =
+				&(per_cpu_ptr(cpu_worker_pools, cpu)[highpri]);
 
-			init_pwq(pwq, wq, &cpu_pools[highpri]);
+			*pwq_p = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL,
+						       pool->node);
+			if (!*pwq_p)
+				goto enomem;
+
+			init_pwq(*pwq_p, wq, pool);
 
 			mutex_lock(&wq->mutex);
-			link_pwq(pwq);
+			link_pwq(*pwq_p);
 			mutex_unlock(&wq->mutex);
 		}
 		return 0;
@@ -4529,6 +4530,15 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
 	cpus_read_unlock();
 
 	return ret;
+
+enomem:
+	if (wq->cpu_pwq) {
+		for_each_possible_cpu(cpu)
+			kfree(*per_cpu_ptr(wq->cpu_pwq, cpu));
+		free_percpu(wq->cpu_pwq);
+		wq->cpu_pwq = NULL;
+	}
+	return -ENOMEM;
 }
 
 static int wq_clamp_max_active(int max_active, unsigned int flags,
@@ -4702,7 +4712,7 @@ static bool pwq_busy(struct pool_workqueue *pwq)
 void destroy_workqueue(struct workqueue_struct *wq)
 {
 	struct pool_workqueue *pwq;
-	int node;
+	int cpu, node;
 
 	/*
 	 * Remove it from sysfs first so that sanity check failure doesn't
@@ -4762,12 +4772,8 @@ void destroy_workqueue(struct workqueue_struct *wq)
 	mutex_unlock(&wq_pool_mutex);
 
 	if (!(wq->flags & WQ_UNBOUND)) {
-		wq_unregister_lockdep(wq);
-		/*
-		 * The base ref is never dropped on per-cpu pwqs.  Directly
-		 * schedule RCU free.
-		 */
-		call_rcu(&wq->rcu, rcu_free_wq);
+		for_each_possible_cpu(cpu)
+			put_pwq_unlocked(*per_cpu_ptr(wq->cpu_pwq, cpu));
 	} else {
 		/*
 		 * We're the sole accessor of @wq at this point.  Directly
@@ -4884,7 +4890,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
 		cpu = smp_processor_id();
 
 	if (!(wq->flags & WQ_UNBOUND))
-		pwq = per_cpu_ptr(wq->cpu_pwq, cpu);
+		pwq = *per_cpu_ptr(wq->cpu_pwq, cpu);
 	else
 		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
 
-- 
2.40.1


  parent reply	other threads:[~2023-05-19  0:18 UTC|newest]

Thread overview: 73+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-19  0:16 [PATCHSET v1 wq/for-6.5] workqueue: Improve unbound workqueue execution locality Tejun Heo
2023-05-19  0:16 ` [PATCH 01/24] workqueue: Drop the special locking rule for worker->flags and worker_pool->flags Tejun Heo
2023-05-19  0:16 ` [PATCH 02/24] workqueue: Cleanups around process_scheduled_works() Tejun Heo
2023-05-19  0:16 ` [PATCH 03/24] workqueue: Not all work insertion needs to wake up a worker Tejun Heo
2023-05-23  9:54   ` Lai Jiangshan
2023-05-23 21:37     ` Tejun Heo
2023-08-08  1:15   ` [PATCH v2 " Tejun Heo
2023-05-19  0:16 ` [PATCH 04/24] workqueue: Rename wq->cpu_pwqs to wq->cpu_pwq Tejun Heo
2023-05-19  0:16 ` [PATCH 05/24] workqueue: Relocate worker and work management functions Tejun Heo
2023-05-19  0:16 ` [PATCH 06/24] workqueue: Remove module param disable_numa and sysfs knobs pool_ids and numa Tejun Heo
2023-05-19  0:16 ` [PATCH 07/24] workqueue: Use a kthread_worker to release pool_workqueues Tejun Heo
2023-05-19  0:16 ` Tejun Heo [this message]
2023-05-19  0:16 ` [PATCH 09/24] workqueue: Make unbound workqueues to use per-cpu pool_workqueues Tejun Heo
2023-05-22  6:41   ` Leon Romanovsky
2023-05-22 12:27     ` Dennis Dalessandro
2023-05-19  0:16 ` [PATCH 10/24] workqueue: Rename workqueue_attrs->no_numa to ->ordered Tejun Heo
2023-05-19  0:16 ` [PATCH 11/24] workqueue: Rename NUMA related names to use pod instead Tejun Heo
2023-05-19  0:16 ` [PATCH 12/24] workqueue: Move wq_pod_init() below workqueue_init() Tejun Heo
2023-05-19  0:16 ` [PATCH 13/24] workqueue: Initialize unbound CPU pods later in the boot Tejun Heo
2023-05-19  0:16 ` [PATCH 14/24] workqueue: Generalize unbound CPU pods Tejun Heo
2023-05-30  8:06   ` K Prateek Nayak
2023-06-07  1:50     ` Tejun Heo
2023-05-30 21:18   ` Sandeep Dhavale
2023-05-31 12:14     ` K Prateek Nayak
2023-06-07 22:13       ` Tejun Heo
2023-06-08  3:01         ` K Prateek Nayak
2023-06-08 22:50           ` Tejun Heo
2023-06-09  3:43             ` K Prateek Nayak
2023-06-14 18:49               ` Sandeep Dhavale
2023-06-21 20:14                 ` Tejun Heo
2023-06-19  4:30             ` Swapnil Sapkal
2023-06-21 20:38               ` Tejun Heo
2023-07-05  7:04             ` K Prateek Nayak
2023-07-05 18:39               ` Tejun Heo
2023-07-11  3:02                 ` K Prateek Nayak
2023-07-31 23:52                   ` Tejun Heo
2023-08-08  1:08                     ` Tejun Heo
2023-05-19  0:17 ` [PATCH 15/24] workqueue: Add tools/workqueue/wq_dump.py which prints out workqueue configuration Tejun Heo
2023-05-19  0:17 ` [PATCH 16/24] workqueue: Modularize wq_pod_type initialization Tejun Heo
2023-05-19  0:17 ` [PATCH 17/24] workqueue: Add multiple affinity scopes and interface to select them Tejun Heo
2023-05-19  0:17 ` [PATCH 18/24] workqueue: Factor out work to worker assignment and collision handling Tejun Heo
2023-05-19  0:17 ` [PATCH 19/24] workqueue: Factor out need_more_worker() check and worker wake-up Tejun Heo
2023-05-19  0:17 ` [PATCH 20/24] workqueue: Add workqueue_attrs->__pod_cpumask Tejun Heo
2023-05-19  0:17 ` [PATCH 21/24] workqueue: Implement non-strict affinity scope for unbound workqueues Tejun Heo
2023-05-19  0:17 ` [PATCH 22/24] workqueue: Add "Affinity Scopes and Performance" section to documentation Tejun Heo
2023-05-19  0:17 ` [PATCH 23/24] workqueue: Add pool_workqueue->cpu Tejun Heo
2023-05-19  0:17 ` [PATCH 24/24] workqueue: Implement localize-to-issuing-CPU for unbound workqueues Tejun Heo
2023-05-19  0:41 ` [PATCHSET v1 wq/for-6.5] workqueue: Improve unbound workqueue execution locality Linus Torvalds
2023-05-19 22:35   ` Tejun Heo
2023-05-19 23:03     ` Tejun Heo
2023-05-23  1:51       ` Linus Torvalds
2023-05-23 17:59         ` Linus Torvalds
2023-05-23 20:08           ` Rik van Riel
2023-05-23 21:36           ` Sandeep Dhavale
2023-05-23 11:18 ` Peter Zijlstra
2023-05-23 16:12   ` Vincent Guittot
2023-05-24  7:34     ` Peter Zijlstra
2023-05-24 13:15       ` Vincent Guittot
2023-06-05  4:46     ` Gautham R. Shenoy
2023-06-07 14:42       ` Libo Chen
2023-05-26  1:12   ` Tejun Heo
2023-05-30 11:32     ` Peter Zijlstra
2023-06-12 23:56 ` Brian Norris
2023-06-13  2:48   ` Tejun Heo
2023-06-13  9:26     ` Pin-yen Lin
2023-06-21 19:16       ` Tejun Heo
2023-06-21 19:31         ` Linus Torvalds
2023-06-29  9:49           ` Pin-yen Lin
2023-07-03 21:47             ` Tejun Heo
2023-08-08  1:22 ` Tejun Heo
2023-08-08  2:58   ` K Prateek Nayak
2023-08-08  7:59     ` Tejun Heo
2023-08-18  4:05     ` K Prateek Nayak

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230519001709.2563-9-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=agk@redhat.com \
    --cc=brho@google.com \
    --cc=briannorris@chromium.org \
    --cc=jiangshanlai@gmail.com \
    --cc=joshdon@google.com \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nhuck@google.com \
    --cc=peterz@infradead.org \
    --cc=snitzer@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=void@manifault.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).