public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: linux-kernel@vger.kernel.org, laijs@cn.fujitsu.com
Cc: axboe@kernel.dk, jmoyer@redhat.com, zab@redhat.com,
	Tejun Heo <tj@kernel.org>
Subject: [PATCH 23/31] workqueue: implement get/put_pwq()
Date: Fri,  1 Mar 2013 19:24:14 -0800	[thread overview]
Message-ID: <1362194662-2344-24-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1362194662-2344-1-git-send-email-tj@kernel.org>

Add pool_workqueue->refcnt along with get/put_pwq().  Both per-cpu and
unbound pwqs have refcnts and any work item inserted on a pwq
increments the refcnt which is dropped when the work item finishes.

For per-cpu pwqs the base ref is never dropped and destroy_workqueue()
frees the pwqs as before.  For unbound ones, destroy_workqueue()
simply drops the base ref on the first pwq.  When the refcnt reaches
zero, pwq_unbound_release_workfn() is scheduled on system_wq, which
unlinks the pwq, puts the associated pool and frees the pwq and wq as
necessary.  This needs to be done from a work item as put_pwq() needs
to be protected by pool->lock but release can't happen with the lock
held - e.g. put_unbound_pool() involves blocking operations.

Unbound pool->locks are marked with lockdep subclas 1 as put_pwq()
will schedule the release work item on system_wq while holding the
unbound pool's lock and triggers recursive locking warning spuriously.

This will be used to implement dynamic creation and destruction of
unbound pwqs.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 137 ++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 114 insertions(+), 23 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d0604ee..e092cd5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -179,6 +179,7 @@ struct pool_workqueue {
 	struct workqueue_struct *wq;		/* I: the owning workqueue */
 	int			work_color;	/* L: current color */
 	int			flush_color;	/* L: flushing color */
+	int			refcnt;		/* L: reference count */
 	int			nr_in_flight[WORK_NR_COLORS];
 						/* L: nr of in_flight works */
 	int			nr_active;	/* L: nr of active works */
@@ -186,6 +187,15 @@ struct pool_workqueue {
 	struct list_head	delayed_works;	/* L: delayed works */
 	struct list_head	pwqs_node;	/* R: node on wq->pwqs */
 	struct list_head	mayday_node;	/* W: node on wq->maydays */
+
+	/*
+	 * Release of unbound pwq is punted to system_wq.  See put_pwq()
+	 * and pwq_unbound_release_workfn() for details.  pool_workqueue
+	 * itself is also sched-RCU protected so that the first pwq can be
+	 * determined without grabbing workqueue_lock.
+	 */
+	struct work_struct	unbound_release_work;
+	struct rcu_head		rcu;
 } __aligned(1 << WORK_STRUCT_FLAG_BITS);
 
 /*
@@ -936,6 +946,45 @@ static void move_linked_works(struct work_struct *work, struct list_head *head,
 		*nextp = n;
 }
 
+/**
+ * get_pwq - get an extra reference on the specified pool_workqueue
+ * @pwq: pool_workqueue to get
+ *
+ * Obtain an extra reference on @pwq.  The caller should guarantee that
+ * @pwq has positive refcnt and be holding the matching pool->lock.
+ */
+static void get_pwq(struct pool_workqueue *pwq)
+{
+	lockdep_assert_held(&pwq->pool->lock);
+	WARN_ON_ONCE(pwq->refcnt <= 0);
+	pwq->refcnt++;
+}
+
+/**
+ * put_pwq - put a pool_workqueue reference
+ * @pwq: pool_workqueue to put
+ *
+ * Drop a reference of @pwq.  If its refcnt reaches zero, schedule its
+ * destruction.  The caller should be holding the matching pool->lock.
+ */
+static void put_pwq(struct pool_workqueue *pwq)
+{
+	lockdep_assert_held(&pwq->pool->lock);
+	if (likely(--pwq->refcnt))
+		return;
+	if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
+		return;
+	/*
+	 * @pwq can't be released under pool->lock, bounce to
+	 * pwq_unbound_release_workfn().  This never recurses on the same
+	 * pool->lock as this path is taken only for unbound workqueues and
+	 * the release work item is scheduled on a per-cpu workqueue.  To
+	 * avoid lockdep warning, unbound pool->locks are given lockdep
+	 * subclass of 1 in get_unbound_pool().
+	 */
+	schedule_work(&pwq->unbound_release_work);
+}
+
 static void pwq_activate_delayed_work(struct work_struct *work)
 {
 	struct pool_workqueue *pwq = get_work_pwq(work);
@@ -967,9 +1016,9 @@ static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
  */
 static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
 {
-	/* ignore uncolored works */
+	/* uncolored work items don't participate in flushing or nr_active */
 	if (color == WORK_NO_COLOR)
-		return;
+		goto out_put;
 
 	pwq->nr_in_flight[color]--;
 
@@ -982,11 +1031,11 @@ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
 
 	/* is flush in progress and are we at the flushing tip? */
 	if (likely(pwq->flush_color != color))
-		return;
+		goto out_put;
 
 	/* are there still in-flight works? */
 	if (pwq->nr_in_flight[color])
-		return;
+		goto out_put;
 
 	/* this pwq is done, clear flush_color */
 	pwq->flush_color = -1;
@@ -997,6 +1046,8 @@ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
 	 */
 	if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
 		complete(&pwq->wq->first_flusher->done);
+out_put:
+	put_pwq(pwq);
 }
 
 /**
@@ -1119,6 +1170,7 @@ static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
 	/* we own @work, set data and link */
 	set_work_pwq(work, pwq, extra_flags);
 	list_add_tail(&work->entry, head);
+	get_pwq(pwq);
 
 	/*
 	 * Ensure either worker_sched_deactivated() sees the above
@@ -3294,6 +3346,7 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
 	if (!pool || init_worker_pool(pool) < 0)
 		goto fail;
 
+	lockdep_set_subclass(&pool->lock, 1);	/* see put_pwq() */
 	copy_workqueue_attrs(pool->attrs, attrs);
 
 	if (worker_pool_assign_id(pool) < 0)
@@ -3322,7 +3375,41 @@ fail:
 	return NULL;
 }
 
-/* initialize @pwq which interfaces with @pool for @wq and link it in */
+static void rcu_free_pwq(struct rcu_head *rcu)
+{
+	kmem_cache_free(pwq_cache,
+			container_of(rcu, struct pool_workqueue, rcu));
+}
+
+/*
+ * Scheduled on system_wq by put_pwq() when an unbound pwq hits zero refcnt
+ * and needs to be destroyed.
+ */
+static void pwq_unbound_release_workfn(struct work_struct *work)
+{
+	struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
+						  unbound_release_work);
+	struct workqueue_struct *wq = pwq->wq;
+	struct worker_pool *pool = pwq->pool;
+
+	if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
+		return;
+
+	spin_lock_irq(&workqueue_lock);
+	list_del_rcu(&pwq->pwqs_node);
+	spin_unlock_irq(&workqueue_lock);
+
+	put_unbound_pool(pool);
+	call_rcu_sched(&pwq->rcu, rcu_free_pwq);
+
+	/*
+	 * If we're the last pwq going away, @wq is already dead and no one
+	 * is gonna access it anymore.  Free it.
+	 */
+	if (list_empty(&wq->pwqs))
+		kfree(wq);
+}
+
 static void init_and_link_pwq(struct pool_workqueue *pwq,
 			      struct workqueue_struct *wq,
 			      struct worker_pool *pool)
@@ -3332,9 +3419,11 @@ static void init_and_link_pwq(struct pool_workqueue *pwq,
 	pwq->pool = pool;
 	pwq->wq = wq;
 	pwq->flush_color = -1;
+	pwq->refcnt = 1;
 	pwq->max_active = wq->saved_max_active;
 	INIT_LIST_HEAD(&pwq->delayed_works);
 	INIT_LIST_HEAD(&pwq->mayday_node);
+	INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
 
 	list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs);
 }
@@ -3377,15 +3466,6 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
 	return 0;
 }
 
-static void free_pwqs(struct workqueue_struct *wq)
-{
-	if (!(wq->flags & WQ_UNBOUND))
-		free_percpu(wq->cpu_pwqs);
-	else if (!list_empty(&wq->pwqs))
-		kmem_cache_free(pwq_cache, list_first_entry(&wq->pwqs,
-					struct pool_workqueue, pwqs_node));
-}
-
 static int wq_clamp_max_active(int max_active, unsigned int flags,
 			       const char *name)
 {
@@ -3517,7 +3597,8 @@ void destroy_workqueue(struct workqueue_struct *wq)
 			}
 		}
 
-		if (WARN_ON(pwq->nr_active) ||
+		if (WARN_ON(pwq->refcnt > 1) ||
+		    WARN_ON(pwq->nr_active) ||
 		    WARN_ON(!list_empty(&pwq->delayed_works))) {
 			spin_unlock_irq(&workqueue_lock);
 			return;
@@ -3538,17 +3619,27 @@ void destroy_workqueue(struct workqueue_struct *wq)
 		wq->rescuer = NULL;
 	}
 
-	/*
-	 * We're the sole accessor of @wq at this point.  Directly access
-	 * the first pwq and put its pool.
-	 */
-	if (wq->flags & WQ_UNBOUND) {
+	if (!(wq->flags & WQ_UNBOUND)) {
+		/*
+		 * The base ref is never dropped on per-cpu pwqs.  Directly
+		 * free the pwqs and wq.
+		 */
+		free_percpu(wq->cpu_pwqs);
+		kfree(wq);
+	} else {
+		/*
+		 * We're the sole accessor of @wq at this point.  Directly
+		 * access the first pwq and put the base ref.  As both pwqs
+		 * and pools are sched-RCU protected, the lock operations
+		 * are safe.  @wq will be freed when the last pwq is
+		 * released.
+		 */
 		pwq = list_first_entry(&wq->pwqs, struct pool_workqueue,
 				       pwqs_node);
-		put_unbound_pool(pwq->pool);
+		spin_lock_irq(&pwq->pool->lock);
+		put_pwq(pwq);
+		spin_unlock_irq(&pwq->pool->lock);
 	}
-	free_pwqs(wq);
-	kfree(wq);
 }
 EXPORT_SYMBOL_GPL(destroy_workqueue);
 
-- 
1.8.1.2


  parent reply	other threads:[~2013-03-02  3:28 UTC|newest]

Thread overview: 77+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-02  3:23 [PATCHSET wq/for-3.10-tmp] workqueue: implement workqueue with custom worker attributes Tejun Heo
2013-03-02  3:23 ` [PATCH 01/31] workqueue: make sanity checks less punshing using WARN_ON[_ONCE]()s Tejun Heo
2013-03-02  3:23 ` [PATCH 02/31] workqueue: make workqueue_lock irq-safe Tejun Heo
2013-03-02  3:23 ` [PATCH 03/31] workqueue: introduce kmem_cache for pool_workqueues Tejun Heo
2013-03-02  3:23 ` [PATCH 04/31] workqueue: add workqueue_struct->pwqs list Tejun Heo
2013-03-02  3:23 ` [PATCH 05/31] workqueue: replace for_each_pwq_cpu() with for_each_pwq() Tejun Heo
2013-03-02  3:23 ` [PATCH 06/31] workqueue: introduce for_each_pool() Tejun Heo
2013-03-02  3:23 ` [PATCH 07/31] workqueue: restructure pool / pool_workqueue iterations in freeze/thaw functions Tejun Heo
2013-03-10 10:09   ` Lai Jiangshan
2013-03-10 12:34     ` Tejun Heo
2013-03-02  3:23 ` [PATCH 08/31] workqueue: add wokrqueue_struct->maydays list to replace mayday cpu iterators Tejun Heo
2013-03-02  3:24 ` [PATCH 09/31] workqueue: consistently use int for @cpu variables Tejun Heo
2013-03-02  3:24 ` [PATCH 10/31] workqueue: remove workqueue_struct->pool_wq.single Tejun Heo
2013-03-02  3:24 ` [PATCH 11/31] workqueue: replace get_pwq() with explicit per_cpu_ptr() accesses and first_pwq() Tejun Heo
2013-03-02  3:24 ` [PATCH 12/31] workqueue: update synchronization rules on workqueue->pwqs Tejun Heo
2013-03-10 10:09   ` Lai Jiangshan
2013-03-10 12:38     ` Tejun Heo
2013-03-12 18:20   ` [PATCH v2 " Tejun Heo
2013-03-02  3:24 ` [PATCH 13/31] workqueue: update synchronization rules on worker_pool_idr Tejun Heo
2013-03-12 18:20   ` [PATCH v2 " Tejun Heo
2013-03-02  3:24 ` [PATCH 14/31] workqueue: replace POOL_MANAGING_WORKERS flag with worker_pool->manager_mutex Tejun Heo
2013-03-10 10:09   ` Lai Jiangshan
2013-03-10 12:46     ` Tejun Heo
2013-03-12 18:19   ` [PATCH v2 " Tejun Heo
2013-03-02  3:24 ` [PATCH 15/31] workqueue: separate out init_worker_pool() from init_workqueues() Tejun Heo
2013-03-02  3:24 ` [PATCH 16/31] workqueue: introduce workqueue_attrs Tejun Heo
2013-03-04 18:37   ` [PATCH v2 " Tejun Heo
2013-03-05 22:29     ` Ryan Mallon
2013-03-05 22:33       ` Tejun Heo
2013-03-05 22:34         ` Tejun Heo
2013-03-05 22:40           ` Ryan Mallon
2013-03-05 22:44             ` Tejun Heo
2013-03-05 23:20               ` Ryan Mallon
2013-03-05 23:28                 ` Tejun Heo
2013-03-02  3:24 ` [PATCH 17/31] workqueue: implement attribute-based unbound worker_pool management Tejun Heo
2013-03-10 10:08   ` Lai Jiangshan
2013-03-10 12:58     ` Tejun Heo
2013-03-10 18:36       ` Tejun Heo
2013-03-12 18:21   ` [PATCH v2 " Tejun Heo
2013-03-02  3:24 ` [PATCH 18/31] workqueue: remove unbound_std_worker_pools[] and related helpers Tejun Heo
2013-03-02  3:24 ` [PATCH 19/31] workqueue: drop "std" from cpu_std_worker_pools and for_each_std_worker_pool() Tejun Heo
2013-03-02  3:24 ` [PATCH 20/31] workqueue: add pool ID to the names of unbound kworkers Tejun Heo
2013-03-02  3:24 ` [PATCH 21/31] workqueue: drop WQ_RESCUER and test workqueue->rescuer for NULL instead Tejun Heo
2013-03-02  3:24 ` [PATCH 22/31] workqueue: restructure __alloc_workqueue_key() Tejun Heo
2013-03-02  3:24 ` Tejun Heo [this message]
2013-03-02  3:24 ` [PATCH 24/31] workqueue: prepare flush_workqueue() for dynamic creation and destrucion of unbound pool_workqueues Tejun Heo
2013-03-02  3:24 ` [PATCH 25/31] workqueue: perform non-reentrancy test when queueing to unbound workqueues too Tejun Heo
2013-03-02  3:24 ` [PATCH 26/31] workqueue: implement apply_workqueue_attrs() Tejun Heo
2013-03-02  3:24 ` [PATCH 27/31] workqueue: make it clear that WQ_DRAINING is an internal flag Tejun Heo
2013-03-02  3:24 ` [PATCH 28/31] workqueue: reject increasing max_active for ordered workqueues Tejun Heo
2013-03-04 18:30   ` [PATCH UPDATED 28/31] workqueue: reject adjusting max_active or applying attrs to " Tejun Heo
2013-03-02  3:24 ` [PATCH 29/31] cpumask: implement cpumask_parse() Tejun Heo
2013-03-02  3:24 ` [PATCH 30/31] driver/base: implement subsys_virtual_register() Tejun Heo
2013-03-02 18:17   ` Greg Kroah-Hartman
2013-03-02 20:26     ` Tejun Heo
2013-03-03  6:42     ` Kay Sievers
2013-03-05 20:43       ` Tejun Heo
2013-03-07 23:31         ` Greg Kroah-Hartman
2013-03-08  0:04           ` Kay Sievers
2013-03-10 11:57             ` Tejun Heo
2013-03-10 16:45               ` Greg Kroah-Hartman
2013-03-10 17:00                 ` Kay Sievers
2013-03-10 17:24                   ` Greg Kroah-Hartman
2013-03-10 17:50                     ` Kay Sievers
2013-03-10 18:34                       ` Tejun Heo
2013-03-12 18:40                         ` Tejun Heo
2013-03-02  3:24 ` [PATCH 31/31] workqueue: implement sysfs interface for workqueues Tejun Heo
2013-03-04 18:30   ` [PATCH v2 " Tejun Heo
2013-03-05 20:41 ` [PATCHSET wq/for-3.10-tmp] workqueue: implement workqueue with custom worker attributes Tejun Heo
2013-03-10 10:34 ` Lai Jiangshan
2013-03-10 12:01   ` Tejun Heo
2013-03-11 15:24     ` Tejun Heo
2013-03-11 15:40       ` Lai Jiangshan
2013-03-11 15:42     ` Lai Jiangshan
2013-03-11 15:43       ` Tejun Heo
2013-03-12 18:10     ` Tejun Heo
2013-03-12 18:34 ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1362194662-2344-24-git-send-email-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=jmoyer@redhat.com \
    --cc=laijs@cn.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=zab@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox