All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: linux-kernel@vger.kernel.org
Subject: Re: [PATCH wq/for-3.6-fixes 3/3] workqueue: fix possible idle worker depletion during CPU_ONLINE
Date: Fri, 7 Sep 2012 16:05:56 -0700	[thread overview]
Message-ID: <20120907230556.GJ9426@google.com> (raw)
In-Reply-To: <20120907203414.GI9426@google.com>

I got it down to the following, but it creates a problem where CPU
hotplug queues a work item on worker->scheduled before the execution
loop starts.  :(

Need to think more about it.

 kernel/workqueue.c |   63 ++++++++++++++++++++++++-----------------------------
 1 file changed, 29 insertions(+), 34 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index dc7b845..4c7502d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -66,6 +66,7 @@ enum {
 
 	/* pool flags */
 	POOL_MANAGE_WORKERS	= 1 << 0,	/* need to manage workers */
+	POOL_MANAGING_WORKERS	= 1 << 1,
 
 	/* worker flags */
 	WORKER_STARTED		= 1 << 0,	/* started */
@@ -165,7 +166,7 @@ struct worker_pool {
 	struct timer_list	idle_timer;	/* L: worker idle timeout */
 	struct timer_list	mayday_timer;	/* L: SOS timer for workers */
 
-	struct mutex		manager_mutex;	/* mutex manager should hold */
+	struct mutex		hotplug_mutex;	/* mutex manager should hold */
 	struct ida		worker_ida;	/* L: for worker IDs */
 };
 
@@ -652,7 +653,7 @@ static bool need_to_manage_workers(struct worker_pool *pool)
 /* Do we have too many workers and should some go away? */
 static bool too_many_workers(struct worker_pool *pool)
 {
-	bool managing = mutex_is_locked(&pool->manager_mutex);
+	bool managing = pool->flags & POOL_MANAGING_WORKERS;
 	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
 	int nr_busy = pool->nr_workers - nr_idle;
 
@@ -1390,7 +1391,7 @@ static void rebind_workers(struct global_cwq *gcwq)
 	lockdep_assert_held(&gcwq->lock);
 
 	for_each_worker_pool(pool, gcwq)
-		lockdep_assert_held(&pool->manager_mutex);
+		lockdep_assert_held(&pool->hotplug_mutex);
 
 	/*
 	 * Rebind idle workers.  Interlocked both ways.  We wait for
@@ -1713,22 +1714,16 @@ static void gcwq_mayday_timeout(unsigned long __pool)
  * spin_lock_irq(gcwq->lock) which may be released and regrabbed
  * multiple times.  Does GFP_KERNEL allocations.  Called only from
  * manager.
- *
- * RETURNS:
- * false if no action was taken and gcwq->lock stayed locked, true
- * otherwise.
  */
-static bool maybe_create_worker(struct worker_pool *pool)
-__releases(&gcwq->lock)
-__acquires(&gcwq->lock)
+static void maybe_create_worker(struct worker_pool *pool)
 {
 	struct global_cwq *gcwq = pool->gcwq;
 
+	spin_lock_irq(&gcwq->lock);
 	if (!need_to_create_worker(pool))
-		return false;
+		goto out_unlock;
 restart:
 	spin_unlock_irq(&gcwq->lock);
-
 	/* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
 	mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
 
@@ -1741,7 +1736,7 @@ restart:
 			spin_lock_irq(&gcwq->lock);
 			start_worker(worker);
 			BUG_ON(need_to_create_worker(pool));
-			return true;
+			goto out_unlock;
 		}
 
 		if (!need_to_create_worker(pool))
@@ -1758,7 +1753,8 @@ restart:
 	spin_lock_irq(&gcwq->lock);
 	if (need_to_create_worker(pool))
 		goto restart;
-	return true;
+out_unlock:
+	spin_unlock_irq(&gcwq->lock);
 }
 
 /**
@@ -1771,15 +1767,9 @@ restart:
  * LOCKING:
  * spin_lock_irq(gcwq->lock) which may be released and regrabbed
  * multiple times.  Called only from manager.
- *
- * RETURNS:
- * false if no action was taken and gcwq->lock stayed locked, true
- * otherwise.
  */
-static bool maybe_destroy_workers(struct worker_pool *pool)
+static void maybe_destroy_workers(struct worker_pool *pool)
 {
-	bool ret = false;
-
 	while (too_many_workers(pool)) {
 		struct worker *worker;
 		unsigned long expires;
@@ -1793,10 +1783,7 @@ static bool maybe_destroy_workers(struct worker_pool *pool)
 		}
 
 		destroy_worker(worker);
-		ret = true;
 	}
-
-	return ret;
 }
 
 /**
@@ -1820,24 +1807,32 @@ static bool maybe_destroy_workers(struct worker_pool *pool)
  * some action was taken.
  */
 static bool manage_workers(struct worker *worker)
+	__releases(&gcwq->lock) __acquires(&gcwq->lock)
 {
 	struct worker_pool *pool = worker->pool;
-	bool ret = false;
+	struct global_cwq *gcwq = pool->gcwq;
 
-	if (!mutex_trylock(&pool->manager_mutex))
-		return ret;
+	if (pool->flags & POOL_MANAGING_WORKERS)
+		return false;
 
 	pool->flags &= ~POOL_MANAGE_WORKERS;
 
+	spin_unlock_irq(&gcwq->lock);
+
+	/* blah blah */
+	mutex_lock(&pool->hotplug_mutex);
+
 	/*
 	 * Destroy and then create so that may_start_working() is true
 	 * on return.
 	 */
-	ret |= maybe_destroy_workers(pool);
-	ret |= maybe_create_worker(pool);
+	maybe_destroy_workers(pool);
+	maybe_create_worker(pool);
 
-	mutex_unlock(&pool->manager_mutex);
-	return ret;
+	mutex_unlock(&pool->hotplug_mutex);
+
+	spin_lock_irq(&gcwq->lock);
+	return true;
 }
 
 /**
@@ -3399,7 +3394,7 @@ static void gcwq_claim_management_and_lock(struct global_cwq *gcwq)
 	struct worker_pool *pool;
 
 	for_each_worker_pool(pool, gcwq)
-		mutex_lock_nested(&pool->manager_mutex, pool - gcwq->pools);
+		mutex_lock_nested(&pool->hotplug_mutex, pool - gcwq->pools);
 	spin_lock_irq(&gcwq->lock);
 }
 
@@ -3410,7 +3405,7 @@ static void gcwq_release_management_and_unlock(struct global_cwq *gcwq)
 
 	spin_unlock_irq(&gcwq->lock);
 	for_each_worker_pool(pool, gcwq)
-		mutex_unlock(&pool->manager_mutex);
+		mutex_unlock(&pool->hotplug_mutex);
 }
 
 static void gcwq_unbind_fn(struct work_struct *work)
@@ -3749,7 +3744,7 @@ static int __init init_workqueues(void)
 			setup_timer(&pool->mayday_timer, gcwq_mayday_timeout,
 				    (unsigned long)pool);
 
-			mutex_init(&pool->manager_mutex);
+			mutex_init(&pool->hotplug_mutex);
 			ida_init(&pool->worker_ida);
 		}
 

  reply	other threads:[~2012-09-07 23:06 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-09-06 20:06 [PATCH wq/for-3.6-fixes 1/3] workqueue: break out gcwq->lock locking from gcwq_claim/release_management_and_[un]lock() Tejun Heo
2012-09-06 20:07 ` [PATCH wq/for-3.6-fixes 2/3] workqueue: rename rebind_workers() to gcwq_associate() and let it handle locking and DISASSOCIATED clearing Tejun Heo
2012-09-06 20:08   ` [PATCH wq/for-3.6-fixes 3/3] workqueue: fix possible idle worker depletion during CPU_ONLINE Tejun Heo
2012-09-07  1:53     ` Lai Jiangshan
2012-09-07 19:25       ` Tejun Heo
2012-09-07  3:10     ` Lai Jiangshan
2012-09-07 19:29       ` Tejun Heo
2012-09-07 20:22         ` Tejun Heo
2012-09-07 20:34           ` Tejun Heo
2012-09-07 23:05             ` Tejun Heo [this message]
2012-09-07 23:07               ` Tejun Heo
2012-09-07 23:41                 ` Tejun Heo
2012-09-08 17:18                   ` Lai Jiangshan
2012-09-08 17:29                     ` Tejun Heo
2012-09-08 17:32                       ` Tejun Heo
2012-09-08 17:40                         ` Lai Jiangshan
2012-09-08 17:41                           ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120907230556.GJ9426@google.com \
    --to=tj@kernel.org \
    --cc=laijs@cn.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.