From: Waiman Long <waiman.long@hp.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Jason Low <jason.low2@hp.com>,
mingo@redhat.com, paulmck@linux.vnet.ibm.com,
torvalds@linux-foundation.org, tglx@linutronix.de,
linux-kernel@vger.kernel.org, riel@redhat.com,
akpm@linux-foundation.org, davidlohr@hp.com, hpa@zytor.com,
andi@firstfloor.org, aswin@hp.com, scott.norton@hp.com,
chegu_vinod@hp.com
Subject: Re: [RFC][PATCH v2 5/5] mutex: Give spinners a chance to spin_on_owner if need_resched() triggered while queued
Date: Wed, 05 Feb 2014 16:44:34 -0500 [thread overview]
Message-ID: <52F2B0C2.8040408@hp.com> (raw)
In-Reply-To: <20140129115142.GE9636@twins.programming.kicks-ass.net>
On 01/29/2014 06:51 AM, Peter Zijlstra wrote:
> On Tue, Jan 28, 2014 at 02:51:35PM -0800, Jason Low wrote:
>>> But urgh, nasty problem. Lemme ponder this a bit.
> OK, please have a very careful look at the below. It survived a boot
> with udev -- which usually stresses mutex contention enough to explode
> (in fact it did a few time when I got the contention/cancel path wrong),
> however I have not ran anything else on it.
>
> The below is an MCS variant that allows relatively cheap unqueueing. But
> its somewhat tricky and I might have gotten a case wrong, esp. the
> double concurrent cancel case got my head hurting (I didn't attempt a
> tripple unqueue).
>
> Applies to tip/master but does generate a few (harmless) compile
> warnings because I didn't fully clean up the mcs_spinlock vs m_spinlock
> thing.
>
> Also, there's a comment in the slowpath that bears consideration.
>
>
I have an alternative way of breaking out of the MCS lock waiting queue
when need_resched() is set. I overload the locked flag to indicate a
skipped node if negative. I ran the patch through the AIM7 high-systime
workload on a 4-socket server and it seemed to run fine.
Please check the following POC patch to see if you have any comments.
diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
index e9a4d74..84a0b32 100644
--- a/include/linux/mcs_spinlock.h
+++ b/include/linux/mcs_spinlock.h
@@ -14,17 +14,45 @@
struct mcs_spinlock {
struct mcs_spinlock *next;
- int locked; /* 1 if lock acquired */
+ int locked; /* 1 if lock acquired, -1 if skipping node */
};
+/*
+ * The node skipping feature requires the MCS node to be persistent,
+ * i.e. not allocated on stack. In addition, the MCS node cannot be
+ * reused until after the locked flag is cleared. The mcs_skip_node()
+ * macro must be defined before including this header file.
+ */
+#ifdef mcs_skip_node
+#undef arch_mcs_spin_lock_contended
+#undef arch_mcs_spin_unlock_contended
+
+#define arch_mcs_spin_lock_contended(n) \
+do { \
+ while (!smp_load_acquire(&(n)->locked)) { \
+ if (mcs_skip_node(n)) { \
+ if (cmpxchg(&(n)->locked, 0, -1) == 0) \
+ return; \
+ } \
+ arch_mutex_cpu_relax(); \
+ }; \
+} while (0)
+
+#define arch_mcs_spin_unlock_contended(n) \
+ if (cmpxchg(&(n)->locked, 0, 1) == 0) \
+ return
+
+#define mcs_set_locked(n, v) (n)->locked = (v)
+#else /* mcs_skip_node */
+
#ifndef arch_mcs_spin_lock_contended
/*
* Using smp_load_acquire() provides a memory barrier that ensures
* subsequent operations happen after the lock is acquired.
*/
-#define arch_mcs_spin_lock_contended(l) \
+#define arch_mcs_spin_lock_contended(n) \
do { \
- while (!(smp_load_acquire(l))) \
+ while (!(smp_load_acquire(&(n)->locked))) \
arch_mutex_cpu_relax(); \
} while (0)
#endif
@@ -35,9 +63,12 @@ do { \
* operations in the critical section has been completed before
* unlocking.
*/
-#define arch_mcs_spin_unlock_contended(l) \
- smp_store_release((l), 1)
+#define arch_mcs_spin_unlock_contended(n) \
+ smp_store_release(&(n)->locked, 1)
+
+#define mcs_set_locked(n, v)
#endif
+#endif /* mcs_skip_node */
/*
* Note: the smp_load_acquire/smp_store_release pair is not
@@ -77,12 +108,13 @@ void mcs_spin_lock(struct mcs_spinlock **lock,
struct mcs_spinlock *node)
* value won't be used. If a debug mode is needed to
* audit lock status, then set node->locked value here.
*/
+ mcs_set_locked(node, 1);
return;
}
ACCESS_ONCE(prev->next) = node;
/* Wait until the lock holder passes the lock down. */
- arch_mcs_spin_lock_contended(&node->locked);
+ arch_mcs_spin_lock_contended(node);
}
/*
@@ -94,19 +126,35 @@ void mcs_spin_unlock(struct mcs_spinlock **lock,
struct mcs_spinlock *node)
{
struct mcs_spinlock *next = ACCESS_ONCE(node->next);
+#ifdef mcs_skip_node
+check_next_node:
+#endif
if (likely(!next)) {
/*
* Release the lock by setting it to NULL
*/
- if (likely(cmpxchg(lock, node, NULL) == node))
+ if (likely(cmpxchg(lock, node, NULL) == node)) {
+ mcs_set_locked(node, 0);
return;
+ }
/* Wait until the next pointer is set */
while (!(next = ACCESS_ONCE(node->next)))
arch_mutex_cpu_relax();
}
+ /* Clear the lock flag to indicate the node can be used again. */
+ mcs_set_locked(node, 0);
/* Pass lock to next waiter. */
- arch_mcs_spin_unlock_contended(&next->locked);
+ arch_mcs_spin_unlock_contended(next);
+
+#ifdef mcs_skip_node
+ /*
+ * The next node should be skipped
+ */
+ node = next;
+ next = ACCESS_ONCE(node->next);
+ goto check_next_node;
+#endif
}
#endif /* __LINUX_MCS_SPINLOCK_H */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 45fe1b5..6351eca 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -25,6 +25,10 @@
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/debug_locks.h>
+/*
+ * Allow an MCS node to be skipped if need_resched() is true.
+ */
+#define mcs_skip_node(n) need_resched()
#include <linux/mcs_spinlock.h>
/*
@@ -45,6 +49,13 @@
*/
#define MUTEX_SHOW_NO_WAITER(mutex)
(atomic_read(&(mutex)->count) >= 0)
+/*
+ * Using a single mcs node per CPU is safe because mutex_lock() should not be
+ * called from interrupt context and we have preemption disabled over the mcs
+ * lock usage.
+ */
+static DEFINE_PER_CPU(struct mcs_spinlock, m_node);
+
void
__mutex_init(struct mutex *lock, const char *name, struct
lock_class_key *key)
{
@@ -166,6 +177,9 @@ static inline int mutex_can_spin_on_owner(struct
mutex *lock)
struct task_struct *owner;
int retval = 1;
+ if (need_resched())
+ return 0;
+
rcu_read_lock();
owner = ACCESS_ONCE(lock->owner);
if (owner)
@@ -370,6 +384,7 @@ __mutex_lock_common(struct mutex *lock, long state,
unsigned int subclass,
struct mutex_waiter waiter;
unsigned long flags;
int ret;
+ struct mcs_spinlock *node;
preempt_disable();
mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
@@ -400,9 +415,13 @@ __mutex_lock_common(struct mutex *lock, long state,
unsigned int subclass,
if (!mutex_can_spin_on_owner(lock))
goto slowpath;
+ node = this_cpu_ptr(&m_node);
+ if (node->locked < 0)
+ /* MCS node not available yet */
+ goto slowpath;
+
for (;;) {
struct task_struct *owner;
- struct mcs_spinlock node;
if (use_ww_ctx && ww_ctx->acquired > 0) {
struct ww_mutex *ww;
@@ -424,10 +443,15 @@ __mutex_lock_common(struct mutex *lock, long
state, unsigned int subclass,
* If there's an owner, wait for it to either
* release the lock or go to sleep.
*/
- mcs_spin_lock(&lock->mcs_lock, &node);
+ mcs_spin_lock(&lock->mcs_lock, node);
+ if (node->locked < 0)
+ /*
+ * need_resched() true, no unlock needed
+ */
+ goto slowpath;
owner = ACCESS_ONCE(lock->owner);
if (owner && !mutex_spin_on_owner(lock, owner)) {
- mcs_spin_unlock(&lock->mcs_lock, &node);
+ mcs_spin_unlock(&lock->mcs_lock, node);
goto slowpath;
}
@@ -442,11 +466,11 @@ __mutex_lock_common(struct mutex *lock, long
state, unsigned int subclass,
}
mutex_set_owner(lock);
- mcs_spin_unlock(&lock->mcs_lock, &node);
+ mcs_spin_unlock(&lock->mcs_lock, node);
preempt_enable();
return 0;
}
- mcs_spin_unlock(&lock->mcs_lock, &node);
+ mcs_spin_unlock(&lock->mcs_lock, node);
/*
* When there's no owner, we might have preempted between the
next prev parent reply other threads:[~2014-02-05 21:45 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-01-28 19:13 [PATCH v2 0/5] mutex: Mutex scalability patches Jason Low
2014-01-28 19:13 ` [PATCH v2 1/5] mutex: In mutex_can_spin_on_owner(), return false if task need_resched() Jason Low
2014-01-28 20:20 ` Paul E. McKenney
2014-01-28 22:01 ` Jason Low
2014-01-28 21:09 ` Davidlohr Bueso
2014-03-11 12:41 ` [tip:core/locking] locking/mutexes: Return false if task need_resched() in mutex_can_spin_on_owner() tip-bot for Jason Low
2014-01-28 19:13 ` [PATCH v2 2/5] mutex: Modify the way optimistic spinners are queued Jason Low
2014-01-28 20:23 ` Paul E. McKenney
2014-01-28 20:24 ` Paul E. McKenney
2014-01-28 21:17 ` Davidlohr Bueso
2014-01-28 22:10 ` Jason Low
2014-02-02 21:58 ` Paul E. McKenney
2014-03-11 12:41 ` [tip:core/locking] locking/mutexes: " tip-bot for Jason Low
2014-03-11 15:24 ` Jason Low
2014-03-11 15:33 ` Peter Zijlstra
2014-01-28 19:13 ` [PATCH v2 3/5] mutex: Unlock the mutex without the wait_lock Jason Low
2014-03-11 12:41 ` [tip:core/locking] locking/mutexes: " tip-bot for Jason Low
2014-03-12 12:24 ` Peter Zijlstra
2014-03-12 18:44 ` Jason Low
2014-03-13 7:28 ` [tip:core/locking] locking/mutex: Fix debug checks tip-bot for Peter Zijlstra
2014-01-28 19:13 ` [RFC][PATCH v2 4/5] mutex: Disable preemtion between modifying lock->owner and locking/unlocking mutex Jason Low
2014-01-28 20:54 ` Peter Zijlstra
2014-01-28 22:17 ` Jason Low
2014-01-28 19:13 ` [RFC][PATCH v2 5/5] mutex: Give spinners a chance to spin_on_owner if need_resched() triggered while queued Jason Low
2014-01-28 21:07 ` Peter Zijlstra
2014-01-28 22:51 ` Jason Low
2014-01-29 11:51 ` Peter Zijlstra
2014-01-31 3:29 ` Jason Low
2014-01-31 14:09 ` Peter Zijlstra
2014-01-31 20:01 ` Jason Low
2014-01-31 20:08 ` Peter Zijlstra
2014-02-02 21:01 ` Jason Low
2014-02-02 21:12 ` Peter Zijlstra
2014-02-03 18:39 ` Jason Low
2014-02-03 19:25 ` Peter Zijlstra
2014-02-03 20:55 ` Jason Low
2014-02-03 21:06 ` Peter Zijlstra
2014-02-03 21:56 ` Jason Low
2014-02-04 7:13 ` Jason Low
2014-02-02 22:02 ` Paul E. McKenney
2014-02-02 20:02 ` Peter Zijlstra
2014-02-05 21:44 ` Waiman Long [this message]
2014-02-06 14:04 ` Peter Zijlstra
2014-02-06 18:45 ` Waiman Long
2014-02-06 20:10 ` Norton, Scott J
2014-02-10 17:01 ` Peter Zijlstra
2014-02-06 17:44 ` Jason Low
2014-02-06 18:37 ` Waiman Long
2014-01-28 21:08 ` [PATCH v2 0/5] mutex: Mutex scalability patches Davidlohr Bueso
2014-01-28 23:11 ` Jason Low
-- strict thread matches above, loose matches on Subject: below --
2014-02-06 14:52 [RFC][PATCH v2 5/5] mutex: Give spinners a chance to spin_on_owner if need_resched() triggered while queued Daniel J Blueman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=52F2B0C2.8040408@hp.com \
--to=waiman.long@hp.com \
--cc=akpm@linux-foundation.org \
--cc=andi@firstfloor.org \
--cc=aswin@hp.com \
--cc=chegu_vinod@hp.com \
--cc=davidlohr@hp.com \
--cc=hpa@zytor.com \
--cc=jason.low2@hp.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=peterz@infradead.org \
--cc=riel@redhat.com \
--cc=scott.norton@hp.com \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).