From: Peter Zijlstra <peterz@infradead.org>
To: Jason Low <jason.low2@hp.com>
Cc: mingo@redhat.com, paulmck@linux.vnet.ibm.com, Waiman.Long@hp.com,
torvalds@linux-foundation.org, tglx@linutronix.de,
linux-kernel@vger.kernel.org, riel@redhat.com,
akpm@linux-foundation.org, davidlohr@hp.com, hpa@zytor.com,
andi@firstfloor.org, aswin@hp.com, scott.norton@hp.com,
chegu_vinod@hp.com
Subject: Re: [RFC][PATCH v2 5/5] mutex: Give spinners a chance to spin_on_owner if need_resched() triggered while queued
Date: Wed, 29 Jan 2014 12:51:42 +0100 [thread overview]
Message-ID: <20140129115142.GE9636@twins.programming.kicks-ass.net> (raw)
In-Reply-To: <1390949495.2807.52.camel@j-VirtualBox>
On Tue, Jan 28, 2014 at 02:51:35PM -0800, Jason Low wrote:
> > But urgh, nasty problem. Lemme ponder this a bit.
OK, please have a very careful look at the below. It survived a boot
with udev -- which usually stresses mutex contention enough to explode
(in fact it did a few time when I got the contention/cancel path wrong),
however I have not ran anything else on it.
The below is an MCS variant that allows relatively cheap unqueueing. But
its somewhat tricky and I might have gotten a case wrong, esp. the
double concurrent cancel case got my head hurting (I didn't attempt a
tripple unqueue).
Applies to tip/master but does generate a few (harmless) compile
warnings because I didn't fully clean up the mcs_spinlock vs m_spinlock
thing.
Also, there's a comment in the slowpath that bears consideration.
---
kernel/locking/mutex.c | 158 +++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 148 insertions(+), 10 deletions(-)
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 45fe1b5293d6..4a69da73903c 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -166,6 +166,9 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
struct task_struct *owner;
int retval = 1;
+ if (need_resched())
+ return 0;
+
rcu_read_lock();
owner = ACCESS_ONCE(lock->owner);
if (owner)
@@ -358,6 +361,134 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock,
spin_unlock_mutex(&lock->base.wait_lock, flags);
}
+struct m_spinlock {
+ struct m_spinlock *next, *prev;
+ int locked; /* 1 if lock acquired */
+};
+
+/*
+ * Using a single mcs node per CPU is safe because mutex_lock() should not be
+ * called from interrupt context and we have preemption disabled over the mcs
+ * lock usage.
+ */
+static DEFINE_PER_CPU(struct m_spinlock, m_node);
+
+static bool m_spin_lock(struct m_spinlock **lock)
+{
+ struct m_spinlock *node = this_cpu_ptr(&m_node);
+ struct m_spinlock *prev, *next;
+
+ node->locked = 0;
+ node->next = NULL;
+
+ node->prev = prev = xchg(lock, node);
+ if (likely(prev == NULL))
+ return true;
+
+ ACCESS_ONCE(prev->next) = node;
+
+ /*
+ * Normally @prev is untouchable after the above store; because at that
+ * moment unlock can proceed and wipe the node element from stack.
+ *
+ * However, since our nodes are static per-cpu storage, we're
+ * guaranteed their existence -- this allows us to apply
+ * cmpxchg in an attempt to undo our queueing.
+ */
+
+ while (!smp_load_acquire(&node->locked)) {
+ if (need_resched())
+ goto unqueue;
+ arch_mutex_cpu_relax();
+ }
+ return true;
+
+unqueue:
+
+ /*
+ * Undo our @prev->next assignment; this will make @prev's unlock()
+ * wait for a next pointer since @lock points to us.
+ */
+ if (cmpxchg(&prev->next, node, NULL) != node) { /* A -> B */
+ /*
+ * @prev->next no longer pointed to us; either we hold the lock
+ * or @prev cancelled the wait too and we need to reload and
+ * retry.
+ */
+ if (smp_load_acquire(&node->locked))
+ return true;
+
+ /*
+ * Because we observed the new @prev->next, the smp_wmb() at (B)
+ * ensures that we must now observe the new @node->prev.
+ */
+ prev = ACCESS_ONCE(node->prev);
+ goto unqueue;
+ }
+
+ if (smp_load_acquire(&node->locked)) {
+ /*
+ * OOPS, we were too late, we already got the lock. No harm
+ * done though; @prev is now unused an nobody cares we frobbed
+ * it.
+ */
+ return true;
+ }
+
+ /*
+ * Per the above logic @prev's unlock() will now wait,
+ * therefore @prev is now stable.
+ */
+
+ if (cmpxchg(lock, node, prev) == node) {
+ /*
+ * We were still the last queued, we moved @lock back. @prev
+ * will now observe @lock and will complete its unlock().
+ */
+ return false;
+ }
+
+ /*
+ * We're not the last to be queued, obtain ->next.
+ */
+
+ while (!(next = ACCESS_ONCE(node->next)))
+ arch_mutex_cpu_relax();
+
+ ACCESS_ONCE(next->prev) = prev;
+
+ /*
+ * Ensure that @next->prev is written before we write @prev->next,
+ * this guarantees that when the cmpxchg at (A) fails we must
+ * observe the new prev value.
+ */
+ smp_wmb(); /* B -> A */
+
+ /*
+ * And point @prev to our next, and we're unlinked. We can use an
+ * non-atomic op because only we modify @prev->next.
+ */
+ ACCESS_ONCE(prev->next) = next;
+
+ return false;
+}
+
+static void m_spin_unlock(struct m_spinlock **lock)
+{
+ struct m_spinlock *node = this_cpu_ptr(&m_node);
+ struct m_spinlock *next = ACCESS_ONCE(node->next);
+
+ while (likely(!next)) {
+ if (likely(cmpxchg(lock, node, NULL) == node))
+ return;
+
+ arch_mutex_cpu_relax();
+ next = ACCESS_ONCE(node->next);
+ }
+
+ smp_store_release(&next->locked, 1);
+}
+
/*
* Lock a mutex (possibly interruptible), slowpath:
*/
@@ -400,9 +531,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
if (!mutex_can_spin_on_owner(lock))
goto slowpath;
+ if (!m_spin_lock(&lock->mcs_lock))
+ goto slowpath;
+
for (;;) {
struct task_struct *owner;
- struct mcs_spinlock node;
if (use_ww_ctx && ww_ctx->acquired > 0) {
struct ww_mutex *ww;
@@ -417,19 +550,16 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* performed the optimistic spinning cannot be done.
*/
if (ACCESS_ONCE(ww->ctx))
- goto slowpath;
+ break;
}
/*
* If there's an owner, wait for it to either
* release the lock or go to sleep.
*/
- mcs_spin_lock(&lock->mcs_lock, &node);
owner = ACCESS_ONCE(lock->owner);
- if (owner && !mutex_spin_on_owner(lock, owner)) {
- mcs_spin_unlock(&lock->mcs_lock, &node);
- goto slowpath;
- }
+ if (owner && !mutex_spin_on_owner(lock, owner))
+ break;
if ((atomic_read(&lock->count) == 1) &&
(atomic_cmpxchg(&lock->count, 1, 0) == 1)) {
@@ -442,11 +572,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
}
mutex_set_owner(lock);
- mcs_spin_unlock(&lock->mcs_lock, &node);
+ m_spin_unlock(&lock->mcs_lock);
preempt_enable();
return 0;
}
- mcs_spin_unlock(&lock->mcs_lock, &node);
/*
* When there's no owner, we might have preempted between the
@@ -455,7 +584,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* the owner complete.
*/
if (!owner && (need_resched() || rt_task(task)))
- goto slowpath;
+ break;
/*
* The cpu_relax() call is a compiler barrier which forces
@@ -465,10 +594,19 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
*/
arch_mutex_cpu_relax();
}
+ m_spin_unlock(&lock->mcs_lock);
slowpath:
#endif
spin_lock_mutex(&lock->wait_lock, flags);
+ /*
+ * XXX arguably, when need_resched() is set and there's other pending
+ * owners we should not try-acquire and simply queue and schedule().
+ *
+ * There's nothing worse than obtaining a lock only to get immediately
+ * scheduled out.
+ */
+
/* once more, can we acquire the lock? */
if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, 0) == 1))
goto skip_wait;
next prev parent reply other threads:[~2014-01-29 11:52 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-01-28 19:13 [PATCH v2 0/5] mutex: Mutex scalability patches Jason Low
2014-01-28 19:13 ` [PATCH v2 1/5] mutex: In mutex_can_spin_on_owner(), return false if task need_resched() Jason Low
2014-01-28 20:20 ` Paul E. McKenney
2014-01-28 22:01 ` Jason Low
2014-01-28 21:09 ` Davidlohr Bueso
2014-03-11 12:41 ` [tip:core/locking] locking/mutexes: Return false if task need_resched() in mutex_can_spin_on_owner() tip-bot for Jason Low
2014-01-28 19:13 ` [PATCH v2 2/5] mutex: Modify the way optimistic spinners are queued Jason Low
2014-01-28 20:23 ` Paul E. McKenney
2014-01-28 20:24 ` Paul E. McKenney
2014-01-28 21:17 ` Davidlohr Bueso
2014-01-28 22:10 ` Jason Low
2014-02-02 21:58 ` Paul E. McKenney
2014-03-11 12:41 ` [tip:core/locking] locking/mutexes: " tip-bot for Jason Low
2014-03-11 15:24 ` Jason Low
2014-03-11 15:33 ` Peter Zijlstra
2014-01-28 19:13 ` [PATCH v2 3/5] mutex: Unlock the mutex without the wait_lock Jason Low
2014-03-11 12:41 ` [tip:core/locking] locking/mutexes: " tip-bot for Jason Low
2014-03-12 12:24 ` Peter Zijlstra
2014-03-12 18:44 ` Jason Low
2014-03-13 7:28 ` [tip:core/locking] locking/mutex: Fix debug checks tip-bot for Peter Zijlstra
2014-01-28 19:13 ` [RFC][PATCH v2 4/5] mutex: Disable preemtion between modifying lock->owner and locking/unlocking mutex Jason Low
2014-01-28 20:54 ` Peter Zijlstra
2014-01-28 22:17 ` Jason Low
2014-01-28 19:13 ` [RFC][PATCH v2 5/5] mutex: Give spinners a chance to spin_on_owner if need_resched() triggered while queued Jason Low
2014-01-28 21:07 ` Peter Zijlstra
2014-01-28 22:51 ` Jason Low
2014-01-29 11:51 ` Peter Zijlstra [this message]
2014-01-31 3:29 ` Jason Low
2014-01-31 14:09 ` Peter Zijlstra
2014-01-31 20:01 ` Jason Low
2014-01-31 20:08 ` Peter Zijlstra
2014-02-02 21:01 ` Jason Low
2014-02-02 21:12 ` Peter Zijlstra
2014-02-03 18:39 ` Jason Low
2014-02-03 19:25 ` Peter Zijlstra
2014-02-03 20:55 ` Jason Low
2014-02-03 21:06 ` Peter Zijlstra
2014-02-03 21:56 ` Jason Low
2014-02-04 7:13 ` Jason Low
2014-02-02 22:02 ` Paul E. McKenney
2014-02-02 20:02 ` Peter Zijlstra
2014-02-05 21:44 ` Waiman Long
2014-02-06 14:04 ` Peter Zijlstra
2014-02-06 18:45 ` Waiman Long
2014-02-06 20:10 ` Norton, Scott J
2014-02-10 17:01 ` Peter Zijlstra
2014-02-06 17:44 ` Jason Low
2014-02-06 18:37 ` Waiman Long
2014-01-28 21:08 ` [PATCH v2 0/5] mutex: Mutex scalability patches Davidlohr Bueso
2014-01-28 23:11 ` Jason Low
-- strict thread matches above, loose matches on Subject: below --
2014-02-06 14:52 [RFC][PATCH v2 5/5] mutex: Give spinners a chance to spin_on_owner if need_resched() triggered while queued Daniel J Blueman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140129115142.GE9636@twins.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=Waiman.Long@hp.com \
--cc=akpm@linux-foundation.org \
--cc=andi@firstfloor.org \
--cc=aswin@hp.com \
--cc=chegu_vinod@hp.com \
--cc=davidlohr@hp.com \
--cc=hpa@zytor.com \
--cc=jason.low2@hp.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=riel@redhat.com \
--cc=scott.norton@hp.com \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.