All of lore.kernel.org
 help / color / mirror / Atom feed
From: Waiman Long <waiman.long@hp.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Jason Low <jason.low2@hp.com>,
	mingo@redhat.com, paulmck@linux.vnet.ibm.com,
	torvalds@linux-foundation.org, tglx@linutronix.de,
	linux-kernel@vger.kernel.org, riel@redhat.com,
	akpm@linux-foundation.org, davidlohr@hp.com, hpa@zytor.com,
	andi@firstfloor.org, aswin@hp.com, scott.norton@hp.com,
	chegu_vinod@hp.com
Subject: Re: [RFC][PATCH v2 5/5] mutex: Give spinners a chance to spin_on_owner if need_resched() triggered while queued
Date: Wed, 05 Feb 2014 16:44:34 -0500	[thread overview]
Message-ID: <52F2B0C2.8040408@hp.com> (raw)
In-Reply-To: <20140129115142.GE9636@twins.programming.kicks-ass.net>

On 01/29/2014 06:51 AM, Peter Zijlstra wrote:
> On Tue, Jan 28, 2014 at 02:51:35PM -0800, Jason Low wrote:
>>> But urgh, nasty problem. Lemme ponder this a bit.
> OK, please have a very careful look at the below. It survived a boot
> with udev -- which usually stresses mutex contention enough to explode
> (in fact it did a few time when I got the contention/cancel path wrong),
> however I have not ran anything else on it.
>
> The below is an MCS variant that allows relatively cheap unqueueing. But
> its somewhat tricky and I might have gotten a case wrong, esp. the
> double concurrent cancel case got my head hurting (I didn't attempt a
> tripple unqueue).
>
> Applies to tip/master but does generate a few (harmless) compile
> warnings because I didn't fully clean up the mcs_spinlock vs m_spinlock
> thing.
>
> Also, there's a comment in the slowpath that bears consideration.
>
>

I have an alternative way of breaking out of the MCS lock waiting queue 
when need_resched() is set. I overload the locked flag to indicate a 
skipped node if negative. I run the patch through the AIM7 high-systime 
workload on a 4-socket server and it seemed to run fine.

Please check the following POC patch to see if you have any comment.
diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
index e9a4d74..84a0b32 100644
--- a/include/linux/mcs_spinlock.h
+++ b/include/linux/mcs_spinlock.h
@@ -14,17 +14,45 @@

  struct mcs_spinlock {
      struct mcs_spinlock *next;
-    int locked; /* 1 if lock acquired */
+    int locked; /* 1 if lock acquired, -1 if skipping node */
  };

+/*
+ * The node skipping feature requires the MCS node to be persistent,
+ * i.e. not allocated on stack. In addition, the MCS node cannot be
+ * reused until after the locked flag is cleared. The mcs_skip_node()
+ * macro must be defined before including this header file.
+ */
+#ifdef mcs_skip_node
+#undef arch_mcs_spin_lock_contended
+#undef arch_mcs_spin_unlock_contended
+
+#define    arch_mcs_spin_lock_contended(n)                    \
+do {                                    \
+    while (!smp_load_acquire(&(n)->locked)) {            \
+        if (mcs_skip_node(n)) {                    \
+            if (cmpxchg(&(n)->locked, 0, -1) == 0)        \
+                return;                    \
+        }                            \
+        arch_mutex_cpu_relax();                    \
+    };                                \
+} while (0)
+
+#define    arch_mcs_spin_unlock_contended(n)                \
+    if (cmpxchg(&(n)->locked, 0, 1) == 0)                \
+        return
+
+#define mcs_set_locked(n, v)    (n)->locked = (v)
+#else    /* mcs_skip_node */
+
  #ifndef arch_mcs_spin_lock_contended
  /*
   * Using smp_load_acquire() provides a memory barrier that ensures
   * subsequent operations happen after the lock is acquired.
   */
-#define arch_mcs_spin_lock_contended(l)                    \
+#define arch_mcs_spin_lock_contended(n)                    \
  do {                                    \
-    while (!(smp_load_acquire(l)))                    \
+    while (!(smp_load_acquire(&(n)->locked)))            \
          arch_mutex_cpu_relax();                    \
  } while (0)
  #endif
@@ -35,9 +63,12 @@ do {                                \
   * operations in the critical section has been completed before
   * unlocking.
   */
-#define arch_mcs_spin_unlock_contended(l)                \
-    smp_store_release((l), 1)
+#define arch_mcs_spin_unlock_contended(n)                \
+    smp_store_release(&(n)->locked, 1)
+
+#define mcs_set_locked(n, v)
  #endif
+#endif    /* mcs_skip_node */

  /*
   * Note: the smp_load_acquire/smp_store_release pair is not
@@ -77,12 +108,13 @@ void mcs_spin_lock(struct mcs_spinlock **lock, 
struct mcs_spinlock *node)
           * value won't be used. If a debug mode is needed to
           * audit lock status, then set node->locked value here.
           */
+        mcs_set_locked(node, 1);
          return;
      }
      ACCESS_ONCE(prev->next) = node;

      /* Wait until the lock holder passes the lock down. */
-    arch_mcs_spin_lock_contended(&node->locked);
+    arch_mcs_spin_lock_contended(node);
  }

  /*
@@ -94,19 +126,35 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, 
struct mcs_spinlock *node)
  {
      struct mcs_spinlock *next = ACCESS_ONCE(node->next);

+#ifdef mcs_skip_node
+check_next_node:
+#endif
      if (likely(!next)) {
          /*
           * Release the lock by setting it to NULL
           */
-        if (likely(cmpxchg(lock, node, NULL) == node))
+        if (likely(cmpxchg(lock, node, NULL) == node)) {
+            mcs_set_locked(node, 0);
              return;
+        }
          /* Wait until the next pointer is set */
          while (!(next = ACCESS_ONCE(node->next)))
              arch_mutex_cpu_relax();
      }
+    /* Clear the lock flag to indicate the node can be used again. */
+    mcs_set_locked(node, 0);

      /* Pass lock to next waiter. */
-    arch_mcs_spin_unlock_contended(&next->locked);
+    arch_mcs_spin_unlock_contended(next);
+
+#ifdef mcs_skip_node
+    /*
+     * The next node should be skipped
+     */
+    node = next;
+    next = ACCESS_ONCE(node->next);
+    goto check_next_node;
+#endif
  }

  #endif /* __LINUX_MCS_SPINLOCK_H */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 45fe1b5..6351eca 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -25,6 +25,10 @@
  #include <linux/spinlock.h>
  #include <linux/interrupt.h>
  #include <linux/debug_locks.h>
+/*
+ * Allowed an MCS node to be skipped if need_resched() is true.
+ */
+#define mcs_skip_node(n)    need_resched()
  #include <linux/mcs_spinlock.h>

  /*
@@ -45,6 +49,13 @@
   */
  #define    MUTEX_SHOW_NO_WAITER(mutex)    
(atomic_read(&(mutex)->count) >= 0)

+/*
+ * Using a single mcs node per CPU is safe because mutex_lock() should 
not be
+ * called from interrupt context and we have preemption disabled over 
the mcs
+ * lock usage.
+ */
+static DEFINE_PER_CPU(struct mcs_spinlock, m_node);
+
  void
  __mutex_init(struct mutex *lock, const char *name, struct 
lock_class_key *key)
  {
@@ -166,6 +177,9 @@ static inline int mutex_can_spin_on_owner(struct 
mutex *lock)
      struct task_struct *owner;
      int retval = 1;

+    if (need_resched())
+        return 0;
+
      rcu_read_lock();
      owner = ACCESS_ONCE(lock->owner);
      if (owner)
@@ -370,6 +384,7 @@ __mutex_lock_common(struct mutex *lock, long state, 
unsigned int subclass,
      struct mutex_waiter waiter;
      unsigned long flags;
      int ret;
+    struct mcs_spinlock  *node;

      preempt_disable();
      mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
@@ -400,9 +415,13 @@ __mutex_lock_common(struct mutex *lock, long state, 
unsigned int subclass,
      if (!mutex_can_spin_on_owner(lock))
          goto slowpath;

+    node = this_cpu_ptr(&m_node);
+    if (node->locked < 0)
+        /* MCS node not available yet */
+        goto slowpath;
+
      for (;;) {
          struct task_struct *owner;
-        struct mcs_spinlock  node;

          if (use_ww_ctx && ww_ctx->acquired > 0) {
              struct ww_mutex *ww;
@@ -424,10 +443,15 @@ __mutex_lock_common(struct mutex *lock, long 
state, unsigned int subclass,
           * If there's an owner, wait for it to either
           * release the lock or go to sleep.
           */
-        mcs_spin_lock(&lock->mcs_lock, &node);
+        mcs_spin_lock(&lock->mcs_lock, node);
+        if (node->locked < 0)
+            /*
+             * need_resched() true, no unlock needed
+             */
+            goto slowpath;
          owner = ACCESS_ONCE(lock->owner);
          if (owner && !mutex_spin_on_owner(lock, owner)) {
-            mcs_spin_unlock(&lock->mcs_lock, &node);
+            mcs_spin_unlock(&lock->mcs_lock, node);
              goto slowpath;
          }

@@ -442,11 +466,11 @@ __mutex_lock_common(struct mutex *lock, long 
state, unsigned int subclass,
              }

              mutex_set_owner(lock);
-            mcs_spin_unlock(&lock->mcs_lock, &node);
+            mcs_spin_unlock(&lock->mcs_lock, node);
              preempt_enable();
              return 0;
          }
-        mcs_spin_unlock(&lock->mcs_lock, &node);
+        mcs_spin_unlock(&lock->mcs_lock, node);

          /*
           * When there's no owner, we might have preempted between the




  parent reply	other threads:[~2014-02-05 21:45 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-01-28 19:13 [PATCH v2 0/5] mutex: Mutex scalability patches Jason Low
2014-01-28 19:13 ` [PATCH v2 1/5] mutex: In mutex_can_spin_on_owner(), return false if task need_resched() Jason Low
2014-01-28 20:20   ` Paul E. McKenney
2014-01-28 22:01     ` Jason Low
2014-01-28 21:09   ` Davidlohr Bueso
2014-03-11 12:41   ` [tip:core/locking] locking/mutexes: Return false if task need_resched() in mutex_can_spin_on_owner() tip-bot for Jason Low
2014-01-28 19:13 ` [PATCH v2 2/5] mutex: Modify the way optimistic spinners are queued Jason Low
2014-01-28 20:23   ` Paul E. McKenney
2014-01-28 20:24     ` Paul E. McKenney
2014-01-28 21:17     ` Davidlohr Bueso
2014-01-28 22:10     ` Jason Low
2014-02-02 21:58       ` Paul E. McKenney
2014-03-11 12:41   ` [tip:core/locking] locking/mutexes: " tip-bot for Jason Low
2014-03-11 15:24     ` Jason Low
2014-03-11 15:33       ` Peter Zijlstra
2014-01-28 19:13 ` [PATCH v2 3/5] mutex: Unlock the mutex without the wait_lock Jason Low
2014-03-11 12:41   ` [tip:core/locking] locking/mutexes: " tip-bot for Jason Low
2014-03-12 12:24     ` Peter Zijlstra
2014-03-12 18:44       ` Jason Low
2014-03-13  7:28       ` [tip:core/locking] locking/mutex: Fix debug checks tip-bot for Peter Zijlstra
2014-01-28 19:13 ` [RFC][PATCH v2 4/5] mutex: Disable preemtion between modifying lock->owner and locking/unlocking mutex Jason Low
2014-01-28 20:54   ` Peter Zijlstra
2014-01-28 22:17     ` Jason Low
2014-01-28 19:13 ` [RFC][PATCH v2 5/5] mutex: Give spinners a chance to spin_on_owner if need_resched() triggered while queued Jason Low
2014-01-28 21:07   ` Peter Zijlstra
2014-01-28 22:51     ` Jason Low
2014-01-29 11:51       ` Peter Zijlstra
2014-01-31  3:29         ` Jason Low
2014-01-31 14:09           ` Peter Zijlstra
2014-01-31 20:01             ` Jason Low
2014-01-31 20:08               ` Peter Zijlstra
2014-02-02 21:01                 ` Jason Low
2014-02-02 21:12                   ` Peter Zijlstra
2014-02-03 18:39                     ` Jason Low
2014-02-03 19:25                       ` Peter Zijlstra
2014-02-03 20:55                         ` Jason Low
2014-02-03 21:06                           ` Peter Zijlstra
2014-02-03 21:56                             ` Jason Low
2014-02-04  7:13                         ` Jason Low
2014-02-02 22:02                 ` Paul E. McKenney
2014-02-02 20:02             ` Peter Zijlstra
2014-02-05 21:44         ` Waiman Long [this message]
2014-02-06 14:04           ` Peter Zijlstra
2014-02-06 18:45             ` Waiman Long
2014-02-06 20:10               ` Norton, Scott J
2014-02-10 17:01                 ` Peter Zijlstra
2014-02-06 17:44           ` Jason Low
2014-02-06 18:37             ` Waiman Long
2014-01-28 21:08 ` [PATCH v2 0/5] mutex: Mutex scalability patches Davidlohr Bueso
2014-01-28 23:11   ` Jason Low
  -- strict thread matches above, loose matches on Subject: below --
2014-02-06 14:52 [RFC][PATCH v2 5/5] mutex: Give spinners a chance to spin_on_owner if need_resched() triggered while queued Daniel J Blueman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=52F2B0C2.8040408@hp.com \
    --to=waiman.long@hp.com \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=aswin@hp.com \
    --cc=chegu_vinod@hp.com \
    --cc=davidlohr@hp.com \
    --cc=hpa@zytor.com \
    --cc=jason.low2@hp.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=scott.norton@hp.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.