public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file
       [not found] <cover.1383935697.git.tim.c.chen@linux.intel.com>
@ 2013-11-08 19:51 ` Tim Chen
  2013-11-19 19:10   ` Paul E. McKenney
  2013-11-08 19:52 ` [PATCH v5 2/4] MCS Lock: optimizations and extra comments Tim Chen
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 16+ messages in thread
From: Tim Chen @ 2013-11-08 19:51 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton, Thomas Gleixner
  Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen,
	Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann,
	Aswin Chandramouleeswaran, Scott J Norton, Will Deacon,
	Figo.zhang

We will need the MCS lock code for doing optimistic spinning for rwsem
and queue rwlock.  Extracting the MCS code from mutex.c and putting it into
its own file allows us to reuse this code easily.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
---
 include/linux/mcs_spinlock.h |   64 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/mutex.h        |    5 ++-
 kernel/locking/mutex.c       |   60 ++++----------------------------------
 3 files changed, 74 insertions(+), 55 deletions(-)
 create mode 100644 include/linux/mcs_spinlock.h

diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
new file mode 100644
index 0000000..b5de3b0
--- /dev/null
+++ b/include/linux/mcs_spinlock.h
@@ -0,0 +1,64 @@
+/*
+ * MCS lock defines
+ *
+ * This file contains the main data structure and API definitions of MCS lock.
+ *
+ * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
+ * with the desirable properties of being fair, and with each cpu trying
+ * to acquire the lock spinning on a local variable.
+ * It avoids expensive cache bouncings that common test-and-set spin-lock
+ * implementations incur.
+ */
+#ifndef __LINUX_MCS_SPINLOCK_H
+#define __LINUX_MCS_SPINLOCK_H
+
+struct mcs_spinlock {
+	struct mcs_spinlock *next;
+	int locked; /* 1 if lock acquired */
+};
+
+/*
+ * We don't inline mcs_spin_lock() so that perf can correctly account for the
+ * time spent in this lock function.
+ */
+static noinline
+void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
+{
+	struct mcs_spinlock *prev;
+
+	/* Init node */
+	node->locked = 0;
+	node->next   = NULL;
+
+	prev = xchg(lock, node);
+	if (likely(prev == NULL)) {
+		/* Lock acquired */
+		node->locked = 1;
+		return;
+	}
+	ACCESS_ONCE(prev->next) = node;
+	smp_wmb();
+	/* Wait until the lock holder passes the lock down */
+	while (!ACCESS_ONCE(node->locked))
+		arch_mutex_cpu_relax();
+}
+
+static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
+{
+	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
+
+	if (likely(!next)) {
+		/*
+		 * Release the lock by setting it to NULL
+		 */
+		if (cmpxchg(lock, node, NULL) == node)
+			return;
+		/* Wait until the next pointer is set */
+		while (!(next = ACCESS_ONCE(node->next)))
+			arch_mutex_cpu_relax();
+	}
+	ACCESS_ONCE(next->locked) = 1;
+	smp_wmb();
+}
+
+#endif /* __LINUX_MCS_SPINLOCK_H */
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index bab49da..32a32e6 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -46,6 +46,7 @@
  * - detects multi-task circular deadlocks and prints out all affected
  *   locks and tasks (and only those tasks)
  */
+struct mcs_spinlock;
 struct mutex {
 	/* 1: unlocked, 0: locked, negative: locked, possible waiters */
 	atomic_t		count;
@@ -55,7 +56,7 @@ struct mutex {
 	struct task_struct	*owner;
 #endif
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-	void			*spin_mlock;	/* Spinner MCS lock */
+	struct mcs_spinlock	*mcs_lock;	/* Spinner MCS lock */
 #endif
 #ifdef CONFIG_DEBUG_MUTEXES
 	const char 		*name;
@@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 # define arch_mutex_cpu_relax() cpu_relax()
 #endif
 
-#endif
+#endif /* __LINUX_MUTEX_H */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index d24105b..e08b183 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -25,6 +25,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
+#include <linux/mcs_spinlock.h>
 
 /*
  * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 	INIT_LIST_HEAD(&lock->wait_list);
 	mutex_clear_owner(lock);
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-	lock->spin_mlock = NULL;
+	lock->mcs_lock = NULL;
 #endif
 
 	debug_mutex_init(lock, name, key);
@@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock);
  * more or less simultaneously, the spinners need to acquire a MCS lock
  * first before spinning on the owner field.
  *
- * We don't inline mspin_lock() so that perf can correctly account for the
- * time spent in this lock function.
  */
-struct mspin_node {
-	struct mspin_node *next ;
-	int		  locked;	/* 1 if lock acquired */
-};
-#define	MLOCK(mutex)	((struct mspin_node **)&((mutex)->spin_mlock))
-
-static noinline
-void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
-{
-	struct mspin_node *prev;
-
-	/* Init node */
-	node->locked = 0;
-	node->next   = NULL;
-
-	prev = xchg(lock, node);
-	if (likely(prev == NULL)) {
-		/* Lock acquired */
-		node->locked = 1;
-		return;
-	}
-	ACCESS_ONCE(prev->next) = node;
-	smp_wmb();
-	/* Wait until the lock holder passes the lock down */
-	while (!ACCESS_ONCE(node->locked))
-		arch_mutex_cpu_relax();
-}
-
-static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
-{
-	struct mspin_node *next = ACCESS_ONCE(node->next);
-
-	if (likely(!next)) {
-		/*
-		 * Release the lock by setting it to NULL
-		 */
-		if (cmpxchg(lock, node, NULL) == node)
-			return;
-		/* Wait until the next pointer is set */
-		while (!(next = ACCESS_ONCE(node->next)))
-			arch_mutex_cpu_relax();
-	}
-	ACCESS_ONCE(next->locked) = 1;
-	smp_wmb();
-}
 
 /*
  * Mutex spinning code migrated from kernel/sched/core.c
@@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 
 	for (;;) {
 		struct task_struct *owner;
-		struct mspin_node  node;
+		struct mcs_spinlock  node;
 
 		if (use_ww_ctx && ww_ctx->acquired > 0) {
 			struct ww_mutex *ww;
@@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		 * If there's an owner, wait for it to either
 		 * release the lock or go to sleep.
 		 */
-		mspin_lock(MLOCK(lock), &node);
+		mcs_spin_lock(&lock->mcs_lock, &node);
 		owner = ACCESS_ONCE(lock->owner);
 		if (owner && !mutex_spin_on_owner(lock, owner)) {
-			mspin_unlock(MLOCK(lock), &node);
+			mcs_spin_unlock(&lock->mcs_lock, &node);
 			goto slowpath;
 		}
 
@@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 			}
 
 			mutex_set_owner(lock);
-			mspin_unlock(MLOCK(lock), &node);
+			mcs_spin_unlock(&lock->mcs_lock, &node);
 			preempt_enable();
 			return 0;
 		}
-		mspin_unlock(MLOCK(lock), &node);
+		mcs_spin_unlock(&lock->mcs_lock, &node);
 
 		/*
 		 * When there's no owner, we might have preempted between the
-- 
1.7.4.4




^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v5 2/4] MCS Lock: optimizations and extra comments
       [not found] <cover.1383935697.git.tim.c.chen@linux.intel.com>
  2013-11-08 19:51 ` [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file Tim Chen
@ 2013-11-08 19:52 ` Tim Chen
  2013-11-19 19:13   ` Paul E. McKenney
  2013-11-08 19:52 ` [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file Tim Chen
  2013-11-08 19:52 ` [PATCH v5 4/4] MCS Lock: Barrier corrections Tim Chen
  3 siblings, 1 reply; 16+ messages in thread
From: Tim Chen @ 2013-11-08 19:52 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton, Thomas Gleixner
  Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen,
	Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann,
	Aswin Chandramouleeswaran, Scott J Norton, Will Deacon,
	Figo.zhang

From: Jason Low <jason.low2@hp.com>

Remove unnecessary operation and make the cmpxchg(lock, node, NULL) == node
check in mcs_spin_unlock() likely() as it is likely that a race did not occur
most of the time.

Also add in more comments describing how the local node is used in MCS locks.

Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Jason Low <jason.low2@hp.com>
---
 include/linux/mcs_spinlock.h |   13 +++++++++++--
 1 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
index b5de3b0..96f14299 100644
--- a/include/linux/mcs_spinlock.h
+++ b/include/linux/mcs_spinlock.h
@@ -18,6 +18,12 @@ struct mcs_spinlock {
 };
 
 /*
+ * In order to acquire the lock, the caller should declare a local node and
+ * pass a reference of the node to this function in addition to the lock.
+ * If the lock has already been acquired, then this will proceed to spin
+ * on this node->locked until the previous lock holder sets the node->locked
+ * in mcs_spin_unlock().
+ *
  * We don't inline mcs_spin_lock() so that perf can correctly account for the
  * time spent in this lock function.
  */
@@ -33,7 +39,6 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 	prev = xchg(lock, node);
 	if (likely(prev == NULL)) {
 		/* Lock acquired */
-		node->locked = 1;
 		return;
 	}
 	ACCESS_ONCE(prev->next) = node;
@@ -43,6 +48,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 		arch_mutex_cpu_relax();
 }
 
+/*
+ * Releases the lock. The caller should pass in the corresponding node that
+ * was used to acquire the lock.
+ */
 static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 {
 	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
@@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
 		/*
 		 * Release the lock by setting it to NULL
 		 */
-		if (cmpxchg(lock, node, NULL) == node)
+		if (likely(cmpxchg(lock, node, NULL) == node))
 			return;
 		/* Wait until the next pointer is set */
 		while (!(next = ACCESS_ONCE(node->next)))
-- 
1.7.4.4




^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file
       [not found] <cover.1383935697.git.tim.c.chen@linux.intel.com>
  2013-11-08 19:51 ` [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file Tim Chen
  2013-11-08 19:52 ` [PATCH v5 2/4] MCS Lock: optimizations and extra comments Tim Chen
@ 2013-11-08 19:52 ` Tim Chen
  2013-11-19 19:15   ` Paul E. McKenney
  2013-11-08 19:52 ` [PATCH v5 4/4] MCS Lock: Barrier corrections Tim Chen
  3 siblings, 1 reply; 16+ messages in thread
From: Tim Chen @ 2013-11-08 19:52 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton, Thomas Gleixner
  Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen,
	Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann,
	Aswin Chandramouleeswaran, Scott J Norton, Will Deacon,
	Figo.zhang

From: Waiman Long <Waiman.Long@hp.com>

The following changes are made:

1) Create a new mcs_spinlock.c file to contain the
   mcs_spin_lock() and mcs_spin_unlock() functions.
2) Include a number of prerequisite header files and define
   arch_mutex_cpu_relax(), if not previously defined so the
   mcs functions can be compiled for multiple architectures without
   causing problems.

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---
 include/linux/mcs_spinlock.h                       |   56 ++------------------
 kernel/locking/Makefile                            |    6 +-
 .../locking/mcs_spinlock.c                         |   31 ++++++-----
 3 files changed, 23 insertions(+), 70 deletions(-)
 copy include/linux/mcs_spinlock.h => kernel/locking/mcs_spinlock.c (78%)

diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
index 96f14299..d54bb23 100644
--- a/include/linux/mcs_spinlock.h
+++ b/include/linux/mcs_spinlock.h
@@ -17,57 +17,9 @@ struct mcs_spinlock {
 	int locked; /* 1 if lock acquired */
 };
 
-/*
- * In order to acquire the lock, the caller should declare a local node and
- * pass a reference of the node to this function in addition to the lock.
- * If the lock has already been acquired, then this will proceed to spin
- * on this node->locked until the previous lock holder sets the node->locked
- * in mcs_spin_unlock().
- *
- * We don't inline mcs_spin_lock() so that perf can correctly account for the
- * time spent in this lock function.
- */
-static noinline
-void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
-{
-	struct mcs_spinlock *prev;
-
-	/* Init node */
-	node->locked = 0;
-	node->next   = NULL;
-
-	prev = xchg(lock, node);
-	if (likely(prev == NULL)) {
-		/* Lock acquired */
-		return;
-	}
-	ACCESS_ONCE(prev->next) = node;
-	smp_wmb();
-	/* Wait until the lock holder passes the lock down */
-	while (!ACCESS_ONCE(node->locked))
-		arch_mutex_cpu_relax();
-}
-
-/*
- * Releases the lock. The caller should pass in the corresponding node that
- * was used to acquire the lock.
- */
-static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
-{
-	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
-
-	if (likely(!next)) {
-		/*
-		 * Release the lock by setting it to NULL
-		 */
-		if (likely(cmpxchg(lock, node, NULL) == node))
-			return;
-		/* Wait until the next pointer is set */
-		while (!(next = ACCESS_ONCE(node->next)))
-			arch_mutex_cpu_relax();
-	}
-	ACCESS_ONCE(next->locked) = 1;
-	smp_wmb();
-}
+extern
+void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node);
+extern
+void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node);
 
 #endif /* __LINUX_MCS_SPINLOCK_H */
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index baab8e5..20d9d5c 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -13,12 +13,12 @@ obj-$(CONFIG_LOCKDEP) += lockdep.o
 ifeq ($(CONFIG_PROC_FS),y)
 obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
 endif
-obj-$(CONFIG_SMP) += spinlock.o
-obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
+obj-$(CONFIG_SMP) += spinlock.o mcs_spinlock.o
+obj-$(CONFIG_PROVE_LOCKING) += spinlock.o mcs_spinlock.o
 obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
 obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
 obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
-obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
+obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o mcs_spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
diff --git a/include/linux/mcs_spinlock.h b/kernel/locking/mcs_spinlock.c
similarity index 78%
copy from include/linux/mcs_spinlock.h
copy to kernel/locking/mcs_spinlock.c
index 96f14299..b6f27f8 100644
--- a/include/linux/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.c
@@ -1,7 +1,5 @@
 /*
- * MCS lock defines
- *
- * This file contains the main data structure and API definitions of MCS lock.
+ * MCS lock
  *
  * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
  * with the desirable properties of being fair, and with each cpu trying
@@ -9,13 +7,20 @@
  * It avoids expensive cache bouncings that common test-and-set spin-lock
  * implementations incur.
  */
-#ifndef __LINUX_MCS_SPINLOCK_H
-#define __LINUX_MCS_SPINLOCK_H
+/*
+ * asm/processor.h may define arch_mutex_cpu_relax().
+ * If it is not defined, cpu_relax() will be used.
+ */
+#include <asm/barrier.h>
+#include <asm/cmpxchg.h>
+#include <asm/processor.h>
+#include <linux/compiler.h>
+#include <linux/mcs_spinlock.h>
+#include <linux/export.h>
 
-struct mcs_spinlock {
-	struct mcs_spinlock *next;
-	int locked; /* 1 if lock acquired */
-};
+#ifndef arch_mutex_cpu_relax
+# define arch_mutex_cpu_relax() cpu_relax()
+#endif
 
 /*
  * In order to acquire the lock, the caller should declare a local node and
@@ -23,11 +28,7 @@ struct mcs_spinlock {
  * If the lock has already been acquired, then this will proceed to spin
  * on this node->locked until the previous lock holder sets the node->locked
  * in mcs_spin_unlock().
- *
- * We don't inline mcs_spin_lock() so that perf can correctly account for the
- * time spent in this lock function.
  */
-static noinline
 void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 {
 	struct mcs_spinlock *prev;
@@ -47,6 +48,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 	while (!ACCESS_ONCE(node->locked))
 		arch_mutex_cpu_relax();
 }
+EXPORT_SYMBOL_GPL(mcs_spin_lock);
 
 /*
  * Releases the lock. The caller should pass in the corresponding node that
@@ -69,5 +71,4 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
 	ACCESS_ONCE(next->locked) = 1;
 	smp_wmb();
 }
-
-#endif /* __LINUX_MCS_SPINLOCK_H */
+EXPORT_SYMBOL_GPL(mcs_spin_unlock);
-- 
1.7.4.4




^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v5 4/4] MCS Lock: Barrier corrections
       [not found] <cover.1383935697.git.tim.c.chen@linux.intel.com>
                   ` (2 preceding siblings ...)
  2013-11-08 19:52 ` [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file Tim Chen
@ 2013-11-08 19:52 ` Tim Chen
       [not found]   ` <20131111181049.GL28302@mudshark.cambridge.arm.com>
  2013-11-19 19:21   ` Paul E. McKenney
  3 siblings, 2 replies; 16+ messages in thread
From: Tim Chen @ 2013-11-08 19:52 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton, Thomas Gleixner
  Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen,
	Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann,
	Aswin Chandramouleeswaran, Scott J Norton, Will Deacon,
	Figo.zhang

From: Waiman Long <Waiman.Long@hp.com>

This patch corrects the way memory barriers are used in the MCS lock
with the smp_load_acquire and smp_store_release functions.
It removes ones that are not needed.

It uses architecture specific load-acquire and store-release
primitives for synchronization, if available. Generic implementations
are provided in case they are not defined even though they may not
be optimal. These generic implementations could be removed later on
once changes are made in all the relevant header files.

Suggested-by: Michel Lespinasse <walken@google.com>
Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---
 kernel/locking/mcs_spinlock.c |   48 +++++++++++++++++++++++++++++++++++------
 1 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
index b6f27f8..df5c167 100644
--- a/kernel/locking/mcs_spinlock.c
+++ b/kernel/locking/mcs_spinlock.c
@@ -23,6 +23,31 @@
 #endif
 
 /*
+ * Fall back to use the regular atomic operations and memory barrier if
+ * the acquire/release versions are not defined.
+ */
+#ifndef	xchg_acquire
+# define xchg_acquire(p, v)		xchg(p, v)
+#endif
+
+#ifndef	smp_load_acquire
+# define smp_load_acquire(p)				\
+	({						\
+		typeof(*p) __v = ACCESS_ONCE(*(p));	\
+		smp_mb();				\
+		__v;					\
+	})
+#endif
+
+#ifndef smp_store_release
+# define smp_store_release(p, v)		\
+	do {					\
+		smp_mb();			\
+		ACCESS_ONCE(*(p)) = v;		\
+	} while (0)
+#endif
+
+/*
  * In order to acquire the lock, the caller should declare a local node and
  * pass a reference of the node to this function in addition to the lock.
  * If the lock has already been acquired, then this will proceed to spin
@@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 	node->locked = 0;
 	node->next   = NULL;
 
-	prev = xchg(lock, node);
+	/* xchg() provides a memory barrier */
+	prev = xchg_acquire(lock, node);
 	if (likely(prev == NULL)) {
 		/* Lock acquired */
 		return;
 	}
 	ACCESS_ONCE(prev->next) = node;
-	smp_wmb();
-	/* Wait until the lock holder passes the lock down */
-	while (!ACCESS_ONCE(node->locked))
+	/*
+	 * Wait until the lock holder passes the lock down.
+	 * Using smp_load_acquire() provides a memory barrier that
+	 * ensures subsequent operations happen after the lock is acquired.
+	 */
+	while (!(smp_load_acquire(&node->locked)))
 		arch_mutex_cpu_relax();
 }
 EXPORT_SYMBOL_GPL(mcs_spin_lock);
@@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock);
  * Releases the lock. The caller should pass in the corresponding node that
  * was used to acquire the lock.
  */
-static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
+void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 {
 	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
 
@@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
 		while (!(next = ACCESS_ONCE(node->next)))
 			arch_mutex_cpu_relax();
 	}
-	ACCESS_ONCE(next->locked) = 1;
-	smp_wmb();
+	/*
+	 * Pass lock to next waiter.
+	 * smp_store_release() provides a memory barrier to ensure
+	 * all operations in the critical section has been completed
+	 * before unlocking.
+	 */
+	smp_store_release(&next->locked , 1);
 }
 EXPORT_SYMBOL_GPL(mcs_spin_unlock);
-- 
1.7.4.4



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
       [not found]     ` <1384204673.10046.6.camel@schen9-mobl3>
@ 2013-11-12 14:54       ` Waiman Long
  0 siblings, 0 replies; 16+ messages in thread
From: Waiman Long @ 2013-11-12 14:54 UTC (permalink / raw)
  To: Tim Chen, Peter Zijlstra
  Cc: Will Deacon, Ingo Molnar, Andrew Morton, Thomas Gleixner,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	Linus Torvalds, Andrea Arcangeli, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel,
	Peter Hurley, Paul E.McKenney, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Figo.zhang

On 11/11/2013 04:17 PM, Tim Chen wrote:
>> You could then augment that with [cmp]xchg_{acquire,release} as
>> appropriate.
>>
>>> +/*
>>>    * In order to acquire the lock, the caller should declare a local node and
>>>    * pass a reference of the node to this function in addition to the lock.
>>>    * If the lock has already been acquired, then this will proceed to spin
>>> @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>>>   	node->locked = 0;
>>>   	node->next   = NULL;
>>>
>>> -	prev = xchg(lock, node);
>>> +	/* xchg() provides a memory barrier */
>>> +	prev = xchg_acquire(lock, node);
>>>   	if (likely(prev == NULL)) {
>>>   		/* Lock acquired */
>>>   		return;
>>>   	}
>>>   	ACCESS_ONCE(prev->next) = node;
>>> -	smp_wmb();
>>> -	/* Wait until the lock holder passes the lock down */
>>> -	while (!ACCESS_ONCE(node->locked))
>>> +	/*
>>> +	 * Wait until the lock holder passes the lock down.
>>> +	 * Using smp_load_acquire() provides a memory barrier that
>>> +	 * ensures subsequent operations happen after the lock is acquired.
>>> +	 */
>>> +	while (!(smp_load_acquire(&node->locked)))
>>>   		arch_mutex_cpu_relax();
> An alternate implementation is
> 	while (!ACCESS_ONCE(node->locked))
> 		arch_mutex_cpu_relax();
> 	smp_load_acquire(&node->locked);
>
> Leaving the smp_load_acquire at the end to provide appropriate barrier.
> Will that be acceptable?
>
> Tim

I second Tim's opinion. It would help to have a smp_mb_load_acquire() 
function that provides a memory barrier with load-acquire semantics. I 
don't think we need one for store-release as that will not be in a loop.

Peter, what do you think about adding that to your patch?

-Longman

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
       [not found] <20131112160827.GB25953@mudshark.cambridge.arm.com>
@ 2013-11-12 17:16 ` George Spelvin
  0 siblings, 0 replies; 16+ messages in thread
From: George Spelvin @ 2013-11-12 17:16 UTC (permalink / raw)
  To: tim.c.chen, will.deacon
  Cc: a.p.zijlstra, aarcange, akpm, alex.shi, andi, arnd, aswin,
	dave.hansen, davidlohr.bueso, figo1802, hpa, linux-arch,
	linux-kernel, linux-mm, linux, matthew.r.wilcox, mingo, paulmck,
	peter, raghavendra.kt, riel, scott.norton, tglx, torvalds,
	waiman.long, walken

> On Mon, Nov 11, 2013 at 09:17:52PM +0000, Tim Chen wrote:
>> An alternate implementation is
>> 	while (!ACCESS_ONCE(node->locked))
>> 		arch_mutex_cpu_relax();
>> 	smp_load_acquire(&node->locked);
>> 
>> Leaving the smp_load_acquire at the end to provide appropriate barrier.
>> Will that be acceptable?

Will Deacon <will.deacon@arm.com> wrote:
> It still doesn't solve my problem though: I want a way to avoid that busy
> loop by some architecture-specific manner. The arch_mutex_cpu_relax() hook
> is a start, but there is no corresponding hook on the unlock side to issue a
> wakeup. Given a sensible relax implementation, I don't have an issue with
> putting a load-acquire in a loop, since it shouldn't be aggresively spinning
> anymore.

So you want something like this?

/*
 * This is a spin-wait with acquire semantics.  That is, accesses after
 * this are not allowed to be reordered before the load that meets
 * the specified condition.  This requires that it end with either a
 * load-acquire or a full smp_mb().  The optimal way to do this is likely
 * to be architecture-dependent.  E.g. x86 MONITOR/MWAIT instructions.
 */
#ifndef smp_load_acquire_until
#define smp_load_acquire_until(addr, cond) \
	while (!(smp_load_acquire(addr) cond)) { \
		do { \
			arch_mutex_cpu_relax(); \
		} while (!(ACCESS_ONCE(*(addr)) cond)); \
	}
#endif

	smp_load_acquire_until(&node->locked, != 0);

Alternative implementations:

#define smp_load_acquire_until(addr, cond) { \
	while (!(ACCESS_ONCE(*(addr)) cond)) \
		arch_mutex_cpu_relax(); \
	smp_mb(); }

#define smp_load_acquire_until(addr, cond) \
	if (!(smp_load_acquire(addr) cond)) { \
		do { \
			arch_mutex_cpu_relax(); \
		} while (!(ACCESS_ONCE(*(addr)) cond)); \
		smp_mb(); \
	}

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file
  2013-11-08 19:51 ` [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file Tim Chen
@ 2013-11-19 19:10   ` Paul E. McKenney
  2013-11-19 19:42     ` Tim Chen
  0 siblings, 1 reply; 16+ messages in thread
From: Paul E. McKenney @ 2013-11-19 19:10 UTC (permalink / raw)
  To: Tim Chen
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Fri, Nov 08, 2013 at 11:51:52AM -0800, Tim Chen wrote:
> We will need the MCS lock code for doing optimistic spinning for rwsem
> and queue rwlock.  Extracting the MCS code from mutex.c and put into
> its own file allow us to reuse this code easily.
> 
> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
> Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>

Please see comments below.

							Thanx, Paul

> ---
>  include/linux/mcs_spinlock.h |   64 ++++++++++++++++++++++++++++++++++++++++++
>  include/linux/mutex.h        |    5 ++-
>  kernel/locking/mutex.c       |   60 ++++----------------------------------
>  3 files changed, 74 insertions(+), 55 deletions(-)
>  create mode 100644 include/linux/mcs_spinlock.h
> 
> diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
> new file mode 100644
> index 0000000..b5de3b0
> --- /dev/null
> +++ b/include/linux/mcs_spinlock.h
> @@ -0,0 +1,64 @@
> +/*
> + * MCS lock defines
> + *
> + * This file contains the main data structure and API definitions of MCS lock.
> + *
> + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
> + * with the desirable properties of being fair, and with each cpu trying
> + * to acquire the lock spinning on a local variable.
> + * It avoids expensive cache bouncings that common test-and-set spin-lock
> + * implementations incur.
> + */
> +#ifndef __LINUX_MCS_SPINLOCK_H
> +#define __LINUX_MCS_SPINLOCK_H
> +
> +struct mcs_spinlock {
> +	struct mcs_spinlock *next;
> +	int locked; /* 1 if lock acquired */
> +};
> +
> +/*
> + * We don't inline mcs_spin_lock() so that perf can correctly account for the
> + * time spent in this lock function.
> + */
> +static noinline
> +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> +{
> +	struct mcs_spinlock *prev;
> +
> +	/* Init node */
> +	node->locked = 0;
> +	node->next   = NULL;
> +
> +	prev = xchg(lock, node);

OK, the full memory barriers implied by xchg() ensure that *node will be
initialized before the "ACCESS_ONCE(prev->next) = node" below puts the
node into the list.  This rules out the misordering scenario that Tim
Chen called out in message-id <1380322005.3467.186.camel@schen9-DESK>
on September 27th.

Assuming of course a corresponding barrier on the lock handoff side.

> +	if (likely(prev == NULL)) {
> +		/* Lock acquired */
> +		node->locked = 1;
> +		return;
> +	}
> +	ACCESS_ONCE(prev->next) = node;
> +	smp_wmb();

I don't see what the above memory barrier does.  Here are some things
that it cannot be doing:

o	Ordering the insertion into the list above with the polling
	below.  First, smp_wmb() does not order prior writes against
	later reads, and second misordering is harmless.  If we start
	polling before the insertion is complete, all that happens
	is that the first few polls have no chance of seeing a lock
	grant.

o	Ordering the polling against the initialization -- the above
	xchg() is already doing that for us.

So what is its purpose?

> +	/* Wait until the lock holder passes the lock down */
> +	while (!ACCESS_ONCE(node->locked))
> +		arch_mutex_cpu_relax();

On the other hand, I don't see how we get away without a barrier here.
As written, what prevents the caller's load from ->owner from being
reordered with the above load from ->locked?  (Perhaps you can argue
that such reordering is only a performance problem, but if so we need
that argument recorded in comments.)

Of course, if anyone ever tries to use mcs_spin_lock() as a full lock,
they will need a memory barrier here to prevent the critical section
from leaking out.

> +}
> +
> +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> +{
> +	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> +
> +	if (likely(!next)) {
> +		/*
> +		 * Release the lock by setting it to NULL
> +		 */
> +		if (cmpxchg(lock, node, NULL) == node)
> +			return;
> +		/* Wait until the next pointer is set */
> +		while (!(next = ACCESS_ONCE(node->next)))
> +			arch_mutex_cpu_relax();
> +	}

We need a memory barrier somewhere before here in this function,
otherwise the critical section can leak out.  I do not believe that
we can rely on the prohibition against speculative stores that Peter
Zijlstra and I have been discussing because that does not provide the
transitivity required by locking primitives.  I believe that we -could-
make the access below be an smp_store_release(), though.

Placing the barrier here (or at least not preceding the initial
fetch from node->next) has the advantage of allowing it to pair with
the xchg() in mcs_spin_lock(), though given the dependency only an
smp_read_barrier_depends() is required for that purpose.

> +	ACCESS_ONCE(next->locked) = 1;
> +	smp_wmb();

I don't see what this barrier does for us.  It is ordering the unlock
store with what, exactly?

If it really is doing something, we need a big fat comment stating what
that is, and checkpatch.pl will be happy to inform you.  ;-)

> +}
> +
> +#endif /* __LINUX_MCS_SPINLOCK_H */
> diff --git a/include/linux/mutex.h b/include/linux/mutex.h
> index bab49da..32a32e6 100644
> --- a/include/linux/mutex.h
> +++ b/include/linux/mutex.h
> @@ -46,6 +46,7 @@
>   * - detects multi-task circular deadlocks and prints out all affected
>   *   locks and tasks (and only those tasks)
>   */
> +struct mcs_spinlock;
>  struct mutex {
>  	/* 1: unlocked, 0: locked, negative: locked, possible waiters */
>  	atomic_t		count;
> @@ -55,7 +56,7 @@ struct mutex {
>  	struct task_struct	*owner;
>  #endif
>  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> -	void			*spin_mlock;	/* Spinner MCS lock */
> +	struct mcs_spinlock	*mcs_lock;	/* Spinner MCS lock */
>  #endif
>  #ifdef CONFIG_DEBUG_MUTEXES
>  	const char 		*name;
> @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
>  # define arch_mutex_cpu_relax() cpu_relax()
>  #endif
> 
> -#endif
> +#endif /* __LINUX_MUTEX_H */
> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> index d24105b..e08b183 100644
> --- a/kernel/locking/mutex.c
> +++ b/kernel/locking/mutex.c
> @@ -25,6 +25,7 @@
>  #include <linux/spinlock.h>
>  #include <linux/interrupt.h>
>  #include <linux/debug_locks.h>
> +#include <linux/mcs_spinlock.h>
> 
>  /*
>   * In the DEBUG case we are using the "NULL fastpath" for mutexes,
> @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
>  	INIT_LIST_HEAD(&lock->wait_list);
>  	mutex_clear_owner(lock);
>  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> -	lock->spin_mlock = NULL;
> +	lock->mcs_lock = NULL;
>  #endif
> 
>  	debug_mutex_init(lock, name, key);
> @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock);
>   * more or less simultaneously, the spinners need to acquire a MCS lock
>   * first before spinning on the owner field.
>   *
> - * We don't inline mspin_lock() so that perf can correctly account for the
> - * time spent in this lock function.
>   */
> -struct mspin_node {
> -	struct mspin_node *next ;
> -	int		  locked;	/* 1 if lock acquired */
> -};
> -#define	MLOCK(mutex)	((struct mspin_node **)&((mutex)->spin_mlock))
> -
> -static noinline
> -void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
> -{
> -	struct mspin_node *prev;
> -
> -	/* Init node */
> -	node->locked = 0;
> -	node->next   = NULL;
> -
> -	prev = xchg(lock, node);
> -	if (likely(prev == NULL)) {
> -		/* Lock acquired */
> -		node->locked = 1;
> -		return;
> -	}
> -	ACCESS_ONCE(prev->next) = node;
> -	smp_wmb();
> -	/* Wait until the lock holder passes the lock down */
> -	while (!ACCESS_ONCE(node->locked))
> -		arch_mutex_cpu_relax();
> -}
> -
> -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
> -{
> -	struct mspin_node *next = ACCESS_ONCE(node->next);
> -
> -	if (likely(!next)) {
> -		/*
> -		 * Release the lock by setting it to NULL
> -		 */
> -		if (cmpxchg(lock, node, NULL) == node)
> -			return;
> -		/* Wait until the next pointer is set */
> -		while (!(next = ACCESS_ONCE(node->next)))
> -			arch_mutex_cpu_relax();
> -	}
> -	ACCESS_ONCE(next->locked) = 1;
> -	smp_wmb();
> -}
> 
>  /*
>   * Mutex spinning code migrated from kernel/sched/core.c
> @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> 
>  	for (;;) {
>  		struct task_struct *owner;
> -		struct mspin_node  node;
> +		struct mcs_spinlock  node;
> 
>  		if (use_ww_ctx && ww_ctx->acquired > 0) {
>  			struct ww_mutex *ww;
> @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  		 * If there's an owner, wait for it to either
>  		 * release the lock or go to sleep.
>  		 */
> -		mspin_lock(MLOCK(lock), &node);
> +		mcs_spin_lock(&lock->mcs_lock, &node);
>  		owner = ACCESS_ONCE(lock->owner);
>  		if (owner && !mutex_spin_on_owner(lock, owner)) {
> -			mspin_unlock(MLOCK(lock), &node);
> +			mcs_spin_unlock(&lock->mcs_lock, &node);
>  			goto slowpath;
>  		}
> 
> @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  			}
> 
>  			mutex_set_owner(lock);
> -			mspin_unlock(MLOCK(lock), &node);
> +			mcs_spin_unlock(&lock->mcs_lock, &node);
>  			preempt_enable();
>  			return 0;
>  		}
> -		mspin_unlock(MLOCK(lock), &node);
> +		mcs_spin_unlock(&lock->mcs_lock, &node);
> 
>  		/*
>  		 * When there's no owner, we might have preempted between the
> -- 
> 1.7.4.4
> 
> 
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments
  2013-11-08 19:52 ` [PATCH v5 2/4] MCS Lock: optimizations and extra comments Tim Chen
@ 2013-11-19 19:13   ` Paul E. McKenney
  2013-11-19 19:42     ` Tim Chen
  2013-11-19 22:57     ` Tim Chen
  0 siblings, 2 replies; 16+ messages in thread
From: Paul E. McKenney @ 2013-11-19 19:13 UTC (permalink / raw)
  To: Tim Chen
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote:
> From: Jason Low <jason.low2@hp.com>
> 
> Remove unnecessary operation and make the cmpxchg(lock, node, NULL) == node
> check in mcs_spin_unlock() likely() as it is likely that a race did not occur
> most of the time.
> 
> Also add in more comments describing how the local node is used in MCS locks.
> 
> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
> Signed-off-by: Jason Low <jason.low2@hp.com>
> ---
>  include/linux/mcs_spinlock.h |   13 +++++++++++--
>  1 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
> index b5de3b0..96f14299 100644
> --- a/include/linux/mcs_spinlock.h
> +++ b/include/linux/mcs_spinlock.h
> @@ -18,6 +18,12 @@ struct mcs_spinlock {
>  };
> 
>  /*
> + * In order to acquire the lock, the caller should declare a local node and
> + * pass a reference of the node to this function in addition to the lock.
> + * If the lock has already been acquired, then this will proceed to spin
> + * on this node->locked until the previous lock holder sets the node->locked
> + * in mcs_spin_unlock().
> + *
>   * We don't inline mcs_spin_lock() so that perf can correctly account for the
>   * time spent in this lock function.
>   */
> @@ -33,7 +39,6 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  	prev = xchg(lock, node);
>  	if (likely(prev == NULL)) {
>  		/* Lock acquired */
> -		node->locked = 1;

Agreed, no one looks at this field in this case, so no need to initialize
it, unless for debug purposes.

>  		return;
>  	}
>  	ACCESS_ONCE(prev->next) = node;
> @@ -43,6 +48,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  		arch_mutex_cpu_relax();
>  }
> 
> +/*
> + * Releases the lock. The caller should pass in the corresponding node that
> + * was used to acquire the lock.
> + */
>  static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  {
>  	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
>  		/*
>  		 * Release the lock by setting it to NULL
>  		 */
> -		if (cmpxchg(lock, node, NULL) == node)
> +		if (likely(cmpxchg(lock, node, NULL) == node))

Agreed here as well.  Takes a narrow race to hit this.

So, did your testing exercise this path?  If the answer is "yes", and
if the issues that I called out in patch #1 are resolved:

Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

>  			return;
>  		/* Wait until the next pointer is set */
>  		while (!(next = ACCESS_ONCE(node->next)))
> -- 
> 1.7.4.4
> 
> 
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file
  2013-11-08 19:52 ` [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file Tim Chen
@ 2013-11-19 19:15   ` Paul E. McKenney
  0 siblings, 0 replies; 16+ messages in thread
From: Paul E. McKenney @ 2013-11-19 19:15 UTC (permalink / raw)
  To: Tim Chen
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Fri, Nov 08, 2013 at 11:52:15AM -0800, Tim Chen wrote:
> From: Waiman Long <Waiman.Long@hp.com>
> 
> The following changes are made:
> 
> 1) Create a new mcs_spinlock.c file to contain the
>    mcs_spin_lock() and mcs_spin_unlock() function.
> 2) Include a number of prerequisite header files and define
>    arch_mutex_cpu_relax(), if not previously defined so the
>    mcs functions can be compiled for multiple architectures without
>    causing problems.
> 
> Signed-off-by: Waiman Long <Waiman.Long@hp.com>
> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>

Assuming issues called out in patch #1 are resolved and testing called
out for patch #2 has happened:

Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

> ---
>  include/linux/mcs_spinlock.h                       |   56 ++------------------
>  kernel/locking/Makefile                            |    6 +-
>  .../locking/mcs_spinlock.c                         |   31 ++++++-----
>  3 files changed, 23 insertions(+), 70 deletions(-)
>  copy include/linux/mcs_spinlock.h => kernel/locking/mcs_spinlock.c (78%)
> 
> diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
> index 96f14299..d54bb23 100644
> --- a/include/linux/mcs_spinlock.h
> +++ b/include/linux/mcs_spinlock.h
> @@ -17,57 +17,9 @@ struct mcs_spinlock {
>  	int locked; /* 1 if lock acquired */
>  };
> 
> -/*
> - * In order to acquire the lock, the caller should declare a local node and
> - * pass a reference of the node to this function in addition to the lock.
> - * If the lock has already been acquired, then this will proceed to spin
> - * on this node->locked until the previous lock holder sets the node->locked
> - * in mcs_spin_unlock().
> - *
> - * We don't inline mcs_spin_lock() so that perf can correctly account for the
> - * time spent in this lock function.
> - */
> -static noinline
> -void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> -{
> -	struct mcs_spinlock *prev;
> -
> -	/* Init node */
> -	node->locked = 0;
> -	node->next   = NULL;
> -
> -	prev = xchg(lock, node);
> -	if (likely(prev == NULL)) {
> -		/* Lock acquired */
> -		return;
> -	}
> -	ACCESS_ONCE(prev->next) = node;
> -	smp_wmb();
> -	/* Wait until the lock holder passes the lock down */
> -	while (!ACCESS_ONCE(node->locked))
> -		arch_mutex_cpu_relax();
> -}
> -
> -/*
> - * Releases the lock. The caller should pass in the corresponding node that
> - * was used to acquire the lock.
> - */
> -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> -{
> -	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> -
> -	if (likely(!next)) {
> -		/*
> -		 * Release the lock by setting it to NULL
> -		 */
> -		if (likely(cmpxchg(lock, node, NULL) == node))
> -			return;
> -		/* Wait until the next pointer is set */
> -		while (!(next = ACCESS_ONCE(node->next)))
> -			arch_mutex_cpu_relax();
> -	}
> -	ACCESS_ONCE(next->locked) = 1;
> -	smp_wmb();
> -}
> +extern
> +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node);
> +extern
> +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node);
> 
>  #endif /* __LINUX_MCS_SPINLOCK_H */
> diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
> index baab8e5..20d9d5c 100644
> --- a/kernel/locking/Makefile
> +++ b/kernel/locking/Makefile
> @@ -13,12 +13,12 @@ obj-$(CONFIG_LOCKDEP) += lockdep.o
>  ifeq ($(CONFIG_PROC_FS),y)
>  obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
>  endif
> -obj-$(CONFIG_SMP) += spinlock.o
> -obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
> +obj-$(CONFIG_SMP) += spinlock.o mcs_spinlock.o
> +obj-$(CONFIG_PROVE_LOCKING) += spinlock.o mcs_spinlock.o
>  obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
>  obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
>  obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
> -obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
> +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o mcs_spinlock.o
>  obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
>  obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
>  obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
> diff --git a/include/linux/mcs_spinlock.h b/kernel/locking/mcs_spinlock.c
> similarity index 78%
> copy from include/linux/mcs_spinlock.h
> copy to kernel/locking/mcs_spinlock.c
> index 96f14299..b6f27f8 100644
> --- a/include/linux/mcs_spinlock.h
> +++ b/kernel/locking/mcs_spinlock.c
> @@ -1,7 +1,5 @@
>  /*
> - * MCS lock defines
> - *
> - * This file contains the main data structure and API definitions of MCS lock.
> + * MCS lock
>   *
>   * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
>   * with the desirable properties of being fair, and with each cpu trying
> @@ -9,13 +7,20 @@
>   * It avoids expensive cache bouncings that common test-and-set spin-lock
>   * implementations incur.
>   */
> -#ifndef __LINUX_MCS_SPINLOCK_H
> -#define __LINUX_MCS_SPINLOCK_H
> +/*
> + * asm/processor.h may define arch_mutex_cpu_relax().
> + * If it is not defined, cpu_relax() will be used.
> + */
> +#include <asm/barrier.h>
> +#include <asm/cmpxchg.h>
> +#include <asm/processor.h>
> +#include <linux/compiler.h>
> +#include <linux/mcs_spinlock.h>
> +#include <linux/export.h>
> 
> -struct mcs_spinlock {
> -	struct mcs_spinlock *next;
> -	int locked; /* 1 if lock acquired */
> -};
> +#ifndef arch_mutex_cpu_relax
> +# define arch_mutex_cpu_relax() cpu_relax()
> +#endif
> 
>  /*
>   * In order to acquire the lock, the caller should declare a local node and
> @@ -23,11 +28,7 @@ struct mcs_spinlock {
>   * If the lock has already been acquired, then this will proceed to spin
>   * on this node->locked until the previous lock holder sets the node->locked
>   * in mcs_spin_unlock().
> - *
> - * We don't inline mcs_spin_lock() so that perf can correctly account for the
> - * time spent in this lock function.
>   */
> -static noinline
>  void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  {
>  	struct mcs_spinlock *prev;
> @@ -47,6 +48,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  	while (!ACCESS_ONCE(node->locked))
>  		arch_mutex_cpu_relax();
>  }
> +EXPORT_SYMBOL_GPL(mcs_spin_lock);
> 
>  /*
>   * Releases the lock. The caller should pass in the corresponding node that
> @@ -69,5 +71,4 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
>  	ACCESS_ONCE(next->locked) = 1;
>  	smp_wmb();
>  }
> -
> -#endif /* __LINUX_MCS_SPINLOCK_H */
> +EXPORT_SYMBOL_GPL(mcs_spin_unlock);
> -- 
> 1.7.4.4
> 
> 
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-08 19:52 ` [PATCH v5 4/4] MCS Lock: Barrier corrections Tim Chen
       [not found]   ` <20131111181049.GL28302@mudshark.cambridge.arm.com>
@ 2013-11-19 19:21   ` Paul E. McKenney
  2013-11-19 19:46     ` Tim Chen
  1 sibling, 1 reply; 16+ messages in thread
From: Paul E. McKenney @ 2013-11-19 19:21 UTC (permalink / raw)
  To: Tim Chen
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Fri, Nov 08, 2013 at 11:52:38AM -0800, Tim Chen wrote:
> From: Waiman Long <Waiman.Long@hp.com>
> 
> This patch corrects the way memory barriers are used in the MCS lock
> with the smp_load_acquire and smp_store_release functions.
> It removes ones that are not needed.
> 
> It uses architecture specific load-acquire and store-release
> primitives for synchronization, if available. Generic implementations
> are provided in case they are not defined even though they may not
> be optimal. These generic implementation could be removed later on
> once changes are made in all the relevant header files.
> 
> Suggested-by: Michel Lespinasse <walken@google.com>
> Signed-off-by: Waiman Long <Waiman.Long@hp.com>
> Signed-off-by: Jason Low <jason.low2@hp.com>
> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>

Please see comments below.

							Thanx, Paul

> ---
>  kernel/locking/mcs_spinlock.c |   48 +++++++++++++++++++++++++++++++++++------
>  1 files changed, 41 insertions(+), 7 deletions(-)
> 
> diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
> index b6f27f8..df5c167 100644
> --- a/kernel/locking/mcs_spinlock.c
> +++ b/kernel/locking/mcs_spinlock.c
> @@ -23,6 +23,31 @@
>  #endif
> 
>  /*
> + * Fall back to use the regular atomic operations and memory barrier if
> + * the acquire/release versions are not defined.
> + */
> +#ifndef	xchg_acquire
> +# define xchg_acquire(p, v)		xchg(p, v)
> +#endif
> +
> +#ifndef	smp_load_acquire
> +# define smp_load_acquire(p)				\
> +	({						\
> +		typeof(*p) __v = ACCESS_ONCE(*(p));	\
> +		smp_mb();				\
> +		__v;					\
> +	})
> +#endif
> +
> +#ifndef smp_store_release
> +# define smp_store_release(p, v)		\
> +	do {					\
> +		smp_mb();			\
> +		ACCESS_ONCE(*(p)) = v;		\
> +	} while (0)
> +#endif
> +
> +/*
>   * In order to acquire the lock, the caller should declare a local node and
>   * pass a reference of the node to this function in addition to the lock.
>   * If the lock has already been acquired, then this will proceed to spin
> @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  	node->locked = 0;
>  	node->next   = NULL;
> 
> -	prev = xchg(lock, node);
> +	/* xchg() provides a memory barrier */
> +	prev = xchg_acquire(lock, node);

But if this is xchg_acquire() with only acquire semantics, it need not
ensure that the initializations of node->locked and node->next above
will happen before the "ACCESS_ONCE(prev->next) = node" below.  This
therefore needs to remain xchg().  Or you need an smp_store_release()
below instead of an ACCESS_ONCE() assignment.

As currently written, the poor CPU doing the unlock can be fatally
disappointed by seeing pre-initialized values of ->locked and ->next.
This could, among other things, result in a hang where the handoff
happens before the initialization.

>  	if (likely(prev == NULL)) {
>  		/* Lock acquired */
>  		return;
>  	}
>  	ACCESS_ONCE(prev->next) = node;
> -	smp_wmb();
> -	/* Wait until the lock holder passes the lock down */
> -	while (!ACCESS_ONCE(node->locked))
> +	/*
> +	 * Wait until the lock holder passes the lock down.
> +	 * Using smp_load_acquire() provides a memory barrier that
> +	 * ensures subsequent operations happen after the lock is acquired.
> +	 */
> +	while (!(smp_load_acquire(&node->locked)))
>  		arch_mutex_cpu_relax();

OK, this smp_load_acquire() makes sense!

>  }
>  EXPORT_SYMBOL_GPL(mcs_spin_lock);
> @@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock);
>   * Releases the lock. The caller should pass in the corresponding node that
>   * was used to acquire the lock.
>   */
> -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
>  {
>  	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> 
> @@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
>  		while (!(next = ACCESS_ONCE(node->next)))
>  			arch_mutex_cpu_relax();
>  	}
> -	ACCESS_ONCE(next->locked) = 1;
> -	smp_wmb();
> +	/*
> +	 * Pass lock to next waiter.
> +	 * smp_store_release() provides a memory barrier to ensure
> +	 * all operations in the critical section has been completed
> +	 * before unlocking.
> +	 */
> +	smp_store_release(&next->locked , 1);

This smp_store_release() makes sense as well!

Could you please get rid of the extraneous space before the comma?

>  }
>  EXPORT_SYMBOL_GPL(mcs_spin_unlock);
> -- 
> 1.7.4.4
> 
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file
  2013-11-19 19:10   ` Paul E. McKenney
@ 2013-11-19 19:42     ` Tim Chen
  2013-11-19 19:54       ` Paul E. McKenney
  0 siblings, 1 reply; 16+ messages in thread
From: Tim Chen @ 2013-11-19 19:42 UTC (permalink / raw)
  To: paulmck
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Tue, 2013-11-19 at 11:10 -0800, Paul E. McKenney wrote:
> On Fri, Nov 08, 2013 at 11:51:52AM -0800, Tim Chen wrote:
> > We will need the MCS lock code for doing optimistic spinning for rwsem
> > and queue rwlock.  Extracting the MCS code from mutex.c and put into
> > its own file allow us to reuse this code easily.
> > 
> > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
> > Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
> 
> Please see comments below.
> 

Thanks for reviewing the code.

> 							Thanx, Paul
> 
> > ---
> >  include/linux/mcs_spinlock.h |   64 ++++++++++++++++++++++++++++++++++++++++++
> >  include/linux/mutex.h        |    5 ++-
> >  kernel/locking/mutex.c       |   60 ++++----------------------------------
> >  3 files changed, 74 insertions(+), 55 deletions(-)
> >  create mode 100644 include/linux/mcs_spinlock.h
> > 
> > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
> > new file mode 100644
> > index 0000000..b5de3b0
> > --- /dev/null
> > +++ b/include/linux/mcs_spinlock.h
> > @@ -0,0 +1,64 @@
> > +/*
> > + * MCS lock defines
> > + *
> > + * This file contains the main data structure and API definitions of MCS lock.
> > + *
> > + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
> > + * with the desirable properties of being fair, and with each cpu trying
> > + * to acquire the lock spinning on a local variable.
> > + * It avoids expensive cache bouncings that common test-and-set spin-lock
> > + * implementations incur.
> > + */
> > +#ifndef __LINUX_MCS_SPINLOCK_H
> > +#define __LINUX_MCS_SPINLOCK_H
> > +
> > +struct mcs_spinlock {
> > +	struct mcs_spinlock *next;
> > +	int locked; /* 1 if lock acquired */
> > +};
> > +
> > +/*
> > + * We don't inline mcs_spin_lock() so that perf can correctly account for the
> > + * time spent in this lock function.
> > + */
> > +static noinline
> > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> > +{
> > +	struct mcs_spinlock *prev;
> > +
> > +	/* Init node */
> > +	node->locked = 0;
> > +	node->next   = NULL;
> > +
> > +	prev = xchg(lock, node);
> 
> OK, the full memory barriers implied by xchg() ensure that *node will be
> initialized before the "ACCESS_ONCE(prev->next) = node" below puts the
> node into the list.  This rules out the misordering scenario that Tim
> Chen called out in message-id <1380322005.3467.186.camel@schen9-DESK>
> on September 27th.
> 
> Assuming of course a corresponding barrier on the lock handoff side.
> 
> > +	if (likely(prev == NULL)) {
> > +		/* Lock acquired */
> > +		node->locked = 1;
> > +		return;
> > +	}
> > +	ACCESS_ONCE(prev->next) = node;
> > +	smp_wmb();
> 
> I don't see what the above memory barrier does.  Here are some things
> that it cannot be doing:
> 
> o	Ordering the insertion into the list above with the polling
> 	below.  First, smp_wmb() does not order prior writes against
> 	later reads, and second misordering is harmless.  If we start
> 	polling before the insertion is complete, all that happens
> 	is that the first few polls have no chance of seeing a lock
> 	grant.
> 
> o	Ordering the polling against the initialization -- the above
> 	xchg() is already doing that for us.
> 
> So what is its purpose?

Agree that the smp_wmb is not needed.  It is in the existing mcs code
residing in mutex.c; we're only re-factoring the code here and haven't
corrected the memory barrier.

The particular smp_wmb() is removed in Patch 4/4 that corrects the
memory barriers.

> 
> > +	/* Wait until the lock holder passes the lock down */
> > +	while (!ACCESS_ONCE(node->locked))
> > +		arch_mutex_cpu_relax();
> 
> On the other hand, I don't see how we get away without a barrier here.
> As written, what prevents the caller's load from ->owner from being
> reordered with the above load from ->locked?  (Perhaps you can argue
> that such reordering is only a performance problem, but if so we need
> that argument recorded in comments.)
> 
> Of course, if anyone ever tries to use mcs_spin_lock() as a full lock,
> they will need a memory barrier here to prevent the critical section
> from leaking out.

Agree too.  The appropriate memory barrier is added in Patch 4/4.

> 
> > +}
> > +
> > +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> > +{
> > +	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> > +
> > +	if (likely(!next)) {
> > +		/*
> > +		 * Release the lock by setting it to NULL
> > +		 */
> > +		if (cmpxchg(lock, node, NULL) == node)
> > +			return;
> > +		/* Wait until the next pointer is set */
> > +		while (!(next = ACCESS_ONCE(node->next)))
> > +			arch_mutex_cpu_relax();
> > +	}
> 
> We need a memory barrier somewhere before here in this function,
> otherwise the critical section can leak out.  I do not believe that
> we can rely on the prohibition against speculative stores that Peter
> Zijlstra and I have been discussing because that does not provide the
> transitivity required by locking primitives.  I believe that we -could-
> make the access below be an smp_store_release(), though.
> 
> Placing the barrier here (or at least not preceding the initial
> fetch from node->next) has the advantage of allowing it to pair with
> the xchg() in mcs_spin_lock(), though given the dependency only an
> smp_read_barrier_depends() is required for that purpose.
> 
> > +	ACCESS_ONCE(next->locked) = 1;
> > +	smp_wmb();
> 
> I don't see what this barrier does for us.  It is ordering the unlock
> store with what, exactly?
> 
> If it really is doing something, we need a big fat comment stating what
> that is, and checkpatch.pl will be happy to inform you.  ;-)
> 
> > +}
> > +
> > +#endif /* __LINUX_MCS_SPINLOCK_H */
> > diff --git a/include/linux/mutex.h b/include/linux/mutex.h
> > index bab49da..32a32e6 100644
> > --- a/include/linux/mutex.h
> > +++ b/include/linux/mutex.h
> > @@ -46,6 +46,7 @@
> >   * - detects multi-task circular deadlocks and prints out all affected
> >   *   locks and tasks (and only those tasks)
> >   */
> > +struct mcs_spinlock;
> >  struct mutex {
> >  	/* 1: unlocked, 0: locked, negative: locked, possible waiters */
> >  	atomic_t		count;
> > @@ -55,7 +56,7 @@ struct mutex {
> >  	struct task_struct	*owner;
> >  #endif
> >  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> > -	void			*spin_mlock;	/* Spinner MCS lock */
> > +	struct mcs_spinlock	*mcs_lock;	/* Spinner MCS lock */
> >  #endif
> >  #ifdef CONFIG_DEBUG_MUTEXES
> >  	const char 		*name;
> > @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
> >  # define arch_mutex_cpu_relax() cpu_relax()
> >  #endif
> > 
> > -#endif
> > +#endif /* __LINUX_MUTEX_H */
> > diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> > index d24105b..e08b183 100644
> > --- a/kernel/locking/mutex.c
> > +++ b/kernel/locking/mutex.c
> > @@ -25,6 +25,7 @@
> >  #include <linux/spinlock.h>
> >  #include <linux/interrupt.h>
> >  #include <linux/debug_locks.h>
> > +#include <linux/mcs_spinlock.h>
> > 
> >  /*
> >   * In the DEBUG case we are using the "NULL fastpath" for mutexes,
> > @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
> >  	INIT_LIST_HEAD(&lock->wait_list);
> >  	mutex_clear_owner(lock);
> >  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> > -	lock->spin_mlock = NULL;
> > +	lock->mcs_lock = NULL;
> >  #endif
> > 
> >  	debug_mutex_init(lock, name, key);
> > @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock);
> >   * more or less simultaneously, the spinners need to acquire a MCS lock
> >   * first before spinning on the owner field.
> >   *
> > - * We don't inline mspin_lock() so that perf can correctly account for the
> > - * time spent in this lock function.
> >   */
> > -struct mspin_node {
> > -	struct mspin_node *next ;
> > -	int		  locked;	/* 1 if lock acquired */
> > -};
> > -#define	MLOCK(mutex)	((struct mspin_node **)&((mutex)->spin_mlock))
> > -
> > -static noinline
> > -void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
> > -{
> > -	struct mspin_node *prev;
> > -
> > -	/* Init node */
> > -	node->locked = 0;
> > -	node->next   = NULL;
> > -
> > -	prev = xchg(lock, node);
> > -	if (likely(prev == NULL)) {
> > -		/* Lock acquired */
> > -		node->locked = 1;
> > -		return;
> > -	}
> > -	ACCESS_ONCE(prev->next) = node;
> > -	smp_wmb();
> > -	/* Wait until the lock holder passes the lock down */
> > -	while (!ACCESS_ONCE(node->locked))
> > -		arch_mutex_cpu_relax();
> > -}
> > -
> > -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
> > -{
> > -	struct mspin_node *next = ACCESS_ONCE(node->next);
> > -
> > -	if (likely(!next)) {
> > -		/*
> > -		 * Release the lock by setting it to NULL
> > -		 */
> > -		if (cmpxchg(lock, node, NULL) == node)
> > -			return;
> > -		/* Wait until the next pointer is set */
> > -		while (!(next = ACCESS_ONCE(node->next)))
> > -			arch_mutex_cpu_relax();
> > -	}
> > -	ACCESS_ONCE(next->locked) = 1;
> > -	smp_wmb();
> > -}
> > 
> >  /*
> >   * Mutex spinning code migrated from kernel/sched/core.c
> > @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> > 
> >  	for (;;) {
> >  		struct task_struct *owner;
> > -		struct mspin_node  node;
> > +		struct mcs_spinlock  node;
> > 
> >  		if (use_ww_ctx && ww_ctx->acquired > 0) {
> >  			struct ww_mutex *ww;
> > @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> >  		 * If there's an owner, wait for it to either
> >  		 * release the lock or go to sleep.
> >  		 */
> > -		mspin_lock(MLOCK(lock), &node);
> > +		mcs_spin_lock(&lock->mcs_lock, &node);
> >  		owner = ACCESS_ONCE(lock->owner);
> >  		if (owner && !mutex_spin_on_owner(lock, owner)) {
> > -			mspin_unlock(MLOCK(lock), &node);
> > +			mcs_spin_unlock(&lock->mcs_lock, &node);
> >  			goto slowpath;
> >  		}
> > 
> > @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> >  			}
> > 
> >  			mutex_set_owner(lock);
> > -			mspin_unlock(MLOCK(lock), &node);
> > +			mcs_spin_unlock(&lock->mcs_lock, &node);
> >  			preempt_enable();
> >  			return 0;
> >  		}
> > -		mspin_unlock(MLOCK(lock), &node);
> > +		mcs_spin_unlock(&lock->mcs_lock, &node);
> > 
> >  		/*
> >  		 * When there's no owner, we might have preempted between the
> > -- 
> > 1.7.4.4
> > 
> > 
> > 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments
  2013-11-19 19:13   ` Paul E. McKenney
@ 2013-11-19 19:42     ` Tim Chen
  2013-11-19 22:57     ` Tim Chen
  1 sibling, 0 replies; 16+ messages in thread
From: Tim Chen @ 2013-11-19 19:42 UTC (permalink / raw)
  To: paulmck
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Tue, 2013-11-19 at 11:13 -0800, Paul E. McKenney wrote:
> On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote:
> > From: Jason Low <jason.low2@hp.com>
> > 
> > Remove unnecessary operation and make the cmpxchg(lock, node, NULL) == node
> > check in mcs_spin_unlock() likely() as it is likely that a race did not occur
> > most of the time.
> > 
> > Also add in more comments describing how the local node is used in MCS locks.
> > 
> > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
> > Signed-off-by: Jason Low <jason.low2@hp.com>
> > ---
> >  include/linux/mcs_spinlock.h |   13 +++++++++++--
> >  1 files changed, 11 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
> > index b5de3b0..96f14299 100644
> > --- a/include/linux/mcs_spinlock.h
> > +++ b/include/linux/mcs_spinlock.h
> > @@ -18,6 +18,12 @@ struct mcs_spinlock {
> >  };
> > 
> >  /*
> > + * In order to acquire the lock, the caller should declare a local node and
> > + * pass a reference of the node to this function in addition to the lock.
> > + * If the lock has already been acquired, then this will proceed to spin
> > + * on this node->locked until the previous lock holder sets the node->locked
> > + * in mcs_spin_unlock().
> > + *
> >   * We don't inline mcs_spin_lock() so that perf can correctly account for the
> >   * time spent in this lock function.
> >   */
> > @@ -33,7 +39,6 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  	prev = xchg(lock, node);
> >  	if (likely(prev == NULL)) {
> >  		/* Lock acquired */
> > -		node->locked = 1;
> 
> Agreed, no one looks at this field in this case, so no need to initialize
> it, unless for debug purposes.
> 
> >  		return;
> >  	}
> >  	ACCESS_ONCE(prev->next) = node;
> > @@ -43,6 +48,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  		arch_mutex_cpu_relax();
> >  }
> > 
> > +/*
> > + * Releases the lock. The caller should pass in the corresponding node that
> > + * was used to acquire the lock.
> > + */
> >  static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  {
> >  	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
> >  		/*
> >  		 * Release the lock by setting it to NULL
> >  		 */
> > -		if (cmpxchg(lock, node, NULL) == node)
> > +		if (likely(cmpxchg(lock, node, NULL) == node))
> 
> Agreed here as well.  Takes a narrow race to hit this.
> 
> So, did your testing exercise this path?  If the answer is "yes", and
> if the issues that I called out in patch #1 are resolved:

I haven't instrumented the code to check the hit rate of this path. But
the slow path probably will only get hit in some cases under 
heavy contention. 


> 
> Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 
> >  			return;
> >  		/* Wait until the next pointer is set */
> >  		while (!(next = ACCESS_ONCE(node->next)))
> > -- 
> > 1.7.4.4
> > 
> > 
> > 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections
  2013-11-19 19:21   ` Paul E. McKenney
@ 2013-11-19 19:46     ` Tim Chen
  0 siblings, 0 replies; 16+ messages in thread
From: Tim Chen @ 2013-11-19 19:46 UTC (permalink / raw)
  To: paulmck
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Tue, 2013-11-19 at 11:21 -0800, Paul E. McKenney wrote:
> On Fri, Nov 08, 2013 at 11:52:38AM -0800, Tim Chen wrote:
> > From: Waiman Long <Waiman.Long@hp.com>
> > 
> > This patch corrects the way memory barriers are used in the MCS lock
> > with the smp_load_acquire and smp_store_release functions.
> > It removes ones that are not needed.
> > 
> > It uses architecture specific load-acquire and store-release
> > primitives for synchronization, if available. Generic implementations
> > are provided in case they are not defined even though they may not
> > be optimal. These generic implementation could be removed later on
> > once changes are made in all the relevant header files.
> > 
> > Suggested-by: Michel Lespinasse <walken@google.com>
> > Signed-off-by: Waiman Long <Waiman.Long@hp.com>
> > Signed-off-by: Jason Low <jason.low2@hp.com>
> > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
> 
> Please see comments below.
> 
> 							Thanx, Paul
> 
> > ---
> >  kernel/locking/mcs_spinlock.c |   48 +++++++++++++++++++++++++++++++++++------
> >  1 files changed, 41 insertions(+), 7 deletions(-)
> > 
> > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
> > index b6f27f8..df5c167 100644
> > --- a/kernel/locking/mcs_spinlock.c
> > +++ b/kernel/locking/mcs_spinlock.c
> > @@ -23,6 +23,31 @@
> >  #endif
> > 
> >  /*
> > + * Fall back to use the regular atomic operations and memory barrier if
> > + * the acquire/release versions are not defined.
> > + */
> > +#ifndef	xchg_acquire
> > +# define xchg_acquire(p, v)		xchg(p, v)
> > +#endif
> > +
> > +#ifndef	smp_load_acquire
> > +# define smp_load_acquire(p)				\
> > +	({						\
> > +		typeof(*p) __v = ACCESS_ONCE(*(p));	\
> > +		smp_mb();				\
> > +		__v;					\
> > +	})
> > +#endif
> > +
> > +#ifndef smp_store_release
> > +# define smp_store_release(p, v)		\
> > +	do {					\
> > +		smp_mb();			\
> > +		ACCESS_ONCE(*(p)) = v;		\
> > +	} while (0)
> > +#endif
> > +
> > +/*
> >   * In order to acquire the lock, the caller should declare a local node and
> >   * pass a reference of the node to this function in addition to the lock.
> >   * If the lock has already been acquired, then this will proceed to spin
> > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  	node->locked = 0;
> >  	node->next   = NULL;
> > 
> > -	prev = xchg(lock, node);
> > +	/* xchg() provides a memory barrier */
> > +	prev = xchg_acquire(lock, node);
> 
> But if this is xchg_acquire() with only acquire semantics, it need not
> ensure that the initializations of node->locked and node->next above
> will happen before the "ACCESS_ONCE(prev->next) = node" below.  This
> therefore needs to remain xchg().  Or you need an smp_store_release()
> below instead of an ACCESS_ONCE() assignment.

Good point.  Will keep it as xchg.

> 
> As currently written, the poor CPU doing the unlock can be fatally
> disappointed by seeing pre-initialized values of ->locked and ->next.
> This could, among other things, result in a hang where the handoff
> happens before the initialization.
> 
> >  	if (likely(prev == NULL)) {
> >  		/* Lock acquired */
> >  		return;
> >  	}
> >  	ACCESS_ONCE(prev->next) = node;
> > -	smp_wmb();
> > -	/* Wait until the lock holder passes the lock down */
> > -	while (!ACCESS_ONCE(node->locked))
> > +	/*
> > +	 * Wait until the lock holder passes the lock down.
> > +	 * Using smp_load_acquire() provides a memory barrier that
> > +	 * ensures subsequent operations happen after the lock is acquired.
> > +	 */
> > +	while (!(smp_load_acquire(&node->locked)))
> >  		arch_mutex_cpu_relax();
> 
> OK, this smp_load_acquire() makes sense!
> 
> >  }
> >  EXPORT_SYMBOL_GPL(mcs_spin_lock);
> > @@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock);
> >   * Releases the lock. The caller should pass in the corresponding node that
> >   * was used to acquire the lock.
> >   */
> > -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> > +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  {
> >  	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> > 
> > @@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
> >  		while (!(next = ACCESS_ONCE(node->next)))
> >  			arch_mutex_cpu_relax();
> >  	}
> > -	ACCESS_ONCE(next->locked) = 1;
> > -	smp_wmb();
> > +	/*
> > +	 * Pass lock to next waiter.
> > +	 * smp_store_release() provides a memory barrier to ensure
> > +	 * all operations in the critical section has been completed
> > +	 * before unlocking.
> > +	 */
> > +	smp_store_release(&next->locked , 1);
> 
> This smp_store_release() makes sense as well!
> 
> Could you please get rid of the extraneous space before the comma?

Will do.

> 
> >  }
> >  EXPORT_SYMBOL_GPL(mcs_spin_unlock);
> > -- 
> > 1.7.4.4
> > 
> > 
> 



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file
  2013-11-19 19:42     ` Tim Chen
@ 2013-11-19 19:54       ` Paul E. McKenney
  0 siblings, 0 replies; 16+ messages in thread
From: Paul E. McKenney @ 2013-11-19 19:54 UTC (permalink / raw)
  To: Tim Chen
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Tue, Nov 19, 2013 at 11:42:32AM -0800, Tim Chen wrote:
> On Tue, 2013-11-19 at 11:10 -0800, Paul E. McKenney wrote:
> > On Fri, Nov 08, 2013 at 11:51:52AM -0800, Tim Chen wrote:
> > > We will need the MCS lock code for doing optimistic spinning for rwsem
> > > and queue rwlock.  Extracting the MCS code from mutex.c and put into
> > > its own file allow us to reuse this code easily.
> > > 
> > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
> > > Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
> > 
> > Please see comments below.
> > 
> 
> Thanks for reviewing the code.
> 
> > 							Thanx, Paul
> > 
> > > ---
> > >  include/linux/mcs_spinlock.h |   64 ++++++++++++++++++++++++++++++++++++++++++
> > >  include/linux/mutex.h        |    5 ++-
> > >  kernel/locking/mutex.c       |   60 ++++----------------------------------
> > >  3 files changed, 74 insertions(+), 55 deletions(-)
> > >  create mode 100644 include/linux/mcs_spinlock.h
> > > 
> > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h
> > > new file mode 100644
> > > index 0000000..b5de3b0
> > > --- /dev/null
> > > +++ b/include/linux/mcs_spinlock.h
> > > @@ -0,0 +1,64 @@
> > > +/*
> > > + * MCS lock defines
> > > + *
> > > + * This file contains the main data structure and API definitions of MCS lock.
> > > + *
> > > + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
> > > + * with the desirable properties of being fair, and with each cpu trying
> > > + * to acquire the lock spinning on a local variable.
> > > + * It avoids expensive cache bouncings that common test-and-set spin-lock
> > > + * implementations incur.
> > > + */
> > > +#ifndef __LINUX_MCS_SPINLOCK_H
> > > +#define __LINUX_MCS_SPINLOCK_H
> > > +
> > > +struct mcs_spinlock {
> > > +	struct mcs_spinlock *next;
> > > +	int locked; /* 1 if lock acquired */
> > > +};
> > > +
> > > +/*
> > > + * We don't inline mcs_spin_lock() so that perf can correctly account for the
> > > + * time spent in this lock function.
> > > + */
> > > +static noinline
> > > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> > > +{
> > > +	struct mcs_spinlock *prev;
> > > +
> > > +	/* Init node */
> > > +	node->locked = 0;
> > > +	node->next   = NULL;
> > > +
> > > +	prev = xchg(lock, node);
> > 
> > OK, the full memory barriers implied by xchg() ensure that *node will be
> > initialized before the "ACCESS_ONCE(prev->next) = node" below puts the
> > node into the list.  This rules out the misordering scenario that Tim
> > Chen called out in message-id <1380322005.3467.186.camel@schen9-DESK>
> > on September 27th.
> > 
> > Assuming of course a corresponding barrier on the lock handoff side.
> > 
> > > +	if (likely(prev == NULL)) {
> > > +		/* Lock acquired */
> > > +		node->locked = 1;
> > > +		return;
> > > +	}
> > > +	ACCESS_ONCE(prev->next) = node;
> > > +	smp_wmb();
> > 
> > I don't see what the above memory barrier does.  Here are some things
> > that it cannot be doing:
> > 
> > o	Ordering the insertion into the list above with the polling
> > 	below.  First, smp_wmb() does not order prior writes against
> > 	later reads, and second misordering is harmless.  If we start
> > 	polling before the insertion is complete, all that happens
> > 	is that the first few polls have no chance of seeing a lock
> > 	grant.
> > 
> > o	Ordering the polling against the initialization -- the above
> > 	xchg() is already doing that for us.
> > 
> > So what is its purpose?
> 
> Agree that the smp_wmb is not needed.  It is in the existing mcs code
> residing in mutex.c and we're re-factoring the code only here and haven't
> corrected the memory barrier.

Ah, so I should have been more aggressive about reviewing some time back,
then...  ;-)

							Thanx, Paul

> The particular smp_wmb() is removed in Patch 4/4 that corrects the
> memory barriers.
> 
> > 
> > > +	/* Wait until the lock holder passes the lock down */
> > > +	while (!ACCESS_ONCE(node->locked))
> > > +		arch_mutex_cpu_relax();
> > 
> > On the other hand, I don't see how we get away without a barrier here.
> > As written, what prevents the caller's load from ->owner from being
> > reordered with the above load from ->locked?  (Perhaps you can argue
> > that such reordering is only a performance problem, but if so we need
> > that argument recorded in comments.)
> > 
> > Of course, if anyone ever tries to use mcs_spin_lock() as a full lock,
> > they will need a memory barrier here to prevent the critical section
> > from leaking out.
> 
> Agree too.  The appropriate memory barrier is added in Patch 4/4.
> 
> > 
> > > +}
> > > +
> > > +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> > > +{
> > > +	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> > > +
> > > +	if (likely(!next)) {
> > > +		/*
> > > +		 * Release the lock by setting it to NULL
> > > +		 */
> > > +		if (cmpxchg(lock, node, NULL) == node)
> > > +			return;
> > > +		/* Wait until the next pointer is set */
> > > +		while (!(next = ACCESS_ONCE(node->next)))
> > > +			arch_mutex_cpu_relax();
> > > +	}
> > 
> > We need a memory barrier somewhere before here in this function,
> > otherwise the critical section can leak out.  I do not believe that
> > we can rely on the prohibition against speculative stores that Peter
> > Zijlstra and I have been discussing because that does not provide the
> > transitivity required by locking primitives.  I believe that we -could-
> > make the access below be an smp_store_release(), though.
> > 
> > Placing the barrier here (or at least not preceding the initial
> > fetch from node->next) has the advantage of allowing it to pair with
> > the xchg() in mcs_spin_lock(), though given the dependency only an
> > smp_read_barrier_depends() is required for that purpose.
> > 
> > > +	ACCESS_ONCE(next->locked) = 1;
> > > +	smp_wmb();
> > 
> > I don't see what this barrier does for us.  It is ordering the unlock
> > store with what, exactly?
> > 
> > If it really is doing something, we need a big fat comment stating what
> > that is, and checkpatch.pl will be happy to inform you.  ;-)
> > 
> > > +}
> > > +
> > > +#endif /* __LINUX_MCS_SPINLOCK_H */
> > > diff --git a/include/linux/mutex.h b/include/linux/mutex.h
> > > index bab49da..32a32e6 100644
> > > --- a/include/linux/mutex.h
> > > +++ b/include/linux/mutex.h
> > > @@ -46,6 +46,7 @@
> > >   * - detects multi-task circular deadlocks and prints out all affected
> > >   *   locks and tasks (and only those tasks)
> > >   */
> > > +struct mcs_spinlock;
> > >  struct mutex {
> > >  	/* 1: unlocked, 0: locked, negative: locked, possible waiters */
> > >  	atomic_t		count;
> > > @@ -55,7 +56,7 @@ struct mutex {
> > >  	struct task_struct	*owner;
> > >  #endif
> > >  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> > > -	void			*spin_mlock;	/* Spinner MCS lock */
> > > +	struct mcs_spinlock	*mcs_lock;	/* Spinner MCS lock */
> > >  #endif
> > >  #ifdef CONFIG_DEBUG_MUTEXES
> > >  	const char 		*name;
> > > @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
> > >  # define arch_mutex_cpu_relax() cpu_relax()
> > >  #endif
> > > 
> > > -#endif
> > > +#endif /* __LINUX_MUTEX_H */
> > > diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> > > index d24105b..e08b183 100644
> > > --- a/kernel/locking/mutex.c
> > > +++ b/kernel/locking/mutex.c
> > > @@ -25,6 +25,7 @@
> > >  #include <linux/spinlock.h>
> > >  #include <linux/interrupt.h>
> > >  #include <linux/debug_locks.h>
> > > +#include <linux/mcs_spinlock.h>
> > > 
> > >  /*
> > >   * In the DEBUG case we are using the "NULL fastpath" for mutexes,
> > > @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
> > >  	INIT_LIST_HEAD(&lock->wait_list);
> > >  	mutex_clear_owner(lock);
> > >  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> > > -	lock->spin_mlock = NULL;
> > > +	lock->mcs_lock = NULL;
> > >  #endif
> > > 
> > >  	debug_mutex_init(lock, name, key);
> > > @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock);
> > >   * more or less simultaneously, the spinners need to acquire a MCS lock
> > >   * first before spinning on the owner field.
> > >   *
> > > - * We don't inline mspin_lock() so that perf can correctly account for the
> > > - * time spent in this lock function.
> > >   */
> > > -struct mspin_node {
> > > -	struct mspin_node *next ;
> > > -	int		  locked;	/* 1 if lock acquired */
> > > -};
> > > -#define	MLOCK(mutex)	((struct mspin_node **)&((mutex)->spin_mlock))
> > > -
> > > -static noinline
> > > -void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
> > > -{
> > > -	struct mspin_node *prev;
> > > -
> > > -	/* Init node */
> > > -	node->locked = 0;
> > > -	node->next   = NULL;
> > > -
> > > -	prev = xchg(lock, node);
> > > -	if (likely(prev == NULL)) {
> > > -		/* Lock acquired */
> > > -		node->locked = 1;
> > > -		return;
> > > -	}
> > > -	ACCESS_ONCE(prev->next) = node;
> > > -	smp_wmb();
> > > -	/* Wait until the lock holder passes the lock down */
> > > -	while (!ACCESS_ONCE(node->locked))
> > > -		arch_mutex_cpu_relax();
> > > -}
> > > -
> > > -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
> > > -{
> > > -	struct mspin_node *next = ACCESS_ONCE(node->next);
> > > -
> > > -	if (likely(!next)) {
> > > -		/*
> > > -		 * Release the lock by setting it to NULL
> > > -		 */
> > > -		if (cmpxchg(lock, node, NULL) == node)
> > > -			return;
> > > -		/* Wait until the next pointer is set */
> > > -		while (!(next = ACCESS_ONCE(node->next)))
> > > -			arch_mutex_cpu_relax();
> > > -	}
> > > -	ACCESS_ONCE(next->locked) = 1;
> > > -	smp_wmb();
> > > -}
> > > 
> > >  /*
> > >   * Mutex spinning code migrated from kernel/sched/core.c
> > > @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> > > 
> > >  	for (;;) {
> > >  		struct task_struct *owner;
> > > -		struct mspin_node  node;
> > > +		struct mcs_spinlock  node;
> > > 
> > >  		if (use_ww_ctx && ww_ctx->acquired > 0) {
> > >  			struct ww_mutex *ww;
> > > @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> > >  		 * If there's an owner, wait for it to either
> > >  		 * release the lock or go to sleep.
> > >  		 */
> > > -		mspin_lock(MLOCK(lock), &node);
> > > +		mcs_spin_lock(&lock->mcs_lock, &node);
> > >  		owner = ACCESS_ONCE(lock->owner);
> > >  		if (owner && !mutex_spin_on_owner(lock, owner)) {
> > > -			mspin_unlock(MLOCK(lock), &node);
> > > +			mcs_spin_unlock(&lock->mcs_lock, &node);
> > >  			goto slowpath;
> > >  		}
> > > 
> > > @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> > >  			}
> > > 
> > >  			mutex_set_owner(lock);
> > > -			mspin_unlock(MLOCK(lock), &node);
> > > +			mcs_spin_unlock(&lock->mcs_lock, &node);
> > >  			preempt_enable();
> > >  			return 0;
> > >  		}
> > > -		mspin_unlock(MLOCK(lock), &node);
> > > +		mcs_spin_unlock(&lock->mcs_lock, &node);
> > > 
> > >  		/*
> > >  		 * When there's no owner, we might have preempted between the
> > > -- 
> > > 1.7.4.4
> > > 
> > > 
> > > 
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
> 
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments
  2013-11-19 19:13   ` Paul E. McKenney
  2013-11-19 19:42     ` Tim Chen
@ 2013-11-19 22:57     ` Tim Chen
  2013-11-19 23:05       ` Paul E. McKenney
  1 sibling, 1 reply; 16+ messages in thread
From: Tim Chen @ 2013-11-19 22:57 UTC (permalink / raw)
  To: paulmck
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Tue, 2013-11-19 at 11:13 -0800, Paul E. McKenney wrote:
> On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote:
> > 
> > +/*
> > + * Releases the lock. The caller should pass in the corresponding node that
> > + * was used to acquire the lock.
> > + */
> >  static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> >  {
> >  	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
> >  		/*
> >  		 * Release the lock by setting it to NULL
> >  		 */
> > -		if (cmpxchg(lock, node, NULL) == node)
> > +		if (likely(cmpxchg(lock, node, NULL) == node))
> 
> Agreed here as well.  Takes a narrow race to hit this.
> 
> So, did your testing exercise this path?  If the answer is "yes", 


Paul,

I did some instrumentation and confirmed that the path in question has 
been exercised.  So this patch should be okay.

Tim

> and if the issues that I called out in patch #1 are resolved:
> 
> Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 
> >  			return;
> >  		/* Wait until the next pointer is set */
> >  		while (!(next = ACCESS_ONCE(node->next)))
> >


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments
  2013-11-19 22:57     ` Tim Chen
@ 2013-11-19 23:05       ` Paul E. McKenney
  0 siblings, 0 replies; 16+ messages in thread
From: Paul E. McKenney @ 2013-11-19 23:05 UTC (permalink / raw)
  To: Tim Chen
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel,
	linux-mm, linux-arch, Linus Torvalds, Waiman Long,
	Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse,
	Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra,
	Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin,
	H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran,
	Scott J Norton, Will Deacon, Figo.zhang

On Tue, Nov 19, 2013 at 02:57:41PM -0800, Tim Chen wrote:
> On Tue, 2013-11-19 at 11:13 -0800, Paul E. McKenney wrote:
> > On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote:
> > > 
> > > +/*
> > > + * Releases the lock. The caller should pass in the corresponding node that
> > > + * was used to acquire the lock.
> > > + */
> > >  static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
> > >  {
> > >  	struct mcs_spinlock *next = ACCESS_ONCE(node->next);
> > > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod
> > >  		/*
> > >  		 * Release the lock by setting it to NULL
> > >  		 */
> > > -		if (cmpxchg(lock, node, NULL) == node)
> > > +		if (likely(cmpxchg(lock, node, NULL) == node))
> > 
> > Agreed here as well.  Takes a narrow race to hit this.
> > 
> > So, did your testing exercise this path?  If the answer is "yes", 
> 
> 
> Paul,
> 
> I did some instrumentation and confirmed that the path in question has 
> been exercised.  So this patch should be okay.

Very good!

							Thanx, Paul


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2013-11-19 23:06 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <cover.1383935697.git.tim.c.chen@linux.intel.com>
2013-11-08 19:51 ` [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file Tim Chen
2013-11-19 19:10   ` Paul E. McKenney
2013-11-19 19:42     ` Tim Chen
2013-11-19 19:54       ` Paul E. McKenney
2013-11-08 19:52 ` [PATCH v5 2/4] MCS Lock: optimizations and extra comments Tim Chen
2013-11-19 19:13   ` Paul E. McKenney
2013-11-19 19:42     ` Tim Chen
2013-11-19 22:57     ` Tim Chen
2013-11-19 23:05       ` Paul E. McKenney
2013-11-08 19:52 ` [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file Tim Chen
2013-11-19 19:15   ` Paul E. McKenney
2013-11-08 19:52 ` [PATCH v5 4/4] MCS Lock: Barrier corrections Tim Chen
     [not found]   ` <20131111181049.GL28302@mudshark.cambridge.arm.com>
     [not found]     ` <1384204673.10046.6.camel@schen9-mobl3>
2013-11-12 14:54       ` Waiman Long
2013-11-19 19:21   ` Paul E. McKenney
2013-11-19 19:46     ` Tim Chen
     [not found] <20131112160827.GB25953@mudshark.cambridge.arm.com>
2013-11-12 17:16 ` George Spelvin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox