* [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file @ 2013-11-08 19:51 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-08 19:51 UTC (permalink / raw) To: Ingo Molnar, Andrew Morton, Thomas Gleixner Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton We will need the MCS lock code for doing optimistic spinning for rwsem and queue rwlock. Extracting the MCS code from mutex.c and put into its own file allow us to reuse this code easily. Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> Signed-off-by: Davidlohr Bueso <davidlohr@hp.com> --- include/linux/mcs_spinlock.h | 64 ++++++++++++++++++++++++++++++++++++++++++ include/linux/mutex.h | 5 ++- kernel/locking/mutex.c | 60 ++++---------------------------------- 3 files changed, 74 insertions(+), 55 deletions(-) create mode 100644 include/linux/mcs_spinlock.h diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h new file mode 100644 index 0000000..b5de3b0 --- /dev/null +++ b/include/linux/mcs_spinlock.h @@ -0,0 +1,64 @@ +/* + * MCS lock defines + * + * This file contains the main data structure and API definitions of MCS lock. + * + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock + * with the desirable properties of being fair, and with each cpu trying + * to acquire the lock spinning on a local variable. + * It avoids expensive cache bouncings that common test-and-set spin-lock + * implementations incur. + */ +#ifndef __LINUX_MCS_SPINLOCK_H +#define __LINUX_MCS_SPINLOCK_H + +struct mcs_spinlock { + struct mcs_spinlock *next; + int locked; /* 1 if lock acquired */ +}; + +/* + * We don't inline mcs_spin_lock() so that perf can correctly account for the + * time spent in this lock function. + */ +static noinline +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *prev; + + /* Init node */ + node->locked = 0; + node->next = NULL; + + prev = xchg(lock, node); + if (likely(prev == NULL)) { + /* Lock acquired */ + node->locked = 1; + return; + } + ACCESS_ONCE(prev->next) = node; + smp_wmb(); + /* Wait until the lock holder passes the lock down */ + while (!ACCESS_ONCE(node->locked)) + arch_mutex_cpu_relax(); +} + +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *next = ACCESS_ONCE(node->next); + + if (likely(!next)) { + /* + * Release the lock by setting it to NULL + */ + if (cmpxchg(lock, node, NULL) == node) + return; + /* Wait until the next pointer is set */ + while (!(next = ACCESS_ONCE(node->next))) + arch_mutex_cpu_relax(); + } + ACCESS_ONCE(next->locked) = 1; + smp_wmb(); +} + +#endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index bab49da..32a32e6 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -46,6 +46,7 @@ * - detects multi-task circular deadlocks and prints out all affected * locks and tasks (and only those tasks) */ +struct mcs_spinlock; struct mutex { /* 1: unlocked, 0: locked, negative: locked, possible waiters */ atomic_t count; @@ -55,7 +56,7 @@ struct mutex { struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - void *spin_mlock; /* Spinner MCS lock */ + struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); # define arch_mutex_cpu_relax() cpu_relax() #endif -#endif +#endif /* __LINUX_MUTEX_H */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index d24105b..e08b183 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -25,6 +25,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/debug_locks.h> +#include <linux/mcs_spinlock.h> /* * In the DEBUG case we are using the "NULL fastpath" for mutexes, @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) INIT_LIST_HEAD(&lock->wait_list); mutex_clear_owner(lock); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - lock->spin_mlock = NULL; + lock->mcs_lock = NULL; #endif debug_mutex_init(lock, name, key); @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock); * more or less simultaneously, the spinners need to acquire a MCS lock * first before spinning on the owner field. * - * We don't inline mspin_lock() so that perf can correctly account for the - * time spent in this lock function. */ -struct mspin_node { - struct mspin_node *next ; - int locked; /* 1 if lock acquired */ -}; -#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) - -static noinline -void mspin_lock(struct mspin_node **lock, struct mspin_node *node) -{ - struct mspin_node *prev; - - /* Init node */ - node->locked = 0; - node->next = NULL; - - prev = xchg(lock, node); - if (likely(prev == NULL)) { - /* Lock acquired */ - node->locked = 1; - return; - } - ACCESS_ONCE(prev->next) = node; - smp_wmb(); - /* Wait until the lock holder passes the lock down */ - while (!ACCESS_ONCE(node->locked)) - arch_mutex_cpu_relax(); -} - -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) -{ - struct mspin_node *next = ACCESS_ONCE(node->next); - - if (likely(!next)) { - /* - * Release the lock by setting it to NULL - */ - if (cmpxchg(lock, node, NULL) == node) - return; - /* Wait until the next pointer is set */ - while (!(next = ACCESS_ONCE(node->next))) - arch_mutex_cpu_relax(); - } - ACCESS_ONCE(next->locked) = 1; - smp_wmb(); -} /* * Mutex spinning code migrated from kernel/sched/core.c @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, for (;;) { struct task_struct *owner; - struct mspin_node node; + struct mcs_spinlock node; if (use_ww_ctx && ww_ctx->acquired > 0) { struct ww_mutex *ww; @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * If there's an owner, wait for it to either * release the lock or go to sleep. */ - mspin_lock(MLOCK(lock), &node); + mcs_spin_lock(&lock->mcs_lock, &node); owner = ACCESS_ONCE(lock->owner); if (owner && !mutex_spin_on_owner(lock, owner)) { - mspin_unlock(MLOCK(lock), &node); + mcs_spin_unlock(&lock->mcs_lock, &node); goto slowpath; } @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } mutex_set_owner(lock); - mspin_unlock(MLOCK(lock), &node); + mcs_spin_unlock(&lock->mcs_lock, &node); preempt_enable(); return 0; } - mspin_unlock(MLOCK(lock), &node); + mcs_spin_unlock(&lock->mcs_lock, &node); /* * When there's no owner, we might have preempted between the -- 1.7.4.4 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file @ 2013-11-08 19:51 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-08 19:51 UTC (permalink / raw) To: Ingo Molnar, Andrew Morton, Thomas Gleixner Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang We will need the MCS lock code for doing optimistic spinning for rwsem and queue rwlock. Extracting the MCS code from mutex.c and put into its own file allow us to reuse this code easily. Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> Signed-off-by: Davidlohr Bueso <davidlohr@hp.com> --- include/linux/mcs_spinlock.h | 64 ++++++++++++++++++++++++++++++++++++++++++ include/linux/mutex.h | 5 ++- kernel/locking/mutex.c | 60 ++++---------------------------------- 3 files changed, 74 insertions(+), 55 deletions(-) create mode 100644 include/linux/mcs_spinlock.h diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h new file mode 100644 index 0000000..b5de3b0 --- /dev/null +++ b/include/linux/mcs_spinlock.h @@ -0,0 +1,64 @@ +/* + * MCS lock defines + * + * This file contains the main data structure and API definitions of MCS lock. + * + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock + * with the desirable properties of being fair, and with each cpu trying + * to acquire the lock spinning on a local variable. + * It avoids expensive cache bouncings that common test-and-set spin-lock + * implementations incur. + */ +#ifndef __LINUX_MCS_SPINLOCK_H +#define __LINUX_MCS_SPINLOCK_H + +struct mcs_spinlock { + struct mcs_spinlock *next; + int locked; /* 1 if lock acquired */ +}; + +/* + * We don't inline mcs_spin_lock() so that perf can correctly account for the + * time spent in this lock function. + */ +static noinline +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *prev; + + /* Init node */ + node->locked = 0; + node->next = NULL; + + prev = xchg(lock, node); + if (likely(prev == NULL)) { + /* Lock acquired */ + node->locked = 1; + return; + } + ACCESS_ONCE(prev->next) = node; + smp_wmb(); + /* Wait until the lock holder passes the lock down */ + while (!ACCESS_ONCE(node->locked)) + arch_mutex_cpu_relax(); +} + +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *next = ACCESS_ONCE(node->next); + + if (likely(!next)) { + /* + * Release the lock by setting it to NULL + */ + if (cmpxchg(lock, node, NULL) == node) + return; + /* Wait until the next pointer is set */ + while (!(next = ACCESS_ONCE(node->next))) + arch_mutex_cpu_relax(); + } + ACCESS_ONCE(next->locked) = 1; + smp_wmb(); +} + +#endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index bab49da..32a32e6 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -46,6 +46,7 @@ * - detects multi-task circular deadlocks and prints out all affected * locks and tasks (and only those tasks) */ +struct mcs_spinlock; struct mutex { /* 1: unlocked, 0: locked, negative: locked, possible waiters */ atomic_t count; @@ -55,7 +56,7 @@ struct mutex { struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - void *spin_mlock; /* Spinner MCS lock */ + struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); # define arch_mutex_cpu_relax() cpu_relax() #endif -#endif +#endif /* __LINUX_MUTEX_H */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index d24105b..e08b183 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -25,6 +25,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/debug_locks.h> +#include <linux/mcs_spinlock.h> /* * In the DEBUG case we are using the "NULL fastpath" for mutexes, @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) INIT_LIST_HEAD(&lock->wait_list); mutex_clear_owner(lock); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - lock->spin_mlock = NULL; + lock->mcs_lock = NULL; #endif debug_mutex_init(lock, name, key); @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock); * more or less simultaneously, the spinners need to acquire a MCS lock * first before spinning on the owner field. * - * We don't inline mspin_lock() so that perf can correctly account for the - * time spent in this lock function. */ -struct mspin_node { - struct mspin_node *next ; - int locked; /* 1 if lock acquired */ -}; -#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) - -static noinline -void mspin_lock(struct mspin_node **lock, struct mspin_node *node) -{ - struct mspin_node *prev; - - /* Init node */ - node->locked = 0; - node->next = NULL; - - prev = xchg(lock, node); - if (likely(prev == NULL)) { - /* Lock acquired */ - node->locked = 1; - return; - } - ACCESS_ONCE(prev->next) = node; - smp_wmb(); - /* Wait until the lock holder passes the lock down */ - while (!ACCESS_ONCE(node->locked)) - arch_mutex_cpu_relax(); -} - -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) -{ - struct mspin_node *next = ACCESS_ONCE(node->next); - - if (likely(!next)) { - /* - * Release the lock by setting it to NULL - */ - if (cmpxchg(lock, node, NULL) == node) - return; - /* Wait until the next pointer is set */ - while (!(next = ACCESS_ONCE(node->next))) - arch_mutex_cpu_relax(); - } - ACCESS_ONCE(next->locked) = 1; - smp_wmb(); -} /* * Mutex spinning code migrated from kernel/sched/core.c @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, for (;;) { struct task_struct *owner; - struct mspin_node node; + struct mcs_spinlock node; if (use_ww_ctx && ww_ctx->acquired > 0) { struct ww_mutex *ww; @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * If there's an owner, wait for it to either * release the lock or go to sleep. */ - mspin_lock(MLOCK(lock), &node); + mcs_spin_lock(&lock->mcs_lock, &node); owner = ACCESS_ONCE(lock->owner); if (owner && !mutex_spin_on_owner(lock, owner)) { - mspin_unlock(MLOCK(lock), &node); + mcs_spin_unlock(&lock->mcs_lock, &node); goto slowpath; } @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } mutex_set_owner(lock); - mspin_unlock(MLOCK(lock), &node); + mcs_spin_unlock(&lock->mcs_lock, &node); preempt_enable(); return 0; } - mspin_unlock(MLOCK(lock), &node); + mcs_spin_unlock(&lock->mcs_lock, &node); /* * When there's no owner, we might have preempted between the -- 1.7.4.4 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file @ 2013-11-08 19:51 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-08 19:51 UTC (permalink / raw) To: Ingo Molnar, Andrew Morton, Thomas Gleixner Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang We will need the MCS lock code for doing optimistic spinning for rwsem and queue rwlock. Extracting the MCS code from mutex.c and put into its own file allow us to reuse this code easily. Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> Signed-off-by: Davidlohr Bueso <davidlohr@hp.com> --- include/linux/mcs_spinlock.h | 64 ++++++++++++++++++++++++++++++++++++++++++ include/linux/mutex.h | 5 ++- kernel/locking/mutex.c | 60 ++++---------------------------------- 3 files changed, 74 insertions(+), 55 deletions(-) create mode 100644 include/linux/mcs_spinlock.h diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h new file mode 100644 index 0000000..b5de3b0 --- /dev/null +++ b/include/linux/mcs_spinlock.h @@ -0,0 +1,64 @@ +/* + * MCS lock defines + * + * This file contains the main data structure and API definitions of MCS lock. + * + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock + * with the desirable properties of being fair, and with each cpu trying + * to acquire the lock spinning on a local variable. + * It avoids expensive cache bouncings that common test-and-set spin-lock + * implementations incur. + */ +#ifndef __LINUX_MCS_SPINLOCK_H +#define __LINUX_MCS_SPINLOCK_H + +struct mcs_spinlock { + struct mcs_spinlock *next; + int locked; /* 1 if lock acquired */ +}; + +/* + * We don't inline mcs_spin_lock() so that perf can correctly account for the + * time spent in this lock function. + */ +static noinline +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *prev; + + /* Init node */ + node->locked = 0; + node->next = NULL; + + prev = xchg(lock, node); + if (likely(prev == NULL)) { + /* Lock acquired */ + node->locked = 1; + return; + } + ACCESS_ONCE(prev->next) = node; + smp_wmb(); + /* Wait until the lock holder passes the lock down */ + while (!ACCESS_ONCE(node->locked)) + arch_mutex_cpu_relax(); +} + +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *next = ACCESS_ONCE(node->next); + + if (likely(!next)) { + /* + * Release the lock by setting it to NULL + */ + if (cmpxchg(lock, node, NULL) == node) + return; + /* Wait until the next pointer is set */ + while (!(next = ACCESS_ONCE(node->next))) + arch_mutex_cpu_relax(); + } + ACCESS_ONCE(next->locked) = 1; + smp_wmb(); +} + +#endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index bab49da..32a32e6 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -46,6 +46,7 @@ * - detects multi-task circular deadlocks and prints out all affected * locks and tasks (and only those tasks) */ +struct mcs_spinlock; struct mutex { /* 1: unlocked, 0: locked, negative: locked, possible waiters */ atomic_t count; @@ -55,7 +56,7 @@ struct mutex { struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - void *spin_mlock; /* Spinner MCS lock */ + struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); # define arch_mutex_cpu_relax() cpu_relax() #endif -#endif +#endif /* __LINUX_MUTEX_H */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index d24105b..e08b183 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -25,6 +25,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/debug_locks.h> +#include <linux/mcs_spinlock.h> /* * In the DEBUG case we are using the "NULL fastpath" for mutexes, @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) INIT_LIST_HEAD(&lock->wait_list); mutex_clear_owner(lock); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - lock->spin_mlock = NULL; + lock->mcs_lock = NULL; #endif debug_mutex_init(lock, name, key); @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock); * more or less simultaneously, the spinners need to acquire a MCS lock * first before spinning on the owner field. * - * We don't inline mspin_lock() so that perf can correctly account for the - * time spent in this lock function. */ -struct mspin_node { - struct mspin_node *next ; - int locked; /* 1 if lock acquired */ -}; -#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) - -static noinline -void mspin_lock(struct mspin_node **lock, struct mspin_node *node) -{ - struct mspin_node *prev; - - /* Init node */ - node->locked = 0; - node->next = NULL; - - prev = xchg(lock, node); - if (likely(prev == NULL)) { - /* Lock acquired */ - node->locked = 1; - return; - } - ACCESS_ONCE(prev->next) = node; - smp_wmb(); - /* Wait until the lock holder passes the lock down */ - while (!ACCESS_ONCE(node->locked)) - arch_mutex_cpu_relax(); -} - -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) -{ - struct mspin_node *next = ACCESS_ONCE(node->next); - - if (likely(!next)) { - /* - * Release the lock by setting it to NULL - */ - if (cmpxchg(lock, node, NULL) == node) - return; - /* Wait until the next pointer is set */ - while (!(next = ACCESS_ONCE(node->next))) - arch_mutex_cpu_relax(); - } - ACCESS_ONCE(next->locked) = 1; - smp_wmb(); -} /* * Mutex spinning code migrated from kernel/sched/core.c @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, for (;;) { struct task_struct *owner; - struct mspin_node node; + struct mcs_spinlock node; if (use_ww_ctx && ww_ctx->acquired > 0) { struct ww_mutex *ww; @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * If there's an owner, wait for it to either * release the lock or go to sleep. */ - mspin_lock(MLOCK(lock), &node); + mcs_spin_lock(&lock->mcs_lock, &node); owner = ACCESS_ONCE(lock->owner); if (owner && !mutex_spin_on_owner(lock, owner)) { - mspin_unlock(MLOCK(lock), &node); + mcs_spin_unlock(&lock->mcs_lock, &node); goto slowpath; } @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } mutex_set_owner(lock); - mspin_unlock(MLOCK(lock), &node); + mcs_spin_unlock(&lock->mcs_lock, &node); preempt_enable(); return 0; } - mspin_unlock(MLOCK(lock), &node); + mcs_spin_unlock(&lock->mcs_lock, &node); /* * When there's no owner, we might have preempted between the -- 1.7.4.4 ^ permalink raw reply related [flat|nested] 70+ messages in thread
* Re: [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file @ 2013-11-19 19:10 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:10 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, "H. Peter Anvin" <hpa> On Fri, Nov 08, 2013 at 11:51:52AM -0800, Tim Chen wrote: > We will need the MCS lock code for doing optimistic spinning for rwsem > and queue rwlock. Extracting the MCS code from mutex.c and put into > its own file allow us to reuse this code easily. > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > Signed-off-by: Davidlohr Bueso <davidlohr@hp.com> Please see comments below. Thanx, Paul > --- > include/linux/mcs_spinlock.h | 64 ++++++++++++++++++++++++++++++++++++++++++ > include/linux/mutex.h | 5 ++- > kernel/locking/mutex.c | 60 ++++---------------------------------- > 3 files changed, 74 insertions(+), 55 deletions(-) > create mode 100644 include/linux/mcs_spinlock.h > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > new file mode 100644 > index 0000000..b5de3b0 > --- /dev/null > +++ b/include/linux/mcs_spinlock.h > @@ -0,0 +1,64 @@ > +/* > + * MCS lock defines > + * > + * This file contains the main data structure and API definitions of MCS lock. > + * > + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock > + * with the desirable properties of being fair, and with each cpu trying > + * to acquire the lock spinning on a local variable. > + * It avoids expensive cache bouncings that common test-and-set spin-lock > + * implementations incur. > + */ > +#ifndef __LINUX_MCS_SPINLOCK_H > +#define __LINUX_MCS_SPINLOCK_H > + > +struct mcs_spinlock { > + struct mcs_spinlock *next; > + int locked; /* 1 if lock acquired */ > +}; > + > +/* > + * We don't inline mcs_spin_lock() so that perf can correctly account for the > + * time spent in this lock function. > + */ > +static noinline > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > +{ > + struct mcs_spinlock *prev; > + > + /* Init node */ > + node->locked = 0; > + node->next = NULL; > + > + prev = xchg(lock, node); OK, the full memory barriers implied by xchg() ensure that *node will be initialized before the "ACCESS_ONCE(prev->next) = node" below puts the node into the list. This rules out the misordering scenario that Tim Chen called out in message-id <1380322005.3467.186.camel@schen9-DESK> on September 27th. Assuming of course a corresponding barrier on the lock handoff side. > + if (likely(prev == NULL)) { > + /* Lock acquired */ > + node->locked = 1; > + return; > + } > + ACCESS_ONCE(prev->next) = node; > + smp_wmb(); I don't see what the above memory barrier does. Here are some things that it cannot be doing: o Ordering the insertion into the list above with the polling below. First, smp_wmb() does not order prior writes against later reads, and second misordering is harmless. If we start polling before the insertion is complete, all that happens is that the first few polls have no chance of seeing a lock grant. o Ordering the polling against the initialization -- the above xchg() is already doing that for us. So what is its purpose? > + /* Wait until the lock holder passes the lock down */ > + while (!ACCESS_ONCE(node->locked)) > + arch_mutex_cpu_relax(); On the other hand, I don't see how we get away without a barrier here. As written, what prevents the caller's load from ->owner from being reordered with the above load from ->locked? (Perhaps you can argue that such reordering is only a performance problem, but if so we need that argument recorded in comments.) Of course, if anyone ever tries to use mcs_spin_lock() as a full lock, they will need a memory barrier here to prevent the critical section from leaking out. > +} > + > +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > +{ > + struct mcs_spinlock *next = ACCESS_ONCE(node->next); > + > + if (likely(!next)) { > + /* > + * Release the lock by setting it to NULL > + */ > + if (cmpxchg(lock, node, NULL) == node) > + return; > + /* Wait until the next pointer is set */ > + while (!(next = ACCESS_ONCE(node->next))) > + arch_mutex_cpu_relax(); > + } We need a memory barrier somewhere before here in this function, otherwise the critical section can leak out. I do not believe that we can rely on the prohibition against speculative stores that Peter Zijlstra and I have been discussing because that does not provide the transitivity required by locking primitives. I believe that we -could- make the access below be an smp_store_release(), though. Placing the barrier here (or at least not preceding the initial fetch from node->next) has the advantage of allowing it to pair with the xchg() in mcs_spin_lock(), though given the dependency only an smp_read_barrier_depends() is required for that purpose. > + ACCESS_ONCE(next->locked) = 1; > + smp_wmb(); I don't see what this barrier does for us. It is ordering the unlock store with what, exactly? If it really is doing something, we need a big fat comment stating what that is, and checkpatch.pl will be happy to inform you. ;-) > +} > + > +#endif /* __LINUX_MCS_SPINLOCK_H */ > diff --git a/include/linux/mutex.h b/include/linux/mutex.h > index bab49da..32a32e6 100644 > --- a/include/linux/mutex.h > +++ b/include/linux/mutex.h > @@ -46,6 +46,7 @@ > * - detects multi-task circular deadlocks and prints out all affected > * locks and tasks (and only those tasks) > */ > +struct mcs_spinlock; > struct mutex { > /* 1: unlocked, 0: locked, negative: locked, possible waiters */ > atomic_t count; > @@ -55,7 +56,7 @@ struct mutex { > struct task_struct *owner; > #endif > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > - void *spin_mlock; /* Spinner MCS lock */ > + struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ > #endif > #ifdef CONFIG_DEBUG_MUTEXES > const char *name; > @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); > # define arch_mutex_cpu_relax() cpu_relax() > #endif > > -#endif > +#endif /* __LINUX_MUTEX_H */ > diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c > index d24105b..e08b183 100644 > --- a/kernel/locking/mutex.c > +++ b/kernel/locking/mutex.c > @@ -25,6 +25,7 @@ > #include <linux/spinlock.h> > #include <linux/interrupt.h> > #include <linux/debug_locks.h> > +#include <linux/mcs_spinlock.h> > > /* > * In the DEBUG case we are using the "NULL fastpath" for mutexes, > @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) > INIT_LIST_HEAD(&lock->wait_list); > mutex_clear_owner(lock); > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > - lock->spin_mlock = NULL; > + lock->mcs_lock = NULL; > #endif > > debug_mutex_init(lock, name, key); > @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock); > * more or less simultaneously, the spinners need to acquire a MCS lock > * first before spinning on the owner field. > * > - * We don't inline mspin_lock() so that perf can correctly account for the > - * time spent in this lock function. > */ > -struct mspin_node { > - struct mspin_node *next ; > - int locked; /* 1 if lock acquired */ > -}; > -#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) > - > -static noinline > -void mspin_lock(struct mspin_node **lock, struct mspin_node *node) > -{ > - struct mspin_node *prev; > - > - /* Init node */ > - node->locked = 0; > - node->next = NULL; > - > - prev = xchg(lock, node); > - if (likely(prev == NULL)) { > - /* Lock acquired */ > - node->locked = 1; > - return; > - } > - ACCESS_ONCE(prev->next) = node; > - smp_wmb(); > - /* Wait until the lock holder passes the lock down */ > - while (!ACCESS_ONCE(node->locked)) > - arch_mutex_cpu_relax(); > -} > - > -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) > -{ > - struct mspin_node *next = ACCESS_ONCE(node->next); > - > - if (likely(!next)) { > - /* > - * Release the lock by setting it to NULL > - */ > - if (cmpxchg(lock, node, NULL) == node) > - return; > - /* Wait until the next pointer is set */ > - while (!(next = ACCESS_ONCE(node->next))) > - arch_mutex_cpu_relax(); > - } > - ACCESS_ONCE(next->locked) = 1; > - smp_wmb(); > -} > > /* > * Mutex spinning code migrated from kernel/sched/core.c > @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > for (;;) { > struct task_struct *owner; > - struct mspin_node node; > + struct mcs_spinlock node; > > if (use_ww_ctx && ww_ctx->acquired > 0) { > struct ww_mutex *ww; > @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > * If there's an owner, wait for it to either > * release the lock or go to sleep. > */ > - mspin_lock(MLOCK(lock), &node); > + mcs_spin_lock(&lock->mcs_lock, &node); > owner = ACCESS_ONCE(lock->owner); > if (owner && !mutex_spin_on_owner(lock, owner)) { > - mspin_unlock(MLOCK(lock), &node); > + mcs_spin_unlock(&lock->mcs_lock, &node); > goto slowpath; > } > > @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > } > > mutex_set_owner(lock); > - mspin_unlock(MLOCK(lock), &node); > + mcs_spin_unlock(&lock->mcs_lock, &node); > preempt_enable(); > return 0; > } > - mspin_unlock(MLOCK(lock), &node); > + mcs_spin_unlock(&lock->mcs_lock, &node); > > /* > * When there's no owner, we might have preempted between the > -- > 1.7.4.4 > > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file @ 2013-11-19 19:10 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:10 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Fri, Nov 08, 2013 at 11:51:52AM -0800, Tim Chen wrote: > We will need the MCS lock code for doing optimistic spinning for rwsem > and queue rwlock. Extracting the MCS code from mutex.c and put into > its own file allow us to reuse this code easily. > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > Signed-off-by: Davidlohr Bueso <davidlohr@hp.com> Please see comments below. Thanx, Paul > --- > include/linux/mcs_spinlock.h | 64 ++++++++++++++++++++++++++++++++++++++++++ > include/linux/mutex.h | 5 ++- > kernel/locking/mutex.c | 60 ++++---------------------------------- > 3 files changed, 74 insertions(+), 55 deletions(-) > create mode 100644 include/linux/mcs_spinlock.h > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > new file mode 100644 > index 0000000..b5de3b0 > --- /dev/null > +++ b/include/linux/mcs_spinlock.h > @@ -0,0 +1,64 @@ > +/* > + * MCS lock defines > + * > + * This file contains the main data structure and API definitions of MCS lock. > + * > + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock > + * with the desirable properties of being fair, and with each cpu trying > + * to acquire the lock spinning on a local variable. > + * It avoids expensive cache bouncings that common test-and-set spin-lock > + * implementations incur. > + */ > +#ifndef __LINUX_MCS_SPINLOCK_H > +#define __LINUX_MCS_SPINLOCK_H > + > +struct mcs_spinlock { > + struct mcs_spinlock *next; > + int locked; /* 1 if lock acquired */ > +}; > + > +/* > + * We don't inline mcs_spin_lock() so that perf can correctly account for the > + * time spent in this lock function. > + */ > +static noinline > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > +{ > + struct mcs_spinlock *prev; > + > + /* Init node */ > + node->locked = 0; > + node->next = NULL; > + > + prev = xchg(lock, node); OK, the full memory barriers implied by xchg() ensure that *node will be initialized before the "ACCESS_ONCE(prev->next) = node" below puts the node into the list. This rules out the misordering scenario that Tim Chen called out in message-id <1380322005.3467.186.camel@schen9-DESK> on September 27th. Assuming of course a corresponding barrier on the lock handoff side. > + if (likely(prev == NULL)) { > + /* Lock acquired */ > + node->locked = 1; > + return; > + } > + ACCESS_ONCE(prev->next) = node; > + smp_wmb(); I don't see what the above memory barrier does. Here are some things that it cannot be doing: o Ordering the insertion into the list above with the polling below. First, smp_wmb() does not order prior writes against later reads, and second misordering is harmless. If we start polling before the insertion is complete, all that happens is that the first few polls have no chance of seeing a lock grant. o Ordering the polling against the initialization -- the above xchg() is already doing that for us. So what is its purpose? > + /* Wait until the lock holder passes the lock down */ > + while (!ACCESS_ONCE(node->locked)) > + arch_mutex_cpu_relax(); On the other hand, I don't see how we get away without a barrier here. As written, what prevents the caller's load from ->owner from being reordered with the above load from ->locked? (Perhaps you can argue that such reordering is only a performance problem, but if so we need that argument recorded in comments.) Of course, if anyone ever tries to use mcs_spin_lock() as a full lock, they will need a memory barrier here to prevent the critical section from leaking out. > +} > + > +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > +{ > + struct mcs_spinlock *next = ACCESS_ONCE(node->next); > + > + if (likely(!next)) { > + /* > + * Release the lock by setting it to NULL > + */ > + if (cmpxchg(lock, node, NULL) == node) > + return; > + /* Wait until the next pointer is set */ > + while (!(next = ACCESS_ONCE(node->next))) > + arch_mutex_cpu_relax(); > + } We need a memory barrier somewhere before here in this function, otherwise the critical section can leak out. I do not believe that we can rely on the prohibition against speculative stores that Peter Zijlstra and I have been discussing because that does not provide the transitivity required by locking primitives. I believe that we -could- make the access below be an smp_store_release(), though. Placing the barrier here (or at least not preceding the initial fetch from node->next) has the advantage of allowing it to pair with the xchg() in mcs_spin_lock(), though given the dependency only an smp_read_barrier_depends() is required for that purpose. > + ACCESS_ONCE(next->locked) = 1; > + smp_wmb(); I don't see what this barrier does for us. It is ordering the unlock store with what, exactly? If it really is doing something, we need a big fat comment stating what that is, and checkpatch.pl will be happy to inform you. ;-) > +} > + > +#endif /* __LINUX_MCS_SPINLOCK_H */ > diff --git a/include/linux/mutex.h b/include/linux/mutex.h > index bab49da..32a32e6 100644 > --- a/include/linux/mutex.h > +++ b/include/linux/mutex.h > @@ -46,6 +46,7 @@ > * - detects multi-task circular deadlocks and prints out all affected > * locks and tasks (and only those tasks) > */ > +struct mcs_spinlock; > struct mutex { > /* 1: unlocked, 0: locked, negative: locked, possible waiters */ > atomic_t count; > @@ -55,7 +56,7 @@ struct mutex { > struct task_struct *owner; > #endif > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > - void *spin_mlock; /* Spinner MCS lock */ > + struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ > #endif > #ifdef CONFIG_DEBUG_MUTEXES > const char *name; > @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); > # define arch_mutex_cpu_relax() cpu_relax() > #endif > > -#endif > +#endif /* __LINUX_MUTEX_H */ > diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c > index d24105b..e08b183 100644 > --- a/kernel/locking/mutex.c > +++ b/kernel/locking/mutex.c > @@ -25,6 +25,7 @@ > #include <linux/spinlock.h> > #include <linux/interrupt.h> > #include <linux/debug_locks.h> > +#include <linux/mcs_spinlock.h> > > /* > * In the DEBUG case we are using the "NULL fastpath" for mutexes, > @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) > INIT_LIST_HEAD(&lock->wait_list); > mutex_clear_owner(lock); > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > - lock->spin_mlock = NULL; > + lock->mcs_lock = NULL; > #endif > > debug_mutex_init(lock, name, key); > @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock); > * more or less simultaneously, the spinners need to acquire a MCS lock > * first before spinning on the owner field. > * > - * We don't inline mspin_lock() so that perf can correctly account for the > - * time spent in this lock function. > */ > -struct mspin_node { > - struct mspin_node *next ; > - int locked; /* 1 if lock acquired */ > -}; > -#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) > - > -static noinline > -void mspin_lock(struct mspin_node **lock, struct mspin_node *node) > -{ > - struct mspin_node *prev; > - > - /* Init node */ > - node->locked = 0; > - node->next = NULL; > - > - prev = xchg(lock, node); > - if (likely(prev == NULL)) { > - /* Lock acquired */ > - node->locked = 1; > - return; > - } > - ACCESS_ONCE(prev->next) = node; > - smp_wmb(); > - /* Wait until the lock holder passes the lock down */ > - while (!ACCESS_ONCE(node->locked)) > - arch_mutex_cpu_relax(); > -} > - > -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) > -{ > - struct mspin_node *next = ACCESS_ONCE(node->next); > - > - if (likely(!next)) { > - /* > - * Release the lock by setting it to NULL > - */ > - if (cmpxchg(lock, node, NULL) == node) > - return; > - /* Wait until the next pointer is set */ > - while (!(next = ACCESS_ONCE(node->next))) > - arch_mutex_cpu_relax(); > - } > - ACCESS_ONCE(next->locked) = 1; > - smp_wmb(); > -} > > /* > * Mutex spinning code migrated from kernel/sched/core.c > @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > for (;;) { > struct task_struct *owner; > - struct mspin_node node; > + struct mcs_spinlock node; > > if (use_ww_ctx && ww_ctx->acquired > 0) { > struct ww_mutex *ww; > @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > * If there's an owner, wait for it to either > * release the lock or go to sleep. > */ > - mspin_lock(MLOCK(lock), &node); > + mcs_spin_lock(&lock->mcs_lock, &node); > owner = ACCESS_ONCE(lock->owner); > if (owner && !mutex_spin_on_owner(lock, owner)) { > - mspin_unlock(MLOCK(lock), &node); > + mcs_spin_unlock(&lock->mcs_lock, &node); > goto slowpath; > } > > @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > } > > mutex_set_owner(lock); > - mspin_unlock(MLOCK(lock), &node); > + mcs_spin_unlock(&lock->mcs_lock, &node); > preempt_enable(); > return 0; > } > - mspin_unlock(MLOCK(lock), &node); > + mcs_spin_unlock(&lock->mcs_lock, &node); > > /* > * When there's no owner, we might have preempted between the > -- > 1.7.4.4 > > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file @ 2013-11-19 19:10 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:10 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Fri, Nov 08, 2013 at 11:51:52AM -0800, Tim Chen wrote: > We will need the MCS lock code for doing optimistic spinning for rwsem > and queue rwlock. Extracting the MCS code from mutex.c and put into > its own file allow us to reuse this code easily. > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > Signed-off-by: Davidlohr Bueso <davidlohr@hp.com> Please see comments below. Thanx, Paul > --- > include/linux/mcs_spinlock.h | 64 ++++++++++++++++++++++++++++++++++++++++++ > include/linux/mutex.h | 5 ++- > kernel/locking/mutex.c | 60 ++++---------------------------------- > 3 files changed, 74 insertions(+), 55 deletions(-) > create mode 100644 include/linux/mcs_spinlock.h > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > new file mode 100644 > index 0000000..b5de3b0 > --- /dev/null > +++ b/include/linux/mcs_spinlock.h > @@ -0,0 +1,64 @@ > +/* > + * MCS lock defines > + * > + * This file contains the main data structure and API definitions of MCS lock. > + * > + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock > + * with the desirable properties of being fair, and with each cpu trying > + * to acquire the lock spinning on a local variable. > + * It avoids expensive cache bouncings that common test-and-set spin-lock > + * implementations incur. > + */ > +#ifndef __LINUX_MCS_SPINLOCK_H > +#define __LINUX_MCS_SPINLOCK_H > + > +struct mcs_spinlock { > + struct mcs_spinlock *next; > + int locked; /* 1 if lock acquired */ > +}; > + > +/* > + * We don't inline mcs_spin_lock() so that perf can correctly account for the > + * time spent in this lock function. > + */ > +static noinline > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > +{ > + struct mcs_spinlock *prev; > + > + /* Init node */ > + node->locked = 0; > + node->next = NULL; > + > + prev = xchg(lock, node); OK, the full memory barriers implied by xchg() ensure that *node will be initialized before the "ACCESS_ONCE(prev->next) = node" below puts the node into the list. This rules out the misordering scenario that Tim Chen called out in message-id <1380322005.3467.186.camel@schen9-DESK> on September 27th. Assuming of course a corresponding barrier on the lock handoff side. > + if (likely(prev == NULL)) { > + /* Lock acquired */ > + node->locked = 1; > + return; > + } > + ACCESS_ONCE(prev->next) = node; > + smp_wmb(); I don't see what the above memory barrier does. Here are some things that it cannot be doing: o Ordering the insertion into the list above with the polling below. First, smp_wmb() does not order prior writes against later reads, and second misordering is harmless. If we start polling before the insertion is complete, all that happens is that the first few polls have no chance of seeing a lock grant. o Ordering the polling against the initialization -- the above xchg() is already doing that for us. So what is its purpose? > + /* Wait until the lock holder passes the lock down */ > + while (!ACCESS_ONCE(node->locked)) > + arch_mutex_cpu_relax(); On the other hand, I don't see how we get away without a barrier here. As written, what prevents the caller's load from ->owner from being reordered with the above load from ->locked? (Perhaps you can argue that such reordering is only a performance problem, but if so we need that argument recorded in comments.) Of course, if anyone ever tries to use mcs_spin_lock() as a full lock, they will need a memory barrier here to prevent the critical section from leaking out. > +} > + > +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > +{ > + struct mcs_spinlock *next = ACCESS_ONCE(node->next); > + > + if (likely(!next)) { > + /* > + * Release the lock by setting it to NULL > + */ > + if (cmpxchg(lock, node, NULL) == node) > + return; > + /* Wait until the next pointer is set */ > + while (!(next = ACCESS_ONCE(node->next))) > + arch_mutex_cpu_relax(); > + } We need a memory barrier somewhere before here in this function, otherwise the critical section can leak out. I do not believe that we can rely on the prohibition against speculative stores that Peter Zijlstra and I have been discussing because that does not provide the transitivity required by locking primitives. I believe that we -could- make the access below be an smp_store_release(), though. Placing the barrier here (or at least not preceding the initial fetch from node->next) has the advantage of allowing it to pair with the xchg() in mcs_spin_lock(), though given the dependency only an smp_read_barrier_depends() is required for that purpose. > + ACCESS_ONCE(next->locked) = 1; > + smp_wmb(); I don't see what this barrier does for us. It is ordering the unlock store with what, exactly? If it really is doing something, we need a big fat comment stating what that is, and checkpatch.pl will be happy to inform you. ;-) > +} > + > +#endif /* __LINUX_MCS_SPINLOCK_H */ > diff --git a/include/linux/mutex.h b/include/linux/mutex.h > index bab49da..32a32e6 100644 > --- a/include/linux/mutex.h > +++ b/include/linux/mutex.h > @@ -46,6 +46,7 @@ > * - detects multi-task circular deadlocks and prints out all affected > * locks and tasks (and only those tasks) > */ > +struct mcs_spinlock; > struct mutex { > /* 1: unlocked, 0: locked, negative: locked, possible waiters */ > atomic_t count; > @@ -55,7 +56,7 @@ struct mutex { > struct task_struct *owner; > #endif > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > - void *spin_mlock; /* Spinner MCS lock */ > + struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ > #endif > #ifdef CONFIG_DEBUG_MUTEXES > const char *name; > @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); > # define arch_mutex_cpu_relax() cpu_relax() > #endif > > -#endif > +#endif /* __LINUX_MUTEX_H */ > diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c > index d24105b..e08b183 100644 > --- a/kernel/locking/mutex.c > +++ b/kernel/locking/mutex.c > @@ -25,6 +25,7 @@ > #include <linux/spinlock.h> > #include <linux/interrupt.h> > #include <linux/debug_locks.h> > +#include <linux/mcs_spinlock.h> > > /* > * In the DEBUG case we are using the "NULL fastpath" for mutexes, > @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) > INIT_LIST_HEAD(&lock->wait_list); > mutex_clear_owner(lock); > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > - lock->spin_mlock = NULL; > + lock->mcs_lock = NULL; > #endif > > debug_mutex_init(lock, name, key); > @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock); > * more or less simultaneously, the spinners need to acquire a MCS lock > * first before spinning on the owner field. > * > - * We don't inline mspin_lock() so that perf can correctly account for the > - * time spent in this lock function. > */ > -struct mspin_node { > - struct mspin_node *next ; > - int locked; /* 1 if lock acquired */ > -}; > -#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) > - > -static noinline > -void mspin_lock(struct mspin_node **lock, struct mspin_node *node) > -{ > - struct mspin_node *prev; > - > - /* Init node */ > - node->locked = 0; > - node->next = NULL; > - > - prev = xchg(lock, node); > - if (likely(prev == NULL)) { > - /* Lock acquired */ > - node->locked = 1; > - return; > - } > - ACCESS_ONCE(prev->next) = node; > - smp_wmb(); > - /* Wait until the lock holder passes the lock down */ > - while (!ACCESS_ONCE(node->locked)) > - arch_mutex_cpu_relax(); > -} > - > -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) > -{ > - struct mspin_node *next = ACCESS_ONCE(node->next); > - > - if (likely(!next)) { > - /* > - * Release the lock by setting it to NULL > - */ > - if (cmpxchg(lock, node, NULL) == node) > - return; > - /* Wait until the next pointer is set */ > - while (!(next = ACCESS_ONCE(node->next))) > - arch_mutex_cpu_relax(); > - } > - ACCESS_ONCE(next->locked) = 1; > - smp_wmb(); > -} > > /* > * Mutex spinning code migrated from kernel/sched/core.c > @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > for (;;) { > struct task_struct *owner; > - struct mspin_node node; > + struct mcs_spinlock node; > > if (use_ww_ctx && ww_ctx->acquired > 0) { > struct ww_mutex *ww; > @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > * If there's an owner, wait for it to either > * release the lock or go to sleep. > */ > - mspin_lock(MLOCK(lock), &node); > + mcs_spin_lock(&lock->mcs_lock, &node); > owner = ACCESS_ONCE(lock->owner); > if (owner && !mutex_spin_on_owner(lock, owner)) { > - mspin_unlock(MLOCK(lock), &node); > + mcs_spin_unlock(&lock->mcs_lock, &node); > goto slowpath; > } > > @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > } > > mutex_set_owner(lock); > - mspin_unlock(MLOCK(lock), &node); > + mcs_spin_unlock(&lock->mcs_lock, &node); > preempt_enable(); > return 0; > } > - mspin_unlock(MLOCK(lock), &node); > + mcs_spin_unlock(&lock->mcs_lock, &node); > > /* > * When there's no owner, we might have preempted between the > -- > 1.7.4.4 > > > ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file @ 2013-11-19 19:42 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-19 19:42 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott On Tue, 2013-11-19 at 11:10 -0800, Paul E. McKenney wrote: > On Fri, Nov 08, 2013 at 11:51:52AM -0800, Tim Chen wrote: > > We will need the MCS lock code for doing optimistic spinning for rwsem > > and queue rwlock. Extracting the MCS code from mutex.c and put into > > its own file allow us to reuse this code easily. > > > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > > Signed-off-by: Davidlohr Bueso <davidlohr@hp.com> > > Please see comments below. > Thanks for reviewing the code. > Thanx, Paul > > > --- > > include/linux/mcs_spinlock.h | 64 ++++++++++++++++++++++++++++++++++++++++++ > > include/linux/mutex.h | 5 ++- > > kernel/locking/mutex.c | 60 ++++---------------------------------- > > 3 files changed, 74 insertions(+), 55 deletions(-) > > create mode 100644 include/linux/mcs_spinlock.h > > > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > > new file mode 100644 > > index 0000000..b5de3b0 > > --- /dev/null > > +++ b/include/linux/mcs_spinlock.h > > @@ -0,0 +1,64 @@ > > +/* > > + * MCS lock defines > > + * > > + * This file contains the main data structure and API definitions of MCS lock. > > + * > > + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock > > + * with the desirable properties of being fair, and with each cpu trying > > + * to acquire the lock spinning on a local variable. > > + * It avoids expensive cache bouncings that common test-and-set spin-lock > > + * implementations incur. > > + */ > > +#ifndef __LINUX_MCS_SPINLOCK_H > > +#define __LINUX_MCS_SPINLOCK_H > > + > > +struct mcs_spinlock { > > + struct mcs_spinlock *next; > > + int locked; /* 1 if lock acquired */ > > +}; > > + > > +/* > > + * We don't inline mcs_spin_lock() so that perf can correctly account for the > > + * time spent in this lock function. > > + */ > > +static noinline > > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > +{ > > + struct mcs_spinlock *prev; > > + > > + /* Init node */ > > + node->locked = 0; > > + node->next = NULL; > > + > > + prev = xchg(lock, node); > > OK, the full memory barriers implied by xchg() ensure that *node will be > initialized before the "ACCESS_ONCE(prev->next) = node" below puts the > node into the list. This rules out the misordering scenario that Tim > Chen called out in message-id <1380322005.3467.186.camel@schen9-DESK> > on September 27th. > > Assuming of course a corresponding barrier on the lock handoff side. > > > + if (likely(prev == NULL)) { > > + /* Lock acquired */ > > + node->locked = 1; > > + return; > > + } > > + ACCESS_ONCE(prev->next) = node; > > + smp_wmb(); > > I don't see what the above memory barrier does. Here are some things > that it cannot be doing: > > o Ordering the insertion into the list above with the polling > below. First, smp_wmb() does not order prior writes against > later reads, and second misordering is harmless. If we start > polling before the insertion is complete, all that happens > is that the first few polls have no chance of seeing a lock > grant. > > o Ordering the polling against the initialization -- the above > xchg() is already doing that for us. > > So what is its purpose? Agree that the smp_wmb is not needed. It is in the existing mcs code residing in mutex.c and we're re-factoring the code only here and hasn't corrected the memory barrier. The particular smp_wmb() is removed in Patch 4/4 that corrects the memory barriers. > > > + /* Wait until the lock holder passes the lock down */ > > + while (!ACCESS_ONCE(node->locked)) > > + arch_mutex_cpu_relax(); > > On the other hand, I don't see how we get away without a barrier here. > As written, what prevents the caller's load from ->owner from being > reordered with the above load from ->locked? (Perhaps you can argue > that such reordering is only a performance problem, but if so we need > that argument recorded in comments.) > > Of course, if anyone ever tries to use mcs_spin_lock() as a full lock, > they will need a memory barrier here to prevent the critical section > from leaking out. Agree too. The appropriate memory barrier is added in Patch 4/4. > > > +} > > + > > +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > +{ > > + struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > + > > + if (likely(!next)) { > > + /* > > + * Release the lock by setting it to NULL > > + */ > > + if (cmpxchg(lock, node, NULL) == node) > > + return; > > + /* Wait until the next pointer is set */ > > + while (!(next = ACCESS_ONCE(node->next))) > > + arch_mutex_cpu_relax(); > > + } > > We need a memory barrier somewhere before here in this function, > otherwise the critical section can leak out. I do not believe that > we can rely on the prohibition against speculative stores that Peter > Zijlstra and I have been discussing because that does not provide the > transitivity required by locking primitives. I believe that we -could- > make the access below be an smp_store_release(), though. > > Placing the barrier here (or at least not preceding the initial > fetch from node->next) has the advantage of allowing it to pair with > the xchg() in mcs_spin_lock(), though given the dependency only an > smp_read_barrier_depends() is required for that purpose. > > > + ACCESS_ONCE(next->locked) = 1; > > + smp_wmb(); > > I don't see what this barrier does for us. It is ordering the unlock > store with what, exactly? > > If it really is doing something, we need a big fat comment stating what > that is, and checkpatch.pl will be happy to inform you. ;-) > > > +} > > + > > +#endif /* __LINUX_MCS_SPINLOCK_H */ > > diff --git a/include/linux/mutex.h b/include/linux/mutex.h > > index bab49da..32a32e6 100644 > > --- a/include/linux/mutex.h > > +++ b/include/linux/mutex.h > > @@ -46,6 +46,7 @@ > > * - detects multi-task circular deadlocks and prints out all affected > > * locks and tasks (and only those tasks) > > */ > > +struct mcs_spinlock; > > struct mutex { > > /* 1: unlocked, 0: locked, negative: locked, possible waiters */ > > atomic_t count; > > @@ -55,7 +56,7 @@ struct mutex { > > struct task_struct *owner; > > #endif > > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > > - void *spin_mlock; /* Spinner MCS lock */ > > + struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ > > #endif > > #ifdef CONFIG_DEBUG_MUTEXES > > const char *name; > > @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); > > # define arch_mutex_cpu_relax() cpu_relax() > > #endif > > > > -#endif > > +#endif /* __LINUX_MUTEX_H */ > > diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c > > index d24105b..e08b183 100644 > > --- a/kernel/locking/mutex.c > > +++ b/kernel/locking/mutex.c > > @@ -25,6 +25,7 @@ > > #include <linux/spinlock.h> > > #include <linux/interrupt.h> > > #include <linux/debug_locks.h> > > +#include <linux/mcs_spinlock.h> > > > > /* > > * In the DEBUG case we are using the "NULL fastpath" for mutexes, > > @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) > > INIT_LIST_HEAD(&lock->wait_list); > > mutex_clear_owner(lock); > > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > > - lock->spin_mlock = NULL; > > + lock->mcs_lock = NULL; > > #endif > > > > debug_mutex_init(lock, name, key); > > @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock); > > * more or less simultaneously, the spinners need to acquire a MCS lock > > * first before spinning on the owner field. > > * > > - * We don't inline mspin_lock() so that perf can correctly account for the > > - * time spent in this lock function. > > */ > > -struct mspin_node { > > - struct mspin_node *next ; > > - int locked; /* 1 if lock acquired */ > > -}; > > -#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) > > - > > -static noinline > > -void mspin_lock(struct mspin_node **lock, struct mspin_node *node) > > -{ > > - struct mspin_node *prev; > > - > > - /* Init node */ > > - node->locked = 0; > > - node->next = NULL; > > - > > - prev = xchg(lock, node); > > - if (likely(prev == NULL)) { > > - /* Lock acquired */ > > - node->locked = 1; > > - return; > > - } > > - ACCESS_ONCE(prev->next) = node; > > - smp_wmb(); > > - /* Wait until the lock holder passes the lock down */ > > - while (!ACCESS_ONCE(node->locked)) > > - arch_mutex_cpu_relax(); > > -} > > - > > -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) > > -{ > > - struct mspin_node *next = ACCESS_ONCE(node->next); > > - > > - if (likely(!next)) { > > - /* > > - * Release the lock by setting it to NULL > > - */ > > - if (cmpxchg(lock, node, NULL) == node) > > - return; > > - /* Wait until the next pointer is set */ > > - while (!(next = ACCESS_ONCE(node->next))) > > - arch_mutex_cpu_relax(); > > - } > > - ACCESS_ONCE(next->locked) = 1; > > - smp_wmb(); > > -} > > > > /* > > * Mutex spinning code migrated from kernel/sched/core.c > > @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > > > for (;;) { > > struct task_struct *owner; > > - struct mspin_node node; > > + struct mcs_spinlock node; > > > > if (use_ww_ctx && ww_ctx->acquired > 0) { > > struct ww_mutex *ww; > > @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > * If there's an owner, wait for it to either > > * release the lock or go to sleep. > > */ > > - mspin_lock(MLOCK(lock), &node); > > + mcs_spin_lock(&lock->mcs_lock, &node); > > owner = ACCESS_ONCE(lock->owner); > > if (owner && !mutex_spin_on_owner(lock, owner)) { > > - mspin_unlock(MLOCK(lock), &node); > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > goto slowpath; > > } > > > > @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > } > > > > mutex_set_owner(lock); > > - mspin_unlock(MLOCK(lock), &node); > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > preempt_enable(); > > return 0; > > } > > - mspin_unlock(MLOCK(lock), &node); > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > > > /* > > * When there's no owner, we might have preempted between the > > -- > > 1.7.4.4 > > > > > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file @ 2013-11-19 19:42 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-19 19:42 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Tue, 2013-11-19 at 11:10 -0800, Paul E. McKenney wrote: > On Fri, Nov 08, 2013 at 11:51:52AM -0800, Tim Chen wrote: > > We will need the MCS lock code for doing optimistic spinning for rwsem > > and queue rwlock. Extracting the MCS code from mutex.c and put into > > its own file allow us to reuse this code easily. > > > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > > Signed-off-by: Davidlohr Bueso <davidlohr@hp.com> > > Please see comments below. > Thanks for reviewing the code. > Thanx, Paul > > > --- > > include/linux/mcs_spinlock.h | 64 ++++++++++++++++++++++++++++++++++++++++++ > > include/linux/mutex.h | 5 ++- > > kernel/locking/mutex.c | 60 ++++---------------------------------- > > 3 files changed, 74 insertions(+), 55 deletions(-) > > create mode 100644 include/linux/mcs_spinlock.h > > > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > > new file mode 100644 > > index 0000000..b5de3b0 > > --- /dev/null > > +++ b/include/linux/mcs_spinlock.h > > @@ -0,0 +1,64 @@ > > +/* > > + * MCS lock defines > > + * > > + * This file contains the main data structure and API definitions of MCS lock. > > + * > > + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock > > + * with the desirable properties of being fair, and with each cpu trying > > + * to acquire the lock spinning on a local variable. > > + * It avoids expensive cache bouncings that common test-and-set spin-lock > > + * implementations incur. > > + */ > > +#ifndef __LINUX_MCS_SPINLOCK_H > > +#define __LINUX_MCS_SPINLOCK_H > > + > > +struct mcs_spinlock { > > + struct mcs_spinlock *next; > > + int locked; /* 1 if lock acquired */ > > +}; > > + > > +/* > > + * We don't inline mcs_spin_lock() so that perf can correctly account for the > > + * time spent in this lock function. > > + */ > > +static noinline > > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > +{ > > + struct mcs_spinlock *prev; > > + > > + /* Init node */ > > + node->locked = 0; > > + node->next = NULL; > > + > > + prev = xchg(lock, node); > > OK, the full memory barriers implied by xchg() ensure that *node will be > initialized before the "ACCESS_ONCE(prev->next) = node" below puts the > node into the list. This rules out the misordering scenario that Tim > Chen called out in message-id <1380322005.3467.186.camel@schen9-DESK> > on September 27th. > > Assuming of course a corresponding barrier on the lock handoff side. > > > + if (likely(prev == NULL)) { > > + /* Lock acquired */ > > + node->locked = 1; > > + return; > > + } > > + ACCESS_ONCE(prev->next) = node; > > + smp_wmb(); > > I don't see what the above memory barrier does. Here are some things > that it cannot be doing: > > o Ordering the insertion into the list above with the polling > below. First, smp_wmb() does not order prior writes against > later reads, and second misordering is harmless. If we start > polling before the insertion is complete, all that happens > is that the first few polls have no chance of seeing a lock > grant. > > o Ordering the polling against the initialization -- the above > xchg() is already doing that for us. > > So what is its purpose? Agree that the smp_wmb is not needed. It is in the existing mcs code residing in mutex.c and we're re-factoring the code only here and hasn't corrected the memory barrier. The particular smp_wmb() is removed in Patch 4/4 that corrects the memory barriers. > > > + /* Wait until the lock holder passes the lock down */ > > + while (!ACCESS_ONCE(node->locked)) > > + arch_mutex_cpu_relax(); > > On the other hand, I don't see how we get away without a barrier here. > As written, what prevents the caller's load from ->owner from being > reordered with the above load from ->locked? (Perhaps you can argue > that such reordering is only a performance problem, but if so we need > that argument recorded in comments.) > > Of course, if anyone ever tries to use mcs_spin_lock() as a full lock, > they will need a memory barrier here to prevent the critical section > from leaking out. Agree too. The appropriate memory barrier is added in Patch 4/4. > > > +} > > + > > +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > +{ > > + struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > + > > + if (likely(!next)) { > > + /* > > + * Release the lock by setting it to NULL > > + */ > > + if (cmpxchg(lock, node, NULL) == node) > > + return; > > + /* Wait until the next pointer is set */ > > + while (!(next = ACCESS_ONCE(node->next))) > > + arch_mutex_cpu_relax(); > > + } > > We need a memory barrier somewhere before here in this function, > otherwise the critical section can leak out. I do not believe that > we can rely on the prohibition against speculative stores that Peter > Zijlstra and I have been discussing because that does not provide the > transitivity required by locking primitives. I believe that we -could- > make the access below be an smp_store_release(), though. > > Placing the barrier here (or at least not preceding the initial > fetch from node->next) has the advantage of allowing it to pair with > the xchg() in mcs_spin_lock(), though given the dependency only an > smp_read_barrier_depends() is required for that purpose. > > > + ACCESS_ONCE(next->locked) = 1; > > + smp_wmb(); > > I don't see what this barrier does for us. It is ordering the unlock > store with what, exactly? > > If it really is doing something, we need a big fat comment stating what > that is, and checkpatch.pl will be happy to inform you. ;-) > > > +} > > + > > +#endif /* __LINUX_MCS_SPINLOCK_H */ > > diff --git a/include/linux/mutex.h b/include/linux/mutex.h > > index bab49da..32a32e6 100644 > > --- a/include/linux/mutex.h > > +++ b/include/linux/mutex.h > > @@ -46,6 +46,7 @@ > > * - detects multi-task circular deadlocks and prints out all affected > > * locks and tasks (and only those tasks) > > */ > > +struct mcs_spinlock; > > struct mutex { > > /* 1: unlocked, 0: locked, negative: locked, possible waiters */ > > atomic_t count; > > @@ -55,7 +56,7 @@ struct mutex { > > struct task_struct *owner; > > #endif > > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > > - void *spin_mlock; /* Spinner MCS lock */ > > + struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ > > #endif > > #ifdef CONFIG_DEBUG_MUTEXES > > const char *name; > > @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); > > # define arch_mutex_cpu_relax() cpu_relax() > > #endif > > > > -#endif > > +#endif /* __LINUX_MUTEX_H */ > > diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c > > index d24105b..e08b183 100644 > > --- a/kernel/locking/mutex.c > > +++ b/kernel/locking/mutex.c > > @@ -25,6 +25,7 @@ > > #include <linux/spinlock.h> > > #include <linux/interrupt.h> > > #include <linux/debug_locks.h> > > +#include <linux/mcs_spinlock.h> > > > > /* > > * In the DEBUG case we are using the "NULL fastpath" for mutexes, > > @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) > > INIT_LIST_HEAD(&lock->wait_list); > > mutex_clear_owner(lock); > > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > > - lock->spin_mlock = NULL; > > + lock->mcs_lock = NULL; > > #endif > > > > debug_mutex_init(lock, name, key); > > @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock); > > * more or less simultaneously, the spinners need to acquire a MCS lock > > * first before spinning on the owner field. > > * > > - * We don't inline mspin_lock() so that perf can correctly account for the > > - * time spent in this lock function. > > */ > > -struct mspin_node { > > - struct mspin_node *next ; > > - int locked; /* 1 if lock acquired */ > > -}; > > -#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) > > - > > -static noinline > > -void mspin_lock(struct mspin_node **lock, struct mspin_node *node) > > -{ > > - struct mspin_node *prev; > > - > > - /* Init node */ > > - node->locked = 0; > > - node->next = NULL; > > - > > - prev = xchg(lock, node); > > - if (likely(prev == NULL)) { > > - /* Lock acquired */ > > - node->locked = 1; > > - return; > > - } > > - ACCESS_ONCE(prev->next) = node; > > - smp_wmb(); > > - /* Wait until the lock holder passes the lock down */ > > - while (!ACCESS_ONCE(node->locked)) > > - arch_mutex_cpu_relax(); > > -} > > - > > -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) > > -{ > > - struct mspin_node *next = ACCESS_ONCE(node->next); > > - > > - if (likely(!next)) { > > - /* > > - * Release the lock by setting it to NULL > > - */ > > - if (cmpxchg(lock, node, NULL) == node) > > - return; > > - /* Wait until the next pointer is set */ > > - while (!(next = ACCESS_ONCE(node->next))) > > - arch_mutex_cpu_relax(); > > - } > > - ACCESS_ONCE(next->locked) = 1; > > - smp_wmb(); > > -} > > > > /* > > * Mutex spinning code migrated from kernel/sched/core.c > > @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > > > for (;;) { > > struct task_struct *owner; > > - struct mspin_node node; > > + struct mcs_spinlock node; > > > > if (use_ww_ctx && ww_ctx->acquired > 0) { > > struct ww_mutex *ww; > > @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > * If there's an owner, wait for it to either > > * release the lock or go to sleep. > > */ > > - mspin_lock(MLOCK(lock), &node); > > + mcs_spin_lock(&lock->mcs_lock, &node); > > owner = ACCESS_ONCE(lock->owner); > > if (owner && !mutex_spin_on_owner(lock, owner)) { > > - mspin_unlock(MLOCK(lock), &node); > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > goto slowpath; > > } > > > > @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > } > > > > mutex_set_owner(lock); > > - mspin_unlock(MLOCK(lock), &node); > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > preempt_enable(); > > return 0; > > } > > - mspin_unlock(MLOCK(lock), &node); > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > > > /* > > * When there's no owner, we might have preempted between the > > -- > > 1.7.4.4 > > > > > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file @ 2013-11-19 19:42 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-19 19:42 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Tue, 2013-11-19 at 11:10 -0800, Paul E. McKenney wrote: > On Fri, Nov 08, 2013 at 11:51:52AM -0800, Tim Chen wrote: > > We will need the MCS lock code for doing optimistic spinning for rwsem > > and queue rwlock. Extracting the MCS code from mutex.c and put into > > its own file allow us to reuse this code easily. > > > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > > Signed-off-by: Davidlohr Bueso <davidlohr@hp.com> > > Please see comments below. > Thanks for reviewing the code. > Thanx, Paul > > > --- > > include/linux/mcs_spinlock.h | 64 ++++++++++++++++++++++++++++++++++++++++++ > > include/linux/mutex.h | 5 ++- > > kernel/locking/mutex.c | 60 ++++---------------------------------- > > 3 files changed, 74 insertions(+), 55 deletions(-) > > create mode 100644 include/linux/mcs_spinlock.h > > > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > > new file mode 100644 > > index 0000000..b5de3b0 > > --- /dev/null > > +++ b/include/linux/mcs_spinlock.h > > @@ -0,0 +1,64 @@ > > +/* > > + * MCS lock defines > > + * > > + * This file contains the main data structure and API definitions of MCS lock. > > + * > > + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock > > + * with the desirable properties of being fair, and with each cpu trying > > + * to acquire the lock spinning on a local variable. > > + * It avoids expensive cache bouncings that common test-and-set spin-lock > > + * implementations incur. > > + */ > > +#ifndef __LINUX_MCS_SPINLOCK_H > > +#define __LINUX_MCS_SPINLOCK_H > > + > > +struct mcs_spinlock { > > + struct mcs_spinlock *next; > > + int locked; /* 1 if lock acquired */ > > +}; > > + > > +/* > > + * We don't inline mcs_spin_lock() so that perf can correctly account for the > > + * time spent in this lock function. > > + */ > > +static noinline > > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > +{ > > + struct mcs_spinlock *prev; > > + > > + /* Init node */ > > + node->locked = 0; > > + node->next = NULL; > > + > > + prev = xchg(lock, node); > > OK, the full memory barriers implied by xchg() ensure that *node will be > initialized before the "ACCESS_ONCE(prev->next) = node" below puts the > node into the list. This rules out the misordering scenario that Tim > Chen called out in message-id <1380322005.3467.186.camel@schen9-DESK> > on September 27th. > > Assuming of course a corresponding barrier on the lock handoff side. > > > + if (likely(prev == NULL)) { > > + /* Lock acquired */ > > + node->locked = 1; > > + return; > > + } > > + ACCESS_ONCE(prev->next) = node; > > + smp_wmb(); > > I don't see what the above memory barrier does. Here are some things > that it cannot be doing: > > o Ordering the insertion into the list above with the polling > below. First, smp_wmb() does not order prior writes against > later reads, and second misordering is harmless. If we start > polling before the insertion is complete, all that happens > is that the first few polls have no chance of seeing a lock > grant. > > o Ordering the polling against the initialization -- the above > xchg() is already doing that for us. > > So what is its purpose? Agree that the smp_wmb is not needed. It is in the existing mcs code residing in mutex.c and we're re-factoring the code only here and hasn't corrected the memory barrier. The particular smp_wmb() is removed in Patch 4/4 that corrects the memory barriers. > > > + /* Wait until the lock holder passes the lock down */ > > + while (!ACCESS_ONCE(node->locked)) > > + arch_mutex_cpu_relax(); > > On the other hand, I don't see how we get away without a barrier here. > As written, what prevents the caller's load from ->owner from being > reordered with the above load from ->locked? (Perhaps you can argue > that such reordering is only a performance problem, but if so we need > that argument recorded in comments.) > > Of course, if anyone ever tries to use mcs_spin_lock() as a full lock, > they will need a memory barrier here to prevent the critical section > from leaking out. Agree too. The appropriate memory barrier is added in Patch 4/4. > > > +} > > + > > +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > +{ > > + struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > + > > + if (likely(!next)) { > > + /* > > + * Release the lock by setting it to NULL > > + */ > > + if (cmpxchg(lock, node, NULL) == node) > > + return; > > + /* Wait until the next pointer is set */ > > + while (!(next = ACCESS_ONCE(node->next))) > > + arch_mutex_cpu_relax(); > > + } > > We need a memory barrier somewhere before here in this function, > otherwise the critical section can leak out. I do not believe that > we can rely on the prohibition against speculative stores that Peter > Zijlstra and I have been discussing because that does not provide the > transitivity required by locking primitives. I believe that we -could- > make the access below be an smp_store_release(), though. > > Placing the barrier here (or at least not preceding the initial > fetch from node->next) has the advantage of allowing it to pair with > the xchg() in mcs_spin_lock(), though given the dependency only an > smp_read_barrier_depends() is required for that purpose. > > > + ACCESS_ONCE(next->locked) = 1; > > + smp_wmb(); > > I don't see what this barrier does for us. It is ordering the unlock > store with what, exactly? > > If it really is doing something, we need a big fat comment stating what > that is, and checkpatch.pl will be happy to inform you. ;-) > > > +} > > + > > +#endif /* __LINUX_MCS_SPINLOCK_H */ > > diff --git a/include/linux/mutex.h b/include/linux/mutex.h > > index bab49da..32a32e6 100644 > > --- a/include/linux/mutex.h > > +++ b/include/linux/mutex.h > > @@ -46,6 +46,7 @@ > > * - detects multi-task circular deadlocks and prints out all affected > > * locks and tasks (and only those tasks) > > */ > > +struct mcs_spinlock; > > struct mutex { > > /* 1: unlocked, 0: locked, negative: locked, possible waiters */ > > atomic_t count; > > @@ -55,7 +56,7 @@ struct mutex { > > struct task_struct *owner; > > #endif > > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > > - void *spin_mlock; /* Spinner MCS lock */ > > + struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ > > #endif > > #ifdef CONFIG_DEBUG_MUTEXES > > const char *name; > > @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); > > # define arch_mutex_cpu_relax() cpu_relax() > > #endif > > > > -#endif > > +#endif /* __LINUX_MUTEX_H */ > > diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c > > index d24105b..e08b183 100644 > > --- a/kernel/locking/mutex.c > > +++ b/kernel/locking/mutex.c > > @@ -25,6 +25,7 @@ > > #include <linux/spinlock.h> > > #include <linux/interrupt.h> > > #include <linux/debug_locks.h> > > +#include <linux/mcs_spinlock.h> > > > > /* > > * In the DEBUG case we are using the "NULL fastpath" for mutexes, > > @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) > > INIT_LIST_HEAD(&lock->wait_list); > > mutex_clear_owner(lock); > > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > > - lock->spin_mlock = NULL; > > + lock->mcs_lock = NULL; > > #endif > > > > debug_mutex_init(lock, name, key); > > @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock); > > * more or less simultaneously, the spinners need to acquire a MCS lock > > * first before spinning on the owner field. > > * > > - * We don't inline mspin_lock() so that perf can correctly account for the > > - * time spent in this lock function. > > */ > > -struct mspin_node { > > - struct mspin_node *next ; > > - int locked; /* 1 if lock acquired */ > > -}; > > -#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) > > - > > -static noinline > > -void mspin_lock(struct mspin_node **lock, struct mspin_node *node) > > -{ > > - struct mspin_node *prev; > > - > > - /* Init node */ > > - node->locked = 0; > > - node->next = NULL; > > - > > - prev = xchg(lock, node); > > - if (likely(prev == NULL)) { > > - /* Lock acquired */ > > - node->locked = 1; > > - return; > > - } > > - ACCESS_ONCE(prev->next) = node; > > - smp_wmb(); > > - /* Wait until the lock holder passes the lock down */ > > - while (!ACCESS_ONCE(node->locked)) > > - arch_mutex_cpu_relax(); > > -} > > - > > -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) > > -{ > > - struct mspin_node *next = ACCESS_ONCE(node->next); > > - > > - if (likely(!next)) { > > - /* > > - * Release the lock by setting it to NULL > > - */ > > - if (cmpxchg(lock, node, NULL) == node) > > - return; > > - /* Wait until the next pointer is set */ > > - while (!(next = ACCESS_ONCE(node->next))) > > - arch_mutex_cpu_relax(); > > - } > > - ACCESS_ONCE(next->locked) = 1; > > - smp_wmb(); > > -} > > > > /* > > * Mutex spinning code migrated from kernel/sched/core.c > > @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > > > for (;;) { > > struct task_struct *owner; > > - struct mspin_node node; > > + struct mcs_spinlock node; > > > > if (use_ww_ctx && ww_ctx->acquired > 0) { > > struct ww_mutex *ww; > > @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > * If there's an owner, wait for it to either > > * release the lock or go to sleep. > > */ > > - mspin_lock(MLOCK(lock), &node); > > + mcs_spin_lock(&lock->mcs_lock, &node); > > owner = ACCESS_ONCE(lock->owner); > > if (owner && !mutex_spin_on_owner(lock, owner)) { > > - mspin_unlock(MLOCK(lock), &node); > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > goto slowpath; > > } > > > > @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > } > > > > mutex_set_owner(lock); > > - mspin_unlock(MLOCK(lock), &node); > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > preempt_enable(); > > return 0; > > } > > - mspin_unlock(MLOCK(lock), &node); > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > > > /* > > * When there's no owner, we might have preempted between the > > -- > > 1.7.4.4 > > > > > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file @ 2013-11-19 19:54 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:54 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott On Tue, Nov 19, 2013 at 11:42:32AM -0800, Tim Chen wrote: > On Tue, 2013-11-19 at 11:10 -0800, Paul E. McKenney wrote: > > On Fri, Nov 08, 2013 at 11:51:52AM -0800, Tim Chen wrote: > > > We will need the MCS lock code for doing optimistic spinning for rwsem > > > and queue rwlock. Extracting the MCS code from mutex.c and put into > > > its own file allow us to reuse this code easily. > > > > > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > > > Signed-off-by: Davidlohr Bueso <davidlohr@hp.com> > > > > Please see comments below. > > > > Thanks for reviewing the code. > > > Thanx, Paul > > > > > --- > > > include/linux/mcs_spinlock.h | 64 ++++++++++++++++++++++++++++++++++++++++++ > > > include/linux/mutex.h | 5 ++- > > > kernel/locking/mutex.c | 60 ++++---------------------------------- > > > 3 files changed, 74 insertions(+), 55 deletions(-) > > > create mode 100644 include/linux/mcs_spinlock.h > > > > > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > > > new file mode 100644 > > > index 0000000..b5de3b0 > > > --- /dev/null > > > +++ b/include/linux/mcs_spinlock.h > > > @@ -0,0 +1,64 @@ > > > +/* > > > + * MCS lock defines > > > + * > > > + * This file contains the main data structure and API definitions of MCS lock. > > > + * > > > + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock > > > + * with the desirable properties of being fair, and with each cpu trying > > > + * to acquire the lock spinning on a local variable. > > > + * It avoids expensive cache bouncings that common test-and-set spin-lock > > > + * implementations incur. > > > + */ > > > +#ifndef __LINUX_MCS_SPINLOCK_H > > > +#define __LINUX_MCS_SPINLOCK_H > > > + > > > +struct mcs_spinlock { > > > + struct mcs_spinlock *next; > > > + int locked; /* 1 if lock acquired */ > > > +}; > > > + > > > +/* > > > + * We don't inline mcs_spin_lock() so that perf can correctly account for the > > > + * time spent in this lock function. > > > + */ > > > +static noinline > > > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > > +{ > > > + struct mcs_spinlock *prev; > > > + > > > + /* Init node */ > > > + node->locked = 0; > > > + node->next = NULL; > > > + > > > + prev = xchg(lock, node); > > > > OK, the full memory barriers implied by xchg() ensure that *node will be > > initialized before the "ACCESS_ONCE(prev->next) = node" below puts the > > node into the list. This rules out the misordering scenario that Tim > > Chen called out in message-id <1380322005.3467.186.camel@schen9-DESK> > > on September 27th. > > > > Assuming of course a corresponding barrier on the lock handoff side. > > > > > + if (likely(prev == NULL)) { > > > + /* Lock acquired */ > > > + node->locked = 1; > > > + return; > > > + } > > > + ACCESS_ONCE(prev->next) = node; > > > + smp_wmb(); > > > > I don't see what the above memory barrier does. Here are some things > > that it cannot be doing: > > > > o Ordering the insertion into the list above with the polling > > below. First, smp_wmb() does not order prior writes against > > later reads, and second misordering is harmless. If we start > > polling before the insertion is complete, all that happens > > is that the first few polls have no chance of seeing a lock > > grant. > > > > o Ordering the polling against the initialization -- the above > > xchg() is already doing that for us. > > > > So what is its purpose? > > Agree that the smp_wmb is not needed. It is in the existing mcs code > residing in mutex.c and we're re-factoring the code only here and hasn't > corrected the memory barrier. Ah, so I should have been more aggressive about reviewing some time back, then... ;-) Thanx, Paul > The particular smp_wmb() is removed in Patch 4/4 that corrects the > memory barriers. > > > > > > + /* Wait until the lock holder passes the lock down */ > > > + while (!ACCESS_ONCE(node->locked)) > > > + arch_mutex_cpu_relax(); > > > > On the other hand, I don't see how we get away without a barrier here. > > As written, what prevents the caller's load from ->owner from being > > reordered with the above load from ->locked? (Perhaps you can argue > > that such reordering is only a performance problem, but if so we need > > that argument recorded in comments.) > > > > Of course, if anyone ever tries to use mcs_spin_lock() as a full lock, > > they will need a memory barrier here to prevent the critical section > > from leaking out. > > Agree too. The appropriate memory barrier is added in Patch 4/4. > > > > > > +} > > > + > > > +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > > +{ > > > + struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > > + > > > + if (likely(!next)) { > > > + /* > > > + * Release the lock by setting it to NULL > > > + */ > > > + if (cmpxchg(lock, node, NULL) == node) > > > + return; > > > + /* Wait until the next pointer is set */ > > > + while (!(next = ACCESS_ONCE(node->next))) > > > + arch_mutex_cpu_relax(); > > > + } > > > > We need a memory barrier somewhere before here in this function, > > otherwise the critical section can leak out. I do not believe that > > we can rely on the prohibition against speculative stores that Peter > > Zijlstra and I have been discussing because that does not provide the > > transitivity required by locking primitives. I believe that we -could- > > make the access below be an smp_store_release(), though. > > > > Placing the barrier here (or at least not preceding the initial > > fetch from node->next) has the advantage of allowing it to pair with > > the xchg() in mcs_spin_lock(), though given the dependency only an > > smp_read_barrier_depends() is required for that purpose. > > > > > + ACCESS_ONCE(next->locked) = 1; > > > + smp_wmb(); > > > > I don't see what this barrier does for us. It is ordering the unlock > > store with what, exactly? > > > > If it really is doing something, we need a big fat comment stating what > > that is, and checkpatch.pl will be happy to inform you. ;-) > > > > > +} > > > + > > > +#endif /* __LINUX_MCS_SPINLOCK_H */ > > > diff --git a/include/linux/mutex.h b/include/linux/mutex.h > > > index bab49da..32a32e6 100644 > > > --- a/include/linux/mutex.h > > > +++ b/include/linux/mutex.h > > > @@ -46,6 +46,7 @@ > > > * - detects multi-task circular deadlocks and prints out all affected > > > * locks and tasks (and only those tasks) > > > */ > > > +struct mcs_spinlock; > > > struct mutex { > > > /* 1: unlocked, 0: locked, negative: locked, possible waiters */ > > > atomic_t count; > > > @@ -55,7 +56,7 @@ struct mutex { > > > struct task_struct *owner; > > > #endif > > > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > > > - void *spin_mlock; /* Spinner MCS lock */ > > > + struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ > > > #endif > > > #ifdef CONFIG_DEBUG_MUTEXES > > > const char *name; > > > @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); > > > # define arch_mutex_cpu_relax() cpu_relax() > > > #endif > > > > > > -#endif > > > +#endif /* __LINUX_MUTEX_H */ > > > diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c > > > index d24105b..e08b183 100644 > > > --- a/kernel/locking/mutex.c > > > +++ b/kernel/locking/mutex.c > > > @@ -25,6 +25,7 @@ > > > #include <linux/spinlock.h> > > > #include <linux/interrupt.h> > > > #include <linux/debug_locks.h> > > > +#include <linux/mcs_spinlock.h> > > > > > > /* > > > * In the DEBUG case we are using the "NULL fastpath" for mutexes, > > > @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) > > > INIT_LIST_HEAD(&lock->wait_list); > > > mutex_clear_owner(lock); > > > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > > > - lock->spin_mlock = NULL; > > > + lock->mcs_lock = NULL; > > > #endif > > > > > > debug_mutex_init(lock, name, key); > > > @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock); > > > * more or less simultaneously, the spinners need to acquire a MCS lock > > > * first before spinning on the owner field. > > > * > > > - * We don't inline mspin_lock() so that perf can correctly account for the > > > - * time spent in this lock function. > > > */ > > > -struct mspin_node { > > > - struct mspin_node *next ; > > > - int locked; /* 1 if lock acquired */ > > > -}; > > > -#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) > > > - > > > -static noinline > > > -void mspin_lock(struct mspin_node **lock, struct mspin_node *node) > > > -{ > > > - struct mspin_node *prev; > > > - > > > - /* Init node */ > > > - node->locked = 0; > > > - node->next = NULL; > > > - > > > - prev = xchg(lock, node); > > > - if (likely(prev == NULL)) { > > > - /* Lock acquired */ > > > - node->locked = 1; > > > - return; > > > - } > > > - ACCESS_ONCE(prev->next) = node; > > > - smp_wmb(); > > > - /* Wait until the lock holder passes the lock down */ > > > - while (!ACCESS_ONCE(node->locked)) > > > - arch_mutex_cpu_relax(); > > > -} > > > - > > > -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) > > > -{ > > > - struct mspin_node *next = ACCESS_ONCE(node->next); > > > - > > > - if (likely(!next)) { > > > - /* > > > - * Release the lock by setting it to NULL > > > - */ > > > - if (cmpxchg(lock, node, NULL) == node) > > > - return; > > > - /* Wait until the next pointer is set */ > > > - while (!(next = ACCESS_ONCE(node->next))) > > > - arch_mutex_cpu_relax(); > > > - } > > > - ACCESS_ONCE(next->locked) = 1; > > > - smp_wmb(); > > > -} > > > > > > /* > > > * Mutex spinning code migrated from kernel/sched/core.c > > > @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > > > > > for (;;) { > > > struct task_struct *owner; > > > - struct mspin_node node; > > > + struct mcs_spinlock node; > > > > > > if (use_ww_ctx && ww_ctx->acquired > 0) { > > > struct ww_mutex *ww; > > > @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > > * If there's an owner, wait for it to either > > > * release the lock or go to sleep. > > > */ > > > - mspin_lock(MLOCK(lock), &node); > > > + mcs_spin_lock(&lock->mcs_lock, &node); > > > owner = ACCESS_ONCE(lock->owner); > > > if (owner && !mutex_spin_on_owner(lock, owner)) { > > > - mspin_unlock(MLOCK(lock), &node); > > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > > goto slowpath; > > > } > > > > > > @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > > } > > > > > > mutex_set_owner(lock); > > > - mspin_unlock(MLOCK(lock), &node); > > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > > preempt_enable(); > > > return 0; > > > } > > > - mspin_unlock(MLOCK(lock), &node); > > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > > > > > /* > > > * When there's no owner, we might have preempted between the > > > -- > > > 1.7.4.4 > > > > > > > > > > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > Please read the FAQ at http://www.tux.org/lkml/ > > ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file @ 2013-11-19 19:54 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:54 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Tue, Nov 19, 2013 at 11:42:32AM -0800, Tim Chen wrote: > On Tue, 2013-11-19 at 11:10 -0800, Paul E. McKenney wrote: > > On Fri, Nov 08, 2013 at 11:51:52AM -0800, Tim Chen wrote: > > > We will need the MCS lock code for doing optimistic spinning for rwsem > > > and queue rwlock. Extracting the MCS code from mutex.c and put into > > > its own file allow us to reuse this code easily. > > > > > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > > > Signed-off-by: Davidlohr Bueso <davidlohr@hp.com> > > > > Please see comments below. > > > > Thanks for reviewing the code. > > > Thanx, Paul > > > > > --- > > > include/linux/mcs_spinlock.h | 64 ++++++++++++++++++++++++++++++++++++++++++ > > > include/linux/mutex.h | 5 ++- > > > kernel/locking/mutex.c | 60 ++++---------------------------------- > > > 3 files changed, 74 insertions(+), 55 deletions(-) > > > create mode 100644 include/linux/mcs_spinlock.h > > > > > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > > > new file mode 100644 > > > index 0000000..b5de3b0 > > > --- /dev/null > > > +++ b/include/linux/mcs_spinlock.h > > > @@ -0,0 +1,64 @@ > > > +/* > > > + * MCS lock defines > > > + * > > > + * This file contains the main data structure and API definitions of MCS lock. > > > + * > > > + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock > > > + * with the desirable properties of being fair, and with each cpu trying > > > + * to acquire the lock spinning on a local variable. > > > + * It avoids expensive cache bouncings that common test-and-set spin-lock > > > + * implementations incur. > > > + */ > > > +#ifndef __LINUX_MCS_SPINLOCK_H > > > +#define __LINUX_MCS_SPINLOCK_H > > > + > > > +struct mcs_spinlock { > > > + struct mcs_spinlock *next; > > > + int locked; /* 1 if lock acquired */ > > > +}; > > > + > > > +/* > > > + * We don't inline mcs_spin_lock() so that perf can correctly account for the > > > + * time spent in this lock function. > > > + */ > > > +static noinline > > > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > > +{ > > > + struct mcs_spinlock *prev; > > > + > > > + /* Init node */ > > > + node->locked = 0; > > > + node->next = NULL; > > > + > > > + prev = xchg(lock, node); > > > > OK, the full memory barriers implied by xchg() ensure that *node will be > > initialized before the "ACCESS_ONCE(prev->next) = node" below puts the > > node into the list. This rules out the misordering scenario that Tim > > Chen called out in message-id <1380322005.3467.186.camel@schen9-DESK> > > on September 27th. > > > > Assuming of course a corresponding barrier on the lock handoff side. > > > > > + if (likely(prev == NULL)) { > > > + /* Lock acquired */ > > > + node->locked = 1; > > > + return; > > > + } > > > + ACCESS_ONCE(prev->next) = node; > > > + smp_wmb(); > > > > I don't see what the above memory barrier does. Here are some things > > that it cannot be doing: > > > > o Ordering the insertion into the list above with the polling > > below. First, smp_wmb() does not order prior writes against > > later reads, and second misordering is harmless. If we start > > polling before the insertion is complete, all that happens > > is that the first few polls have no chance of seeing a lock > > grant. > > > > o Ordering the polling against the initialization -- the above > > xchg() is already doing that for us. > > > > So what is its purpose? > > Agree that the smp_wmb is not needed. It is in the existing mcs code > residing in mutex.c and we're re-factoring the code only here and hasn't > corrected the memory barrier. Ah, so I should have been more aggressive about reviewing some time back, then... ;-) Thanx, Paul > The particular smp_wmb() is removed in Patch 4/4 that corrects the > memory barriers. > > > > > > + /* Wait until the lock holder passes the lock down */ > > > + while (!ACCESS_ONCE(node->locked)) > > > + arch_mutex_cpu_relax(); > > > > On the other hand, I don't see how we get away without a barrier here. > > As written, what prevents the caller's load from ->owner from being > > reordered with the above load from ->locked? (Perhaps you can argue > > that such reordering is only a performance problem, but if so we need > > that argument recorded in comments.) > > > > Of course, if anyone ever tries to use mcs_spin_lock() as a full lock, > > they will need a memory barrier here to prevent the critical section > > from leaking out. > > Agree too. The appropriate memory barrier is added in Patch 4/4. > > > > > > +} > > > + > > > +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > > +{ > > > + struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > > + > > > + if (likely(!next)) { > > > + /* > > > + * Release the lock by setting it to NULL > > > + */ > > > + if (cmpxchg(lock, node, NULL) == node) > > > + return; > > > + /* Wait until the next pointer is set */ > > > + while (!(next = ACCESS_ONCE(node->next))) > > > + arch_mutex_cpu_relax(); > > > + } > > > > We need a memory barrier somewhere before here in this function, > > otherwise the critical section can leak out. I do not believe that > > we can rely on the prohibition against speculative stores that Peter > > Zijlstra and I have been discussing because that does not provide the > > transitivity required by locking primitives. I believe that we -could- > > make the access below be an smp_store_release(), though. > > > > Placing the barrier here (or at least not preceding the initial > > fetch from node->next) has the advantage of allowing it to pair with > > the xchg() in mcs_spin_lock(), though given the dependency only an > > smp_read_barrier_depends() is required for that purpose. > > > > > + ACCESS_ONCE(next->locked) = 1; > > > + smp_wmb(); > > > > I don't see what this barrier does for us. It is ordering the unlock > > store with what, exactly? > > > > If it really is doing something, we need a big fat comment stating what > > that is, and checkpatch.pl will be happy to inform you. ;-) > > > > > +} > > > + > > > +#endif /* __LINUX_MCS_SPINLOCK_H */ > > > diff --git a/include/linux/mutex.h b/include/linux/mutex.h > > > index bab49da..32a32e6 100644 > > > --- a/include/linux/mutex.h > > > +++ b/include/linux/mutex.h > > > @@ -46,6 +46,7 @@ > > > * - detects multi-task circular deadlocks and prints out all affected > > > * locks and tasks (and only those tasks) > > > */ > > > +struct mcs_spinlock; > > > struct mutex { > > > /* 1: unlocked, 0: locked, negative: locked, possible waiters */ > > > atomic_t count; > > > @@ -55,7 +56,7 @@ struct mutex { > > > struct task_struct *owner; > > > #endif > > > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > > > - void *spin_mlock; /* Spinner MCS lock */ > > > + struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ > > > #endif > > > #ifdef CONFIG_DEBUG_MUTEXES > > > const char *name; > > > @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); > > > # define arch_mutex_cpu_relax() cpu_relax() > > > #endif > > > > > > -#endif > > > +#endif /* __LINUX_MUTEX_H */ > > > diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c > > > index d24105b..e08b183 100644 > > > --- a/kernel/locking/mutex.c > > > +++ b/kernel/locking/mutex.c > > > @@ -25,6 +25,7 @@ > > > #include <linux/spinlock.h> > > > #include <linux/interrupt.h> > > > #include <linux/debug_locks.h> > > > +#include <linux/mcs_spinlock.h> > > > > > > /* > > > * In the DEBUG case we are using the "NULL fastpath" for mutexes, > > > @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) > > > INIT_LIST_HEAD(&lock->wait_list); > > > mutex_clear_owner(lock); > > > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > > > - lock->spin_mlock = NULL; > > > + lock->mcs_lock = NULL; > > > #endif > > > > > > debug_mutex_init(lock, name, key); > > > @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock); > > > * more or less simultaneously, the spinners need to acquire a MCS lock > > > * first before spinning on the owner field. > > > * > > > - * We don't inline mspin_lock() so that perf can correctly account for the > > > - * time spent in this lock function. > > > */ > > > -struct mspin_node { > > > - struct mspin_node *next ; > > > - int locked; /* 1 if lock acquired */ > > > -}; > > > -#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) > > > - > > > -static noinline > > > -void mspin_lock(struct mspin_node **lock, struct mspin_node *node) > > > -{ > > > - struct mspin_node *prev; > > > - > > > - /* Init node */ > > > - node->locked = 0; > > > - node->next = NULL; > > > - > > > - prev = xchg(lock, node); > > > - if (likely(prev == NULL)) { > > > - /* Lock acquired */ > > > - node->locked = 1; > > > - return; > > > - } > > > - ACCESS_ONCE(prev->next) = node; > > > - smp_wmb(); > > > - /* Wait until the lock holder passes the lock down */ > > > - while (!ACCESS_ONCE(node->locked)) > > > - arch_mutex_cpu_relax(); > > > -} > > > - > > > -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) > > > -{ > > > - struct mspin_node *next = ACCESS_ONCE(node->next); > > > - > > > - if (likely(!next)) { > > > - /* > > > - * Release the lock by setting it to NULL > > > - */ > > > - if (cmpxchg(lock, node, NULL) == node) > > > - return; > > > - /* Wait until the next pointer is set */ > > > - while (!(next = ACCESS_ONCE(node->next))) > > > - arch_mutex_cpu_relax(); > > > - } > > > - ACCESS_ONCE(next->locked) = 1; > > > - smp_wmb(); > > > -} > > > > > > /* > > > * Mutex spinning code migrated from kernel/sched/core.c > > > @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > > > > > for (;;) { > > > struct task_struct *owner; > > > - struct mspin_node node; > > > + struct mcs_spinlock node; > > > > > > if (use_ww_ctx && ww_ctx->acquired > 0) { > > > struct ww_mutex *ww; > > > @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > > * If there's an owner, wait for it to either > > > * release the lock or go to sleep. > > > */ > > > - mspin_lock(MLOCK(lock), &node); > > > + mcs_spin_lock(&lock->mcs_lock, &node); > > > owner = ACCESS_ONCE(lock->owner); > > > if (owner && !mutex_spin_on_owner(lock, owner)) { > > > - mspin_unlock(MLOCK(lock), &node); > > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > > goto slowpath; > > > } > > > > > > @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > > } > > > > > > mutex_set_owner(lock); > > > - mspin_unlock(MLOCK(lock), &node); > > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > > preempt_enable(); > > > return 0; > > > } > > > - mspin_unlock(MLOCK(lock), &node); > > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > > > > > /* > > > * When there's no owner, we might have preempted between the > > > -- > > > 1.7.4.4 > > > > > > > > > > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > Please read the FAQ at http://www.tux.org/lkml/ > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file @ 2013-11-19 19:54 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:54 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Tue, Nov 19, 2013 at 11:42:32AM -0800, Tim Chen wrote: > On Tue, 2013-11-19 at 11:10 -0800, Paul E. McKenney wrote: > > On Fri, Nov 08, 2013 at 11:51:52AM -0800, Tim Chen wrote: > > > We will need the MCS lock code for doing optimistic spinning for rwsem > > > and queue rwlock. Extracting the MCS code from mutex.c and put into > > > its own file allow us to reuse this code easily. > > > > > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > > > Signed-off-by: Davidlohr Bueso <davidlohr@hp.com> > > > > Please see comments below. > > > > Thanks for reviewing the code. > > > Thanx, Paul > > > > > --- > > > include/linux/mcs_spinlock.h | 64 ++++++++++++++++++++++++++++++++++++++++++ > > > include/linux/mutex.h | 5 ++- > > > kernel/locking/mutex.c | 60 ++++---------------------------------- > > > 3 files changed, 74 insertions(+), 55 deletions(-) > > > create mode 100644 include/linux/mcs_spinlock.h > > > > > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > > > new file mode 100644 > > > index 0000000..b5de3b0 > > > --- /dev/null > > > +++ b/include/linux/mcs_spinlock.h > > > @@ -0,0 +1,64 @@ > > > +/* > > > + * MCS lock defines > > > + * > > > + * This file contains the main data structure and API definitions of MCS lock. > > > + * > > > + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock > > > + * with the desirable properties of being fair, and with each cpu trying > > > + * to acquire the lock spinning on a local variable. > > > + * It avoids expensive cache bouncings that common test-and-set spin-lock > > > + * implementations incur. > > > + */ > > > +#ifndef __LINUX_MCS_SPINLOCK_H > > > +#define __LINUX_MCS_SPINLOCK_H > > > + > > > +struct mcs_spinlock { > > > + struct mcs_spinlock *next; > > > + int locked; /* 1 if lock acquired */ > > > +}; > > > + > > > +/* > > > + * We don't inline mcs_spin_lock() so that perf can correctly account for the > > > + * time spent in this lock function. > > > + */ > > > +static noinline > > > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > > +{ > > > + struct mcs_spinlock *prev; > > > + > > > + /* Init node */ > > > + node->locked = 0; > > > + node->next = NULL; > > > + > > > + prev = xchg(lock, node); > > > > OK, the full memory barriers implied by xchg() ensure that *node will be > > initialized before the "ACCESS_ONCE(prev->next) = node" below puts the > > node into the list. This rules out the misordering scenario that Tim > > Chen called out in message-id <1380322005.3467.186.camel@schen9-DESK> > > on September 27th. > > > > Assuming of course a corresponding barrier on the lock handoff side. > > > > > + if (likely(prev == NULL)) { > > > + /* Lock acquired */ > > > + node->locked = 1; > > > + return; > > > + } > > > + ACCESS_ONCE(prev->next) = node; > > > + smp_wmb(); > > > > I don't see what the above memory barrier does. Here are some things > > that it cannot be doing: > > > > o Ordering the insertion into the list above with the polling > > below. First, smp_wmb() does not order prior writes against > > later reads, and second misordering is harmless. If we start > > polling before the insertion is complete, all that happens > > is that the first few polls have no chance of seeing a lock > > grant. > > > > o Ordering the polling against the initialization -- the above > > xchg() is already doing that for us. > > > > So what is its purpose? > > Agree that the smp_wmb is not needed. It is in the existing mcs code > residing in mutex.c and we're re-factoring the code only here and hasn't > corrected the memory barrier. Ah, so I should have been more aggressive about reviewing some time back, then... ;-) Thanx, Paul > The particular smp_wmb() is removed in Patch 4/4 that corrects the > memory barriers. > > > > > > + /* Wait until the lock holder passes the lock down */ > > > + while (!ACCESS_ONCE(node->locked)) > > > + arch_mutex_cpu_relax(); > > > > On the other hand, I don't see how we get away without a barrier here. > > As written, what prevents the caller's load from ->owner from being > > reordered with the above load from ->locked? (Perhaps you can argue > > that such reordering is only a performance problem, but if so we need > > that argument recorded in comments.) > > > > Of course, if anyone ever tries to use mcs_spin_lock() as a full lock, > > they will need a memory barrier here to prevent the critical section > > from leaking out. > > Agree too. The appropriate memory barrier is added in Patch 4/4. > > > > > > +} > > > + > > > +static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > > +{ > > > + struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > > + > > > + if (likely(!next)) { > > > + /* > > > + * Release the lock by setting it to NULL > > > + */ > > > + if (cmpxchg(lock, node, NULL) == node) > > > + return; > > > + /* Wait until the next pointer is set */ > > > + while (!(next = ACCESS_ONCE(node->next))) > > > + arch_mutex_cpu_relax(); > > > + } > > > > We need a memory barrier somewhere before here in this function, > > otherwise the critical section can leak out. I do not believe that > > we can rely on the prohibition against speculative stores that Peter > > Zijlstra and I have been discussing because that does not provide the > > transitivity required by locking primitives. I believe that we -could- > > make the access below be an smp_store_release(), though. > > > > Placing the barrier here (or at least not preceding the initial > > fetch from node->next) has the advantage of allowing it to pair with > > the xchg() in mcs_spin_lock(), though given the dependency only an > > smp_read_barrier_depends() is required for that purpose. > > > > > + ACCESS_ONCE(next->locked) = 1; > > > + smp_wmb(); > > > > I don't see what this barrier does for us. It is ordering the unlock > > store with what, exactly? > > > > If it really is doing something, we need a big fat comment stating what > > that is, and checkpatch.pl will be happy to inform you. ;-) > > > > > +} > > > + > > > +#endif /* __LINUX_MCS_SPINLOCK_H */ > > > diff --git a/include/linux/mutex.h b/include/linux/mutex.h > > > index bab49da..32a32e6 100644 > > > --- a/include/linux/mutex.h > > > +++ b/include/linux/mutex.h > > > @@ -46,6 +46,7 @@ > > > * - detects multi-task circular deadlocks and prints out all affected > > > * locks and tasks (and only those tasks) > > > */ > > > +struct mcs_spinlock; > > > struct mutex { > > > /* 1: unlocked, 0: locked, negative: locked, possible waiters */ > > > atomic_t count; > > > @@ -55,7 +56,7 @@ struct mutex { > > > struct task_struct *owner; > > > #endif > > > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > > > - void *spin_mlock; /* Spinner MCS lock */ > > > + struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ > > > #endif > > > #ifdef CONFIG_DEBUG_MUTEXES > > > const char *name; > > > @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); > > > # define arch_mutex_cpu_relax() cpu_relax() > > > #endif > > > > > > -#endif > > > +#endif /* __LINUX_MUTEX_H */ > > > diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c > > > index d24105b..e08b183 100644 > > > --- a/kernel/locking/mutex.c > > > +++ b/kernel/locking/mutex.c > > > @@ -25,6 +25,7 @@ > > > #include <linux/spinlock.h> > > > #include <linux/interrupt.h> > > > #include <linux/debug_locks.h> > > > +#include <linux/mcs_spinlock.h> > > > > > > /* > > > * In the DEBUG case we are using the "NULL fastpath" for mutexes, > > > @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) > > > INIT_LIST_HEAD(&lock->wait_list); > > > mutex_clear_owner(lock); > > > #ifdef CONFIG_MUTEX_SPIN_ON_OWNER > > > - lock->spin_mlock = NULL; > > > + lock->mcs_lock = NULL; > > > #endif > > > > > > debug_mutex_init(lock, name, key); > > > @@ -111,54 +112,7 @@ EXPORT_SYMBOL(mutex_lock); > > > * more or less simultaneously, the spinners need to acquire a MCS lock > > > * first before spinning on the owner field. > > > * > > > - * We don't inline mspin_lock() so that perf can correctly account for the > > > - * time spent in this lock function. > > > */ > > > -struct mspin_node { > > > - struct mspin_node *next ; > > > - int locked; /* 1 if lock acquired */ > > > -}; > > > -#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) > > > - > > > -static noinline > > > -void mspin_lock(struct mspin_node **lock, struct mspin_node *node) > > > -{ > > > - struct mspin_node *prev; > > > - > > > - /* Init node */ > > > - node->locked = 0; > > > - node->next = NULL; > > > - > > > - prev = xchg(lock, node); > > > - if (likely(prev == NULL)) { > > > - /* Lock acquired */ > > > - node->locked = 1; > > > - return; > > > - } > > > - ACCESS_ONCE(prev->next) = node; > > > - smp_wmb(); > > > - /* Wait until the lock holder passes the lock down */ > > > - while (!ACCESS_ONCE(node->locked)) > > > - arch_mutex_cpu_relax(); > > > -} > > > - > > > -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) > > > -{ > > > - struct mspin_node *next = ACCESS_ONCE(node->next); > > > - > > > - if (likely(!next)) { > > > - /* > > > - * Release the lock by setting it to NULL > > > - */ > > > - if (cmpxchg(lock, node, NULL) == node) > > > - return; > > > - /* Wait until the next pointer is set */ > > > - while (!(next = ACCESS_ONCE(node->next))) > > > - arch_mutex_cpu_relax(); > > > - } > > > - ACCESS_ONCE(next->locked) = 1; > > > - smp_wmb(); > > > -} > > > > > > /* > > > * Mutex spinning code migrated from kernel/sched/core.c > > > @@ -448,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > > > > > for (;;) { > > > struct task_struct *owner; > > > - struct mspin_node node; > > > + struct mcs_spinlock node; > > > > > > if (use_ww_ctx && ww_ctx->acquired > 0) { > > > struct ww_mutex *ww; > > > @@ -470,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > > * If there's an owner, wait for it to either > > > * release the lock or go to sleep. > > > */ > > > - mspin_lock(MLOCK(lock), &node); > > > + mcs_spin_lock(&lock->mcs_lock, &node); > > > owner = ACCESS_ONCE(lock->owner); > > > if (owner && !mutex_spin_on_owner(lock, owner)) { > > > - mspin_unlock(MLOCK(lock), &node); > > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > > goto slowpath; > > > } > > > > > > @@ -488,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, > > > } > > > > > > mutex_set_owner(lock); > > > - mspin_unlock(MLOCK(lock), &node); > > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > > preempt_enable(); > > > return 0; > > > } > > > - mspin_unlock(MLOCK(lock), &node); > > > + mcs_spin_unlock(&lock->mcs_lock, &node); > > > > > > /* > > > * When there's no owner, we might have preempted between the > > > -- > > > 1.7.4.4 > > > > > > > > > > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > Please read the FAQ at http://www.tux.org/lkml/ > > ^ permalink raw reply [flat|nested] 70+ messages in thread
* [PATCH v5 2/4] MCS Lock: optimizations and extra comments @ 2013-11-08 19:52 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-08 19:52 UTC (permalink / raw) To: Ingo Molnar, Andrew Morton, Thomas Gleixner Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton From: Jason Low <jason.low2@hp.com> Remove unnecessary operation and make the cmpxchg(lock, node, NULL) == node check in mcs_spin_unlock() likely() as it is likely that a race did not occur most of the time. Also add in more comments describing how the local node is used in MCS locks. Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> Signed-off-by: Jason Low <jason.low2@hp.com> --- include/linux/mcs_spinlock.h | 13 +++++++++++-- 1 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h index b5de3b0..96f14299 100644 --- a/include/linux/mcs_spinlock.h +++ b/include/linux/mcs_spinlock.h @@ -18,6 +18,12 @@ struct mcs_spinlock { }; /* + * In order to acquire the lock, the caller should declare a local node and + * pass a reference of the node to this function in addition to the lock. + * If the lock has already been acquired, then this will proceed to spin + * on this node->locked until the previous lock holder sets the node->locked + * in mcs_spin_unlock(). + * * We don't inline mcs_spin_lock() so that perf can correctly account for the * time spent in this lock function. */ @@ -33,7 +39,6 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) prev = xchg(lock, node); if (likely(prev == NULL)) { /* Lock acquired */ - node->locked = 1; return; } ACCESS_ONCE(prev->next) = node; @@ -43,6 +48,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) arch_mutex_cpu_relax(); } +/* + * Releases the lock. The caller should pass in the corresponding node that + * was used to acquire the lock. + */ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { struct mcs_spinlock *next = ACCESS_ONCE(node->next); @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod /* * Release the lock by setting it to NULL */ - if (cmpxchg(lock, node, NULL) == node) + if (likely(cmpxchg(lock, node, NULL) == node)) return; /* Wait until the next pointer is set */ while (!(next = ACCESS_ONCE(node->next))) -- 1.7.4.4 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH v5 2/4] MCS Lock: optimizations and extra comments @ 2013-11-08 19:52 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-08 19:52 UTC (permalink / raw) To: Ingo Molnar, Andrew Morton, Thomas Gleixner Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang From: Jason Low <jason.low2@hp.com> Remove unnecessary operation and make the cmpxchg(lock, node, NULL) == node check in mcs_spin_unlock() likely() as it is likely that a race did not occur most of the time. Also add in more comments describing how the local node is used in MCS locks. Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> Signed-off-by: Jason Low <jason.low2@hp.com> --- include/linux/mcs_spinlock.h | 13 +++++++++++-- 1 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h index b5de3b0..96f14299 100644 --- a/include/linux/mcs_spinlock.h +++ b/include/linux/mcs_spinlock.h @@ -18,6 +18,12 @@ struct mcs_spinlock { }; /* + * In order to acquire the lock, the caller should declare a local node and + * pass a reference of the node to this function in addition to the lock. + * If the lock has already been acquired, then this will proceed to spin + * on this node->locked until the previous lock holder sets the node->locked + * in mcs_spin_unlock(). + * * We don't inline mcs_spin_lock() so that perf can correctly account for the * time spent in this lock function. */ @@ -33,7 +39,6 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) prev = xchg(lock, node); if (likely(prev == NULL)) { /* Lock acquired */ - node->locked = 1; return; } ACCESS_ONCE(prev->next) = node; @@ -43,6 +48,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) arch_mutex_cpu_relax(); } +/* + * Releases the lock. The caller should pass in the corresponding node that + * was used to acquire the lock. + */ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { struct mcs_spinlock *next = ACCESS_ONCE(node->next); @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod /* * Release the lock by setting it to NULL */ - if (cmpxchg(lock, node, NULL) == node) + if (likely(cmpxchg(lock, node, NULL) == node)) return; /* Wait until the next pointer is set */ while (!(next = ACCESS_ONCE(node->next))) -- 1.7.4.4 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH v5 2/4] MCS Lock: optimizations and extra comments @ 2013-11-08 19:52 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-08 19:52 UTC (permalink / raw) To: Ingo Molnar, Andrew Morton, Thomas Gleixner Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang From: Jason Low <jason.low2@hp.com> Remove unnecessary operation and make the cmpxchg(lock, node, NULL) == node check in mcs_spin_unlock() likely() as it is likely that a race did not occur most of the time. Also add in more comments describing how the local node is used in MCS locks. Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> Signed-off-by: Jason Low <jason.low2@hp.com> --- include/linux/mcs_spinlock.h | 13 +++++++++++-- 1 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h index b5de3b0..96f14299 100644 --- a/include/linux/mcs_spinlock.h +++ b/include/linux/mcs_spinlock.h @@ -18,6 +18,12 @@ struct mcs_spinlock { }; /* + * In order to acquire the lock, the caller should declare a local node and + * pass a reference of the node to this function in addition to the lock. + * If the lock has already been acquired, then this will proceed to spin + * on this node->locked until the previous lock holder sets the node->locked + * in mcs_spin_unlock(). + * * We don't inline mcs_spin_lock() so that perf can correctly account for the * time spent in this lock function. */ @@ -33,7 +39,6 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) prev = xchg(lock, node); if (likely(prev == NULL)) { /* Lock acquired */ - node->locked = 1; return; } ACCESS_ONCE(prev->next) = node; @@ -43,6 +48,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) arch_mutex_cpu_relax(); } +/* + * Releases the lock. The caller should pass in the corresponding node that + * was used to acquire the lock. + */ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { struct mcs_spinlock *next = ACCESS_ONCE(node->next); @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod /* * Release the lock by setting it to NULL */ - if (cmpxchg(lock, node, NULL) == node) + if (likely(cmpxchg(lock, node, NULL) == node)) return; /* Wait until the next pointer is set */ while (!(next = ACCESS_ONCE(node->next))) -- 1.7.4.4 ^ permalink raw reply related [flat|nested] 70+ messages in thread
* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments @ 2013-11-19 19:13 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:13 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, "H. Peter Anvin" <hpa> On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote: > From: Jason Low <jason.low2@hp.com> > > Remove unnecessary operation and make the cmpxchg(lock, node, NULL) == node > check in mcs_spin_unlock() likely() as it is likely that a race did not occur > most of the time. > > Also add in more comments describing how the local node is used in MCS locks. > > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> > Signed-off-by: Jason Low <jason.low2@hp.com> > --- > include/linux/mcs_spinlock.h | 13 +++++++++++-- > 1 files changed, 11 insertions(+), 2 deletions(-) > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > index b5de3b0..96f14299 100644 > --- a/include/linux/mcs_spinlock.h > +++ b/include/linux/mcs_spinlock.h > @@ -18,6 +18,12 @@ struct mcs_spinlock { > }; > > /* > + * In order to acquire the lock, the caller should declare a local node and > + * pass a reference of the node to this function in addition to the lock. > + * If the lock has already been acquired, then this will proceed to spin > + * on this node->locked until the previous lock holder sets the node->locked > + * in mcs_spin_unlock(). > + * > * We don't inline mcs_spin_lock() so that perf can correctly account for the > * time spent in this lock function. > */ > @@ -33,7 +39,6 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > prev = xchg(lock, node); > if (likely(prev == NULL)) { > /* Lock acquired */ > - node->locked = 1; Agreed, no one looks at this field in this case, so no need to initialize it, unless for debug purposes. > return; > } > ACCESS_ONCE(prev->next) = node; > @@ -43,6 +48,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > arch_mutex_cpu_relax(); > } > > +/* > + * Releases the lock. The caller should pass in the corresponding node that > + * was used to acquire the lock. > + */ > static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > { > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > /* > * Release the lock by setting it to NULL > */ > - if (cmpxchg(lock, node, NULL) == node) > + if (likely(cmpxchg(lock, node, NULL) == node)) Agreed here as well. Takes a narrow race to hit this. So, did your testing exercise this path? If the answer is "yes", and if the issues that I called out in patch #1 are resolved: Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > return; > /* Wait until the next pointer is set */ > while (!(next = ACCESS_ONCE(node->next))) > -- > 1.7.4.4 > > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments @ 2013-11-19 19:13 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:13 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote: > From: Jason Low <jason.low2@hp.com> > > Remove unnecessary operation and make the cmpxchg(lock, node, NULL) == node > check in mcs_spin_unlock() likely() as it is likely that a race did not occur > most of the time. > > Also add in more comments describing how the local node is used in MCS locks. > > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> > Signed-off-by: Jason Low <jason.low2@hp.com> > --- > include/linux/mcs_spinlock.h | 13 +++++++++++-- > 1 files changed, 11 insertions(+), 2 deletions(-) > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > index b5de3b0..96f14299 100644 > --- a/include/linux/mcs_spinlock.h > +++ b/include/linux/mcs_spinlock.h > @@ -18,6 +18,12 @@ struct mcs_spinlock { > }; > > /* > + * In order to acquire the lock, the caller should declare a local node and > + * pass a reference of the node to this function in addition to the lock. > + * If the lock has already been acquired, then this will proceed to spin > + * on this node->locked until the previous lock holder sets the node->locked > + * in mcs_spin_unlock(). > + * > * We don't inline mcs_spin_lock() so that perf can correctly account for the > * time spent in this lock function. > */ > @@ -33,7 +39,6 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > prev = xchg(lock, node); > if (likely(prev == NULL)) { > /* Lock acquired */ > - node->locked = 1; Agreed, no one looks at this field in this case, so no need to initialize it, unless for debug purposes. > return; > } > ACCESS_ONCE(prev->next) = node; > @@ -43,6 +48,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > arch_mutex_cpu_relax(); > } > > +/* > + * Releases the lock. The caller should pass in the corresponding node that > + * was used to acquire the lock. > + */ > static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > { > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > /* > * Release the lock by setting it to NULL > */ > - if (cmpxchg(lock, node, NULL) == node) > + if (likely(cmpxchg(lock, node, NULL) == node)) Agreed here as well. Takes a narrow race to hit this. So, did your testing exercise this path? If the answer is "yes", and if the issues that I called out in patch #1 are resolved: Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > return; > /* Wait until the next pointer is set */ > while (!(next = ACCESS_ONCE(node->next))) > -- > 1.7.4.4 > > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments @ 2013-11-19 19:13 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:13 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote: > From: Jason Low <jason.low2@hp.com> > > Remove unnecessary operation and make the cmpxchg(lock, node, NULL) == node > check in mcs_spin_unlock() likely() as it is likely that a race did not occur > most of the time. > > Also add in more comments describing how the local node is used in MCS locks. > > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> > Signed-off-by: Jason Low <jason.low2@hp.com> > --- > include/linux/mcs_spinlock.h | 13 +++++++++++-- > 1 files changed, 11 insertions(+), 2 deletions(-) > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > index b5de3b0..96f14299 100644 > --- a/include/linux/mcs_spinlock.h > +++ b/include/linux/mcs_spinlock.h > @@ -18,6 +18,12 @@ struct mcs_spinlock { > }; > > /* > + * In order to acquire the lock, the caller should declare a local node and > + * pass a reference of the node to this function in addition to the lock. > + * If the lock has already been acquired, then this will proceed to spin > + * on this node->locked until the previous lock holder sets the node->locked > + * in mcs_spin_unlock(). > + * > * We don't inline mcs_spin_lock() so that perf can correctly account for the > * time spent in this lock function. > */ > @@ -33,7 +39,6 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > prev = xchg(lock, node); > if (likely(prev == NULL)) { > /* Lock acquired */ > - node->locked = 1; Agreed, no one looks at this field in this case, so no need to initialize it, unless for debug purposes. > return; > } > ACCESS_ONCE(prev->next) = node; > @@ -43,6 +48,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > arch_mutex_cpu_relax(); > } > > +/* > + * Releases the lock. The caller should pass in the corresponding node that > + * was used to acquire the lock. > + */ > static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > { > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > /* > * Release the lock by setting it to NULL > */ > - if (cmpxchg(lock, node, NULL) == node) > + if (likely(cmpxchg(lock, node, NULL) == node)) Agreed here as well. Takes a narrow race to hit this. So, did your testing exercise this path? If the answer is "yes", and if the issues that I called out in patch #1 are resolved: Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > return; > /* Wait until the next pointer is set */ > while (!(next = ACCESS_ONCE(node->next))) > -- > 1.7.4.4 > > > ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments @ 2013-11-19 19:42 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-19 19:42 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott On Tue, 2013-11-19 at 11:13 -0800, Paul E. McKenney wrote: > On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote: > > From: Jason Low <jason.low2@hp.com> > > > > Remove unnecessary operation and make the cmpxchg(lock, node, NULL) == node > > check in mcs_spin_unlock() likely() as it is likely that a race did not occur > > most of the time. > > > > Also add in more comments describing how the local node is used in MCS locks. > > > > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> > > Signed-off-by: Jason Low <jason.low2@hp.com> > > --- > > include/linux/mcs_spinlock.h | 13 +++++++++++-- > > 1 files changed, 11 insertions(+), 2 deletions(-) > > > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > > index b5de3b0..96f14299 100644 > > --- a/include/linux/mcs_spinlock.h > > +++ b/include/linux/mcs_spinlock.h > > @@ -18,6 +18,12 @@ struct mcs_spinlock { > > }; > > > > /* > > + * In order to acquire the lock, the caller should declare a local node and > > + * pass a reference of the node to this function in addition to the lock. > > + * If the lock has already been acquired, then this will proceed to spin > > + * on this node->locked until the previous lock holder sets the node->locked > > + * in mcs_spin_unlock(). > > + * > > * We don't inline mcs_spin_lock() so that perf can correctly account for the > > * time spent in this lock function. > > */ > > @@ -33,7 +39,6 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > prev = xchg(lock, node); > > if (likely(prev == NULL)) { > > /* Lock acquired */ > > - node->locked = 1; > > Agreed, no one looks at this field in this case, so no need to initialize > it, unless for debug purposes. > > > return; > > } > > ACCESS_ONCE(prev->next) = node; > > @@ -43,6 +48,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > arch_mutex_cpu_relax(); > > } > > > > +/* > > + * Releases the lock. The caller should pass in the corresponding node that > > + * was used to acquire the lock. > > + */ > > static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > { > > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > > /* > > * Release the lock by setting it to NULL > > */ > > - if (cmpxchg(lock, node, NULL) == node) > > + if (likely(cmpxchg(lock, node, NULL) == node)) > > Agreed here as well. Takes a narrow race to hit this. > > So, did your testing exercise this path? If the answer is "yes", and > if the issues that I called out in patch #1 are resolved: I haven't instrumented the code to check the hit rate of this path. But the slow path probably will only get hit in some cases under heavy contention. > > Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > > > return; > > /* Wait until the next pointer is set */ > > while (!(next = ACCESS_ONCE(node->next))) > > -- > > 1.7.4.4 > > > > > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments @ 2013-11-19 19:42 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-19 19:42 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Tue, 2013-11-19 at 11:13 -0800, Paul E. McKenney wrote: > On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote: > > From: Jason Low <jason.low2@hp.com> > > > > Remove unnecessary operation and make the cmpxchg(lock, node, NULL) == node > > check in mcs_spin_unlock() likely() as it is likely that a race did not occur > > most of the time. > > > > Also add in more comments describing how the local node is used in MCS locks. > > > > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> > > Signed-off-by: Jason Low <jason.low2@hp.com> > > --- > > include/linux/mcs_spinlock.h | 13 +++++++++++-- > > 1 files changed, 11 insertions(+), 2 deletions(-) > > > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > > index b5de3b0..96f14299 100644 > > --- a/include/linux/mcs_spinlock.h > > +++ b/include/linux/mcs_spinlock.h > > @@ -18,6 +18,12 @@ struct mcs_spinlock { > > }; > > > > /* > > + * In order to acquire the lock, the caller should declare a local node and > > + * pass a reference of the node to this function in addition to the lock. > > + * If the lock has already been acquired, then this will proceed to spin > > + * on this node->locked until the previous lock holder sets the node->locked > > + * in mcs_spin_unlock(). > > + * > > * We don't inline mcs_spin_lock() so that perf can correctly account for the > > * time spent in this lock function. > > */ > > @@ -33,7 +39,6 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > prev = xchg(lock, node); > > if (likely(prev == NULL)) { > > /* Lock acquired */ > > - node->locked = 1; > > Agreed, no one looks at this field in this case, so no need to initialize > it, unless for debug purposes. > > > return; > > } > > ACCESS_ONCE(prev->next) = node; > > @@ -43,6 +48,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > arch_mutex_cpu_relax(); > > } > > > > +/* > > + * Releases the lock. The caller should pass in the corresponding node that > > + * was used to acquire the lock. > > + */ > > static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > { > > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > > /* > > * Release the lock by setting it to NULL > > */ > > - if (cmpxchg(lock, node, NULL) == node) > > + if (likely(cmpxchg(lock, node, NULL) == node)) > > Agreed here as well. Takes a narrow race to hit this. > > So, did your testing exercise this path? If the answer is "yes", and > if the issues that I called out in patch #1 are resolved: I haven't instrumented the code to check the hit rate of this path. But the slow path probably will only get hit in some cases under heavy contention. > > Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > > > return; > > /* Wait until the next pointer is set */ > > while (!(next = ACCESS_ONCE(node->next))) > > -- > > 1.7.4.4 > > > > > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments @ 2013-11-19 19:42 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-19 19:42 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Tue, 2013-11-19 at 11:13 -0800, Paul E. McKenney wrote: > On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote: > > From: Jason Low <jason.low2@hp.com> > > > > Remove unnecessary operation and make the cmpxchg(lock, node, NULL) == node > > check in mcs_spin_unlock() likely() as it is likely that a race did not occur > > most of the time. > > > > Also add in more comments describing how the local node is used in MCS locks. > > > > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> > > Signed-off-by: Jason Low <jason.low2@hp.com> > > --- > > include/linux/mcs_spinlock.h | 13 +++++++++++-- > > 1 files changed, 11 insertions(+), 2 deletions(-) > > > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > > index b5de3b0..96f14299 100644 > > --- a/include/linux/mcs_spinlock.h > > +++ b/include/linux/mcs_spinlock.h > > @@ -18,6 +18,12 @@ struct mcs_spinlock { > > }; > > > > /* > > + * In order to acquire the lock, the caller should declare a local node and > > + * pass a reference of the node to this function in addition to the lock. > > + * If the lock has already been acquired, then this will proceed to spin > > + * on this node->locked until the previous lock holder sets the node->locked > > + * in mcs_spin_unlock(). > > + * > > * We don't inline mcs_spin_lock() so that perf can correctly account for the > > * time spent in this lock function. > > */ > > @@ -33,7 +39,6 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > prev = xchg(lock, node); > > if (likely(prev == NULL)) { > > /* Lock acquired */ > > - node->locked = 1; > > Agreed, no one looks at this field in this case, so no need to initialize > it, unless for debug purposes. > > > return; > > } > > ACCESS_ONCE(prev->next) = node; > > @@ -43,6 +48,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > arch_mutex_cpu_relax(); > > } > > > > +/* > > + * Releases the lock. The caller should pass in the corresponding node that > > + * was used to acquire the lock. > > + */ > > static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > { > > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > > /* > > * Release the lock by setting it to NULL > > */ > > - if (cmpxchg(lock, node, NULL) == node) > > + if (likely(cmpxchg(lock, node, NULL) == node)) > > Agreed here as well. Takes a narrow race to hit this. > > So, did your testing exercise this path? If the answer is "yes", and > if the issues that I called out in patch #1 are resolved: I haven't instrumented the code to check the hit rate of this path. But the slow path probably will only get hit in some cases under heavy contention. > > Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > > > return; > > /* Wait until the next pointer is set */ > > while (!(next = ACCESS_ONCE(node->next))) > > -- > > 1.7.4.4 > > > > > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments @ 2013-11-19 22:57 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-19 22:57 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott On Tue, 2013-11-19 at 11:13 -0800, Paul E. McKenney wrote: > On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote: > > > > +/* > > + * Releases the lock. The caller should pass in the corresponding node that > > + * was used to acquire the lock. > > + */ > > static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > { > > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > > /* > > * Release the lock by setting it to NULL > > */ > > - if (cmpxchg(lock, node, NULL) == node) > > + if (likely(cmpxchg(lock, node, NULL) == node)) > > Agreed here as well. Takes a narrow race to hit this. > > So, did your testing exercise this path? If the answer is "yes", Paul, I did some instrumentation and confirmed that the path in question has been exercised. So this patch should be okay. Tim > and if the issues that I called out in patch #1 are resolved: > > Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > > > return; > > /* Wait until the next pointer is set */ > > while (!(next = ACCESS_ONCE(node->next))) > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments @ 2013-11-19 22:57 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-19 22:57 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Tue, 2013-11-19 at 11:13 -0800, Paul E. McKenney wrote: > On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote: > > > > +/* > > + * Releases the lock. The caller should pass in the corresponding node that > > + * was used to acquire the lock. > > + */ > > static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > { > > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > > /* > > * Release the lock by setting it to NULL > > */ > > - if (cmpxchg(lock, node, NULL) == node) > > + if (likely(cmpxchg(lock, node, NULL) == node)) > > Agreed here as well. Takes a narrow race to hit this. > > So, did your testing exercise this path? If the answer is "yes", Paul, I did some instrumentation and confirmed that the path in question has been exercised. So this patch should be okay. Tim > and if the issues that I called out in patch #1 are resolved: > > Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > > > return; > > /* Wait until the next pointer is set */ > > while (!(next = ACCESS_ONCE(node->next))) > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments @ 2013-11-19 22:57 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-19 22:57 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Tue, 2013-11-19 at 11:13 -0800, Paul E. McKenney wrote: > On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote: > > > > +/* > > + * Releases the lock. The caller should pass in the corresponding node that > > + * was used to acquire the lock. > > + */ > > static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > { > > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > > /* > > * Release the lock by setting it to NULL > > */ > > - if (cmpxchg(lock, node, NULL) == node) > > + if (likely(cmpxchg(lock, node, NULL) == node)) > > Agreed here as well. Takes a narrow race to hit this. > > So, did your testing exercise this path? If the answer is "yes", Paul, I did some instrumentation and confirmed that the path in question has been exercised. So this patch should be okay. Tim > and if the issues that I called out in patch #1 are resolved: > > Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > > > return; > > /* Wait until the next pointer is set */ > > while (!(next = ACCESS_ONCE(node->next))) > > ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments @ 2013-11-19 23:05 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 23:05 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, "H. Peter Anvin" <hpa> On Tue, Nov 19, 2013 at 02:57:41PM -0800, Tim Chen wrote: > On Tue, 2013-11-19 at 11:13 -0800, Paul E. McKenney wrote: > > On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote: > > > > > > +/* > > > + * Releases the lock. The caller should pass in the corresponding node that > > > + * was used to acquire the lock. > > > + */ > > > static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > > { > > > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > > > /* > > > * Release the lock by setting it to NULL > > > */ > > > - if (cmpxchg(lock, node, NULL) == node) > > > + if (likely(cmpxchg(lock, node, NULL) == node)) > > > > Agreed here as well. Takes a narrow race to hit this. > > > > So, did your testing exercise this path? If the answer is "yes", > > > Paul, > > I did some instrumentation and confirmed that the path in question has > been exercised. So this patch should be okay. Very good! Thanx, Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments @ 2013-11-19 23:05 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 23:05 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Tue, Nov 19, 2013 at 02:57:41PM -0800, Tim Chen wrote: > On Tue, 2013-11-19 at 11:13 -0800, Paul E. McKenney wrote: > > On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote: > > > > > > +/* > > > + * Releases the lock. The caller should pass in the corresponding node that > > > + * was used to acquire the lock. > > > + */ > > > static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > > { > > > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > > > /* > > > * Release the lock by setting it to NULL > > > */ > > > - if (cmpxchg(lock, node, NULL) == node) > > > + if (likely(cmpxchg(lock, node, NULL) == node)) > > > > Agreed here as well. Takes a narrow race to hit this. > > > > So, did your testing exercise this path? If the answer is "yes", > > > Paul, > > I did some instrumentation and confirmed that the path in question has > been exercised. So this patch should be okay. Very good! Thanx, Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 2/4] MCS Lock: optimizations and extra comments @ 2013-11-19 23:05 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 23:05 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Tue, Nov 19, 2013 at 02:57:41PM -0800, Tim Chen wrote: > On Tue, 2013-11-19 at 11:13 -0800, Paul E. McKenney wrote: > > On Fri, Nov 08, 2013 at 11:52:05AM -0800, Tim Chen wrote: > > > > > > +/* > > > + * Releases the lock. The caller should pass in the corresponding node that > > > + * was used to acquire the lock. > > > + */ > > > static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > > { > > > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > > @@ -51,7 +60,7 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > > > /* > > > * Release the lock by setting it to NULL > > > */ > > > - if (cmpxchg(lock, node, NULL) == node) > > > + if (likely(cmpxchg(lock, node, NULL) == node)) > > > > Agreed here as well. Takes a narrow race to hit this. > > > > So, did your testing exercise this path? If the answer is "yes", > > > Paul, > > I did some instrumentation and confirmed that the path in question has > been exercised. So this patch should be okay. Very good! Thanx, Paul ^ permalink raw reply [flat|nested] 70+ messages in thread
* [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file @ 2013-11-08 19:52 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-08 19:52 UTC (permalink / raw) To: Ingo Molnar, Andrew Morton, Thomas Gleixner Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton From: Waiman Long <Waiman.Long@hp.com> The following changes are made: 1) Create a new mcs_spinlock.c file to contain the mcs_spin_lock() and mcs_spin_unlock() function. 2) Include a number of prerequisite header files and define arch_mutex_cpu_relax(), if not previously defined so the mcs functions can be compiled for multiple architecture without causing problems. Signed-off-by: Waiman Long <Waiman.Long@hp.com> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> --- include/linux/mcs_spinlock.h | 56 ++------------------ kernel/locking/Makefile | 6 +- .../locking/mcs_spinlock.c | 31 ++++++----- 3 files changed, 23 insertions(+), 70 deletions(-) copy include/linux/mcs_spinlock.h => kernel/locking/mcs_spinlock.c (78%) diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h index 96f14299..d54bb23 100644 --- a/include/linux/mcs_spinlock.h +++ b/include/linux/mcs_spinlock.h @@ -17,57 +17,9 @@ struct mcs_spinlock { int locked; /* 1 if lock acquired */ }; -/* - * In order to acquire the lock, the caller should declare a local node and - * pass a reference of the node to this function in addition to the lock. - * If the lock has already been acquired, then this will proceed to spin - * on this node->locked until the previous lock holder sets the node->locked - * in mcs_spin_unlock(). - * - * We don't inline mcs_spin_lock() so that perf can correctly account for the - * time spent in this lock function. - */ -static noinline -void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) -{ - struct mcs_spinlock *prev; - - /* Init node */ - node->locked = 0; - node->next = NULL; - - prev = xchg(lock, node); - if (likely(prev == NULL)) { - /* Lock acquired */ - return; - } - ACCESS_ONCE(prev->next) = node; - smp_wmb(); - /* Wait until the lock holder passes the lock down */ - while (!ACCESS_ONCE(node->locked)) - arch_mutex_cpu_relax(); -} - -/* - * Releases the lock. The caller should pass in the corresponding node that - * was used to acquire the lock. - */ -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) -{ - struct mcs_spinlock *next = ACCESS_ONCE(node->next); - - if (likely(!next)) { - /* - * Release the lock by setting it to NULL - */ - if (likely(cmpxchg(lock, node, NULL) == node)) - return; - /* Wait until the next pointer is set */ - while (!(next = ACCESS_ONCE(node->next))) - arch_mutex_cpu_relax(); - } - ACCESS_ONCE(next->locked) = 1; - smp_wmb(); -} +extern +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node); +extern +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node); #endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index baab8e5..20d9d5c 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -13,12 +13,12 @@ obj-$(CONFIG_LOCKDEP) += lockdep.o ifeq ($(CONFIG_PROC_FS),y) obj-$(CONFIG_LOCKDEP) += lockdep_proc.o endif -obj-$(CONFIG_SMP) += spinlock.o -obj-$(CONFIG_PROVE_LOCKING) += spinlock.o +obj-$(CONFIG_SMP) += spinlock.o mcs_spinlock.o +obj-$(CONFIG_PROVE_LOCKING) += spinlock.o mcs_spinlock.o obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o -obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o mcs_spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o diff --git a/include/linux/mcs_spinlock.h b/kernel/locking/mcs_spinlock.c similarity index 78% copy from include/linux/mcs_spinlock.h copy to kernel/locking/mcs_spinlock.c index 96f14299..b6f27f8 100644 --- a/include/linux/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.c @@ -1,7 +1,5 @@ /* - * MCS lock defines - * - * This file contains the main data structure and API definitions of MCS lock. + * MCS lock * * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock * with the desirable properties of being fair, and with each cpu trying @@ -9,13 +7,20 @@ * It avoids expensive cache bouncings that common test-and-set spin-lock * implementations incur. */ -#ifndef __LINUX_MCS_SPINLOCK_H -#define __LINUX_MCS_SPINLOCK_H +/* + * asm/processor.h may define arch_mutex_cpu_relax(). + * If it is not defined, cpu_relax() will be used. + */ +#include <asm/barrier.h> +#include <asm/cmpxchg.h> +#include <asm/processor.h> +#include <linux/compiler.h> +#include <linux/mcs_spinlock.h> +#include <linux/export.h> -struct mcs_spinlock { - struct mcs_spinlock *next; - int locked; /* 1 if lock acquired */ -}; +#ifndef arch_mutex_cpu_relax +# define arch_mutex_cpu_relax() cpu_relax() +#endif /* * In order to acquire the lock, the caller should declare a local node and @@ -23,11 +28,7 @@ struct mcs_spinlock { * If the lock has already been acquired, then this will proceed to spin * on this node->locked until the previous lock holder sets the node->locked * in mcs_spin_unlock(). - * - * We don't inline mcs_spin_lock() so that perf can correctly account for the - * time spent in this lock function. */ -static noinline void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { struct mcs_spinlock *prev; @@ -47,6 +48,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) while (!ACCESS_ONCE(node->locked)) arch_mutex_cpu_relax(); } +EXPORT_SYMBOL_GPL(mcs_spin_lock); /* * Releases the lock. The caller should pass in the corresponding node that @@ -69,5 +71,4 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod ACCESS_ONCE(next->locked) = 1; smp_wmb(); } - -#endif /* __LINUX_MCS_SPINLOCK_H */ +EXPORT_SYMBOL_GPL(mcs_spin_unlock); -- 1.7.4.4 ^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file @ 2013-11-08 19:52 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-08 19:52 UTC (permalink / raw) To: Ingo Molnar, Andrew Morton, Thomas Gleixner Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang From: Waiman Long <Waiman.Long@hp.com> The following changes are made: 1) Create a new mcs_spinlock.c file to contain the mcs_spin_lock() and mcs_spin_unlock() function. 2) Include a number of prerequisite header files and define arch_mutex_cpu_relax(), if not previously defined so the mcs functions can be compiled for multiple architecture without causing problems. Signed-off-by: Waiman Long <Waiman.Long@hp.com> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> --- include/linux/mcs_spinlock.h | 56 ++------------------ kernel/locking/Makefile | 6 +- .../locking/mcs_spinlock.c | 31 ++++++----- 3 files changed, 23 insertions(+), 70 deletions(-) copy include/linux/mcs_spinlock.h => kernel/locking/mcs_spinlock.c (78%) diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h index 96f14299..d54bb23 100644 --- a/include/linux/mcs_spinlock.h +++ b/include/linux/mcs_spinlock.h @@ -17,57 +17,9 @@ struct mcs_spinlock { int locked; /* 1 if lock acquired */ }; -/* - * In order to acquire the lock, the caller should declare a local node and - * pass a reference of the node to this function in addition to the lock. - * If the lock has already been acquired, then this will proceed to spin - * on this node->locked until the previous lock holder sets the node->locked - * in mcs_spin_unlock(). - * - * We don't inline mcs_spin_lock() so that perf can correctly account for the - * time spent in this lock function. - */ -static noinline -void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) -{ - struct mcs_spinlock *prev; - - /* Init node */ - node->locked = 0; - node->next = NULL; - - prev = xchg(lock, node); - if (likely(prev == NULL)) { - /* Lock acquired */ - return; - } - ACCESS_ONCE(prev->next) = node; - smp_wmb(); - /* Wait until the lock holder passes the lock down */ - while (!ACCESS_ONCE(node->locked)) - arch_mutex_cpu_relax(); -} - -/* - * Releases the lock. The caller should pass in the corresponding node that - * was used to acquire the lock. - */ -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) -{ - struct mcs_spinlock *next = ACCESS_ONCE(node->next); - - if (likely(!next)) { - /* - * Release the lock by setting it to NULL - */ - if (likely(cmpxchg(lock, node, NULL) == node)) - return; - /* Wait until the next pointer is set */ - while (!(next = ACCESS_ONCE(node->next))) - arch_mutex_cpu_relax(); - } - ACCESS_ONCE(next->locked) = 1; - smp_wmb(); -} +extern +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node); +extern +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node); #endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index baab8e5..20d9d5c 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -13,12 +13,12 @@ obj-$(CONFIG_LOCKDEP) += lockdep.o ifeq ($(CONFIG_PROC_FS),y) obj-$(CONFIG_LOCKDEP) += lockdep_proc.o endif -obj-$(CONFIG_SMP) += spinlock.o -obj-$(CONFIG_PROVE_LOCKING) += spinlock.o +obj-$(CONFIG_SMP) += spinlock.o mcs_spinlock.o +obj-$(CONFIG_PROVE_LOCKING) += spinlock.o mcs_spinlock.o obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o -obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o mcs_spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o diff --git a/include/linux/mcs_spinlock.h b/kernel/locking/mcs_spinlock.c similarity index 78% copy from include/linux/mcs_spinlock.h copy to kernel/locking/mcs_spinlock.c index 96f14299..b6f27f8 100644 --- a/include/linux/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.c @@ -1,7 +1,5 @@ /* - * MCS lock defines - * - * This file contains the main data structure and API definitions of MCS lock. + * MCS lock * * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock * with the desirable properties of being fair, and with each cpu trying @@ -9,13 +7,20 @@ * It avoids expensive cache bouncings that common test-and-set spin-lock * implementations incur. */ -#ifndef __LINUX_MCS_SPINLOCK_H -#define __LINUX_MCS_SPINLOCK_H +/* + * asm/processor.h may define arch_mutex_cpu_relax(). + * If it is not defined, cpu_relax() will be used. + */ +#include <asm/barrier.h> +#include <asm/cmpxchg.h> +#include <asm/processor.h> +#include <linux/compiler.h> +#include <linux/mcs_spinlock.h> +#include <linux/export.h> -struct mcs_spinlock { - struct mcs_spinlock *next; - int locked; /* 1 if lock acquired */ -}; +#ifndef arch_mutex_cpu_relax +# define arch_mutex_cpu_relax() cpu_relax() +#endif /* * In order to acquire the lock, the caller should declare a local node and @@ -23,11 +28,7 @@ struct mcs_spinlock { * If the lock has already been acquired, then this will proceed to spin * on this node->locked until the previous lock holder sets the node->locked * in mcs_spin_unlock(). - * - * We don't inline mcs_spin_lock() so that perf can correctly account for the - * time spent in this lock function. */ -static noinline void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { struct mcs_spinlock *prev; @@ -47,6 +48,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) while (!ACCESS_ONCE(node->locked)) arch_mutex_cpu_relax(); } +EXPORT_SYMBOL_GPL(mcs_spin_lock); /* * Releases the lock. The caller should pass in the corresponding node that @@ -69,5 +71,4 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod ACCESS_ONCE(next->locked) = 1; smp_wmb(); } - -#endif /* __LINUX_MCS_SPINLOCK_H */ +EXPORT_SYMBOL_GPL(mcs_spin_unlock); -- 1.7.4.4 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file @ 2013-11-08 19:52 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-08 19:52 UTC (permalink / raw) To: Ingo Molnar, Andrew Morton, Thomas Gleixner Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang From: Waiman Long <Waiman.Long@hp.com> The following changes are made: 1) Create a new mcs_spinlock.c file to contain the mcs_spin_lock() and mcs_spin_unlock() function. 2) Include a number of prerequisite header files and define arch_mutex_cpu_relax(), if not previously defined so the mcs functions can be compiled for multiple architecture without causing problems. Signed-off-by: Waiman Long <Waiman.Long@hp.com> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> --- include/linux/mcs_spinlock.h | 56 ++------------------ kernel/locking/Makefile | 6 +- .../locking/mcs_spinlock.c | 31 ++++++----- 3 files changed, 23 insertions(+), 70 deletions(-) copy include/linux/mcs_spinlock.h => kernel/locking/mcs_spinlock.c (78%) diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h index 96f14299..d54bb23 100644 --- a/include/linux/mcs_spinlock.h +++ b/include/linux/mcs_spinlock.h @@ -17,57 +17,9 @@ struct mcs_spinlock { int locked; /* 1 if lock acquired */ }; -/* - * In order to acquire the lock, the caller should declare a local node and - * pass a reference of the node to this function in addition to the lock. - * If the lock has already been acquired, then this will proceed to spin - * on this node->locked until the previous lock holder sets the node->locked - * in mcs_spin_unlock(). - * - * We don't inline mcs_spin_lock() so that perf can correctly account for the - * time spent in this lock function. - */ -static noinline -void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) -{ - struct mcs_spinlock *prev; - - /* Init node */ - node->locked = 0; - node->next = NULL; - - prev = xchg(lock, node); - if (likely(prev == NULL)) { - /* Lock acquired */ - return; - } - ACCESS_ONCE(prev->next) = node; - smp_wmb(); - /* Wait until the lock holder passes the lock down */ - while (!ACCESS_ONCE(node->locked)) - arch_mutex_cpu_relax(); -} - -/* - * Releases the lock. The caller should pass in the corresponding node that - * was used to acquire the lock. - */ -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) -{ - struct mcs_spinlock *next = ACCESS_ONCE(node->next); - - if (likely(!next)) { - /* - * Release the lock by setting it to NULL - */ - if (likely(cmpxchg(lock, node, NULL) == node)) - return; - /* Wait until the next pointer is set */ - while (!(next = ACCESS_ONCE(node->next))) - arch_mutex_cpu_relax(); - } - ACCESS_ONCE(next->locked) = 1; - smp_wmb(); -} +extern +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node); +extern +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node); #endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index baab8e5..20d9d5c 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -13,12 +13,12 @@ obj-$(CONFIG_LOCKDEP) += lockdep.o ifeq ($(CONFIG_PROC_FS),y) obj-$(CONFIG_LOCKDEP) += lockdep_proc.o endif -obj-$(CONFIG_SMP) += spinlock.o -obj-$(CONFIG_PROVE_LOCKING) += spinlock.o +obj-$(CONFIG_SMP) += spinlock.o mcs_spinlock.o +obj-$(CONFIG_PROVE_LOCKING) += spinlock.o mcs_spinlock.o obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o -obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o mcs_spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o diff --git a/include/linux/mcs_spinlock.h b/kernel/locking/mcs_spinlock.c similarity index 78% copy from include/linux/mcs_spinlock.h copy to kernel/locking/mcs_spinlock.c index 96f14299..b6f27f8 100644 --- a/include/linux/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.c @@ -1,7 +1,5 @@ /* - * MCS lock defines - * - * This file contains the main data structure and API definitions of MCS lock. + * MCS lock * * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock * with the desirable properties of being fair, and with each cpu trying @@ -9,13 +7,20 @@ * It avoids expensive cache bouncings that common test-and-set spin-lock * implementations incur. */ -#ifndef __LINUX_MCS_SPINLOCK_H -#define __LINUX_MCS_SPINLOCK_H +/* + * asm/processor.h may define arch_mutex_cpu_relax(). + * If it is not defined, cpu_relax() will be used. + */ +#include <asm/barrier.h> +#include <asm/cmpxchg.h> +#include <asm/processor.h> +#include <linux/compiler.h> +#include <linux/mcs_spinlock.h> +#include <linux/export.h> -struct mcs_spinlock { - struct mcs_spinlock *next; - int locked; /* 1 if lock acquired */ -}; +#ifndef arch_mutex_cpu_relax +# define arch_mutex_cpu_relax() cpu_relax() +#endif /* * In order to acquire the lock, the caller should declare a local node and @@ -23,11 +28,7 @@ struct mcs_spinlock { * If the lock has already been acquired, then this will proceed to spin * on this node->locked until the previous lock holder sets the node->locked * in mcs_spin_unlock(). - * - * We don't inline mcs_spin_lock() so that perf can correctly account for the - * time spent in this lock function. */ -static noinline void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { struct mcs_spinlock *prev; @@ -47,6 +48,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) while (!ACCESS_ONCE(node->locked)) arch_mutex_cpu_relax(); } +EXPORT_SYMBOL_GPL(mcs_spin_lock); /* * Releases the lock. The caller should pass in the corresponding node that @@ -69,5 +71,4 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod ACCESS_ONCE(next->locked) = 1; smp_wmb(); } - -#endif /* __LINUX_MCS_SPINLOCK_H */ +EXPORT_SYMBOL_GPL(mcs_spin_unlock); -- 1.7.4.4 ^ permalink raw reply related [flat|nested] 70+ messages in thread
* Re: [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file @ 2013-11-19 19:15 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:15 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, "H. Peter Anvin" <hpa> On Fri, Nov 08, 2013 at 11:52:15AM -0800, Tim Chen wrote: > From: Waiman Long <Waiman.Long@hp.com> > > The following changes are made: > > 1) Create a new mcs_spinlock.c file to contain the > mcs_spin_lock() and mcs_spin_unlock() function. > 2) Include a number of prerequisite header files and define > arch_mutex_cpu_relax(), if not previously defined so the > mcs functions can be compiled for multiple architecture without > causing problems. > > Signed-off-by: Waiman Long <Waiman.Long@hp.com> > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> Assuming issues called out in patch #1 are resolved and testing called out for patch #2 has happened: Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > --- > include/linux/mcs_spinlock.h | 56 ++------------------ > kernel/locking/Makefile | 6 +- > .../locking/mcs_spinlock.c | 31 ++++++----- > 3 files changed, 23 insertions(+), 70 deletions(-) > copy include/linux/mcs_spinlock.h => kernel/locking/mcs_spinlock.c (78%) > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > index 96f14299..d54bb23 100644 > --- a/include/linux/mcs_spinlock.h > +++ b/include/linux/mcs_spinlock.h > @@ -17,57 +17,9 @@ struct mcs_spinlock { > int locked; /* 1 if lock acquired */ > }; > > -/* > - * In order to acquire the lock, the caller should declare a local node and > - * pass a reference of the node to this function in addition to the lock. > - * If the lock has already been acquired, then this will proceed to spin > - * on this node->locked until the previous lock holder sets the node->locked > - * in mcs_spin_unlock(). > - * > - * We don't inline mcs_spin_lock() so that perf can correctly account for the > - * time spent in this lock function. > - */ > -static noinline > -void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > -{ > - struct mcs_spinlock *prev; > - > - /* Init node */ > - node->locked = 0; > - node->next = NULL; > - > - prev = xchg(lock, node); > - if (likely(prev == NULL)) { > - /* Lock acquired */ > - return; > - } > - ACCESS_ONCE(prev->next) = node; > - smp_wmb(); > - /* Wait until the lock holder passes the lock down */ > - while (!ACCESS_ONCE(node->locked)) > - arch_mutex_cpu_relax(); > -} > - > -/* > - * Releases the lock. The caller should pass in the corresponding node that > - * was used to acquire the lock. > - */ > -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > -{ > - struct mcs_spinlock *next = ACCESS_ONCE(node->next); > - > - if (likely(!next)) { > - /* > - * Release the lock by setting it to NULL > - */ > - if (likely(cmpxchg(lock, node, NULL) == node)) > - return; > - /* Wait until the next pointer is set */ > - while (!(next = ACCESS_ONCE(node->next))) > - arch_mutex_cpu_relax(); > - } > - ACCESS_ONCE(next->locked) = 1; > - smp_wmb(); > -} > +extern > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node); > +extern > +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node); > > #endif /* __LINUX_MCS_SPINLOCK_H */ > diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile > index baab8e5..20d9d5c 100644 > --- a/kernel/locking/Makefile > +++ b/kernel/locking/Makefile > @@ -13,12 +13,12 @@ obj-$(CONFIG_LOCKDEP) += lockdep.o > ifeq ($(CONFIG_PROC_FS),y) > obj-$(CONFIG_LOCKDEP) += lockdep_proc.o > endif > -obj-$(CONFIG_SMP) += spinlock.o > -obj-$(CONFIG_PROVE_LOCKING) += spinlock.o > +obj-$(CONFIG_SMP) += spinlock.o mcs_spinlock.o > +obj-$(CONFIG_PROVE_LOCKING) += spinlock.o mcs_spinlock.o > obj-$(CONFIG_RT_MUTEXES) += rtmutex.o > obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o > obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o > -obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o > +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o mcs_spinlock.o > obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o > obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o > obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o > diff --git a/include/linux/mcs_spinlock.h b/kernel/locking/mcs_spinlock.c > similarity index 78% > copy from include/linux/mcs_spinlock.h > copy to kernel/locking/mcs_spinlock.c > index 96f14299..b6f27f8 100644 > --- a/include/linux/mcs_spinlock.h > +++ b/kernel/locking/mcs_spinlock.c > @@ -1,7 +1,5 @@ > /* > - * MCS lock defines > - * > - * This file contains the main data structure and API definitions of MCS lock. > + * MCS lock > * > * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock > * with the desirable properties of being fair, and with each cpu trying > @@ -9,13 +7,20 @@ > * It avoids expensive cache bouncings that common test-and-set spin-lock > * implementations incur. > */ > -#ifndef __LINUX_MCS_SPINLOCK_H > -#define __LINUX_MCS_SPINLOCK_H > +/* > + * asm/processor.h may define arch_mutex_cpu_relax(). > + * If it is not defined, cpu_relax() will be used. > + */ > +#include <asm/barrier.h> > +#include <asm/cmpxchg.h> > +#include <asm/processor.h> > +#include <linux/compiler.h> > +#include <linux/mcs_spinlock.h> > +#include <linux/export.h> > > -struct mcs_spinlock { > - struct mcs_spinlock *next; > - int locked; /* 1 if lock acquired */ > -}; > +#ifndef arch_mutex_cpu_relax > +# define arch_mutex_cpu_relax() cpu_relax() > +#endif > > /* > * In order to acquire the lock, the caller should declare a local node and > @@ -23,11 +28,7 @@ struct mcs_spinlock { > * If the lock has already been acquired, then this will proceed to spin > * on this node->locked until the previous lock holder sets the node->locked > * in mcs_spin_unlock(). > - * > - * We don't inline mcs_spin_lock() so that perf can correctly account for the > - * time spent in this lock function. > */ > -static noinline > void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > { > struct mcs_spinlock *prev; > @@ -47,6 +48,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > while (!ACCESS_ONCE(node->locked)) > arch_mutex_cpu_relax(); > } > +EXPORT_SYMBOL_GPL(mcs_spin_lock); > > /* > * Releases the lock. The caller should pass in the corresponding node that > @@ -69,5 +71,4 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > ACCESS_ONCE(next->locked) = 1; > smp_wmb(); > } > - > -#endif /* __LINUX_MCS_SPINLOCK_H */ > +EXPORT_SYMBOL_GPL(mcs_spin_unlock); > -- > 1.7.4.4 > > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file @ 2013-11-19 19:15 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:15 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Fri, Nov 08, 2013 at 11:52:15AM -0800, Tim Chen wrote: > From: Waiman Long <Waiman.Long@hp.com> > > The following changes are made: > > 1) Create a new mcs_spinlock.c file to contain the > mcs_spin_lock() and mcs_spin_unlock() function. > 2) Include a number of prerequisite header files and define > arch_mutex_cpu_relax(), if not previously defined so the > mcs functions can be compiled for multiple architecture without > causing problems. > > Signed-off-by: Waiman Long <Waiman.Long@hp.com> > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> Assuming issues called out in patch #1 are resolved and testing called out for patch #2 has happened: Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > --- > include/linux/mcs_spinlock.h | 56 ++------------------ > kernel/locking/Makefile | 6 +- > .../locking/mcs_spinlock.c | 31 ++++++----- > 3 files changed, 23 insertions(+), 70 deletions(-) > copy include/linux/mcs_spinlock.h => kernel/locking/mcs_spinlock.c (78%) > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > index 96f14299..d54bb23 100644 > --- a/include/linux/mcs_spinlock.h > +++ b/include/linux/mcs_spinlock.h > @@ -17,57 +17,9 @@ struct mcs_spinlock { > int locked; /* 1 if lock acquired */ > }; > > -/* > - * In order to acquire the lock, the caller should declare a local node and > - * pass a reference of the node to this function in addition to the lock. > - * If the lock has already been acquired, then this will proceed to spin > - * on this node->locked until the previous lock holder sets the node->locked > - * in mcs_spin_unlock(). > - * > - * We don't inline mcs_spin_lock() so that perf can correctly account for the > - * time spent in this lock function. > - */ > -static noinline > -void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > -{ > - struct mcs_spinlock *prev; > - > - /* Init node */ > - node->locked = 0; > - node->next = NULL; > - > - prev = xchg(lock, node); > - if (likely(prev == NULL)) { > - /* Lock acquired */ > - return; > - } > - ACCESS_ONCE(prev->next) = node; > - smp_wmb(); > - /* Wait until the lock holder passes the lock down */ > - while (!ACCESS_ONCE(node->locked)) > - arch_mutex_cpu_relax(); > -} > - > -/* > - * Releases the lock. The caller should pass in the corresponding node that > - * was used to acquire the lock. > - */ > -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > -{ > - struct mcs_spinlock *next = ACCESS_ONCE(node->next); > - > - if (likely(!next)) { > - /* > - * Release the lock by setting it to NULL > - */ > - if (likely(cmpxchg(lock, node, NULL) == node)) > - return; > - /* Wait until the next pointer is set */ > - while (!(next = ACCESS_ONCE(node->next))) > - arch_mutex_cpu_relax(); > - } > - ACCESS_ONCE(next->locked) = 1; > - smp_wmb(); > -} > +extern > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node); > +extern > +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node); > > #endif /* __LINUX_MCS_SPINLOCK_H */ > diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile > index baab8e5..20d9d5c 100644 > --- a/kernel/locking/Makefile > +++ b/kernel/locking/Makefile > @@ -13,12 +13,12 @@ obj-$(CONFIG_LOCKDEP) += lockdep.o > ifeq ($(CONFIG_PROC_FS),y) > obj-$(CONFIG_LOCKDEP) += lockdep_proc.o > endif > -obj-$(CONFIG_SMP) += spinlock.o > -obj-$(CONFIG_PROVE_LOCKING) += spinlock.o > +obj-$(CONFIG_SMP) += spinlock.o mcs_spinlock.o > +obj-$(CONFIG_PROVE_LOCKING) += spinlock.o mcs_spinlock.o > obj-$(CONFIG_RT_MUTEXES) += rtmutex.o > obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o > obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o > -obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o > +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o mcs_spinlock.o > obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o > obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o > obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o > diff --git a/include/linux/mcs_spinlock.h b/kernel/locking/mcs_spinlock.c > similarity index 78% > copy from include/linux/mcs_spinlock.h > copy to kernel/locking/mcs_spinlock.c > index 96f14299..b6f27f8 100644 > --- a/include/linux/mcs_spinlock.h > +++ b/kernel/locking/mcs_spinlock.c > @@ -1,7 +1,5 @@ > /* > - * MCS lock defines > - * > - * This file contains the main data structure and API definitions of MCS lock. > + * MCS lock > * > * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock > * with the desirable properties of being fair, and with each cpu trying > @@ -9,13 +7,20 @@ > * It avoids expensive cache bouncings that common test-and-set spin-lock > * implementations incur. > */ > -#ifndef __LINUX_MCS_SPINLOCK_H > -#define __LINUX_MCS_SPINLOCK_H > +/* > + * asm/processor.h may define arch_mutex_cpu_relax(). > + * If it is not defined, cpu_relax() will be used. > + */ > +#include <asm/barrier.h> > +#include <asm/cmpxchg.h> > +#include <asm/processor.h> > +#include <linux/compiler.h> > +#include <linux/mcs_spinlock.h> > +#include <linux/export.h> > > -struct mcs_spinlock { > - struct mcs_spinlock *next; > - int locked; /* 1 if lock acquired */ > -}; > +#ifndef arch_mutex_cpu_relax > +# define arch_mutex_cpu_relax() cpu_relax() > +#endif > > /* > * In order to acquire the lock, the caller should declare a local node and > @@ -23,11 +28,7 @@ struct mcs_spinlock { > * If the lock has already been acquired, then this will proceed to spin > * on this node->locked until the previous lock holder sets the node->locked > * in mcs_spin_unlock(). > - * > - * We don't inline mcs_spin_lock() so that perf can correctly account for the > - * time spent in this lock function. > */ > -static noinline > void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > { > struct mcs_spinlock *prev; > @@ -47,6 +48,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > while (!ACCESS_ONCE(node->locked)) > arch_mutex_cpu_relax(); > } > +EXPORT_SYMBOL_GPL(mcs_spin_lock); > > /* > * Releases the lock. The caller should pass in the corresponding node that > @@ -69,5 +71,4 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > ACCESS_ONCE(next->locked) = 1; > smp_wmb(); > } > - > -#endif /* __LINUX_MCS_SPINLOCK_H */ > +EXPORT_SYMBOL_GPL(mcs_spin_unlock); > -- > 1.7.4.4 > > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file @ 2013-11-19 19:15 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:15 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Fri, Nov 08, 2013 at 11:52:15AM -0800, Tim Chen wrote: > From: Waiman Long <Waiman.Long@hp.com> > > The following changes are made: > > 1) Create a new mcs_spinlock.c file to contain the > mcs_spin_lock() and mcs_spin_unlock() function. > 2) Include a number of prerequisite header files and define > arch_mutex_cpu_relax(), if not previously defined so the > mcs functions can be compiled for multiple architecture without > causing problems. > > Signed-off-by: Waiman Long <Waiman.Long@hp.com> > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> Assuming issues called out in patch #1 are resolved and testing called out for patch #2 has happened: Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> > --- > include/linux/mcs_spinlock.h | 56 ++------------------ > kernel/locking/Makefile | 6 +- > .../locking/mcs_spinlock.c | 31 ++++++----- > 3 files changed, 23 insertions(+), 70 deletions(-) > copy include/linux/mcs_spinlock.h => kernel/locking/mcs_spinlock.c (78%) > > diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h > index 96f14299..d54bb23 100644 > --- a/include/linux/mcs_spinlock.h > +++ b/include/linux/mcs_spinlock.h > @@ -17,57 +17,9 @@ struct mcs_spinlock { > int locked; /* 1 if lock acquired */ > }; > > -/* > - * In order to acquire the lock, the caller should declare a local node and > - * pass a reference of the node to this function in addition to the lock. > - * If the lock has already been acquired, then this will proceed to spin > - * on this node->locked until the previous lock holder sets the node->locked > - * in mcs_spin_unlock(). > - * > - * We don't inline mcs_spin_lock() so that perf can correctly account for the > - * time spent in this lock function. > - */ > -static noinline > -void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > -{ > - struct mcs_spinlock *prev; > - > - /* Init node */ > - node->locked = 0; > - node->next = NULL; > - > - prev = xchg(lock, node); > - if (likely(prev == NULL)) { > - /* Lock acquired */ > - return; > - } > - ACCESS_ONCE(prev->next) = node; > - smp_wmb(); > - /* Wait until the lock holder passes the lock down */ > - while (!ACCESS_ONCE(node->locked)) > - arch_mutex_cpu_relax(); > -} > - > -/* > - * Releases the lock. The caller should pass in the corresponding node that > - * was used to acquire the lock. > - */ > -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > -{ > - struct mcs_spinlock *next = ACCESS_ONCE(node->next); > - > - if (likely(!next)) { > - /* > - * Release the lock by setting it to NULL > - */ > - if (likely(cmpxchg(lock, node, NULL) == node)) > - return; > - /* Wait until the next pointer is set */ > - while (!(next = ACCESS_ONCE(node->next))) > - arch_mutex_cpu_relax(); > - } > - ACCESS_ONCE(next->locked) = 1; > - smp_wmb(); > -} > +extern > +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node); > +extern > +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node); > > #endif /* __LINUX_MCS_SPINLOCK_H */ > diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile > index baab8e5..20d9d5c 100644 > --- a/kernel/locking/Makefile > +++ b/kernel/locking/Makefile > @@ -13,12 +13,12 @@ obj-$(CONFIG_LOCKDEP) += lockdep.o > ifeq ($(CONFIG_PROC_FS),y) > obj-$(CONFIG_LOCKDEP) += lockdep_proc.o > endif > -obj-$(CONFIG_SMP) += spinlock.o > -obj-$(CONFIG_PROVE_LOCKING) += spinlock.o > +obj-$(CONFIG_SMP) += spinlock.o mcs_spinlock.o > +obj-$(CONFIG_PROVE_LOCKING) += spinlock.o mcs_spinlock.o > obj-$(CONFIG_RT_MUTEXES) += rtmutex.o > obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o > obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o > -obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o > +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o mcs_spinlock.o > obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o > obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o > obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o > diff --git a/include/linux/mcs_spinlock.h b/kernel/locking/mcs_spinlock.c > similarity index 78% > copy from include/linux/mcs_spinlock.h > copy to kernel/locking/mcs_spinlock.c > index 96f14299..b6f27f8 100644 > --- a/include/linux/mcs_spinlock.h > +++ b/kernel/locking/mcs_spinlock.c > @@ -1,7 +1,5 @@ > /* > - * MCS lock defines > - * > - * This file contains the main data structure and API definitions of MCS lock. > + * MCS lock > * > * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock > * with the desirable properties of being fair, and with each cpu trying > @@ -9,13 +7,20 @@ > * It avoids expensive cache bouncings that common test-and-set spin-lock > * implementations incur. > */ > -#ifndef __LINUX_MCS_SPINLOCK_H > -#define __LINUX_MCS_SPINLOCK_H > +/* > + * asm/processor.h may define arch_mutex_cpu_relax(). > + * If it is not defined, cpu_relax() will be used. > + */ > +#include <asm/barrier.h> > +#include <asm/cmpxchg.h> > +#include <asm/processor.h> > +#include <linux/compiler.h> > +#include <linux/mcs_spinlock.h> > +#include <linux/export.h> > > -struct mcs_spinlock { > - struct mcs_spinlock *next; > - int locked; /* 1 if lock acquired */ > -}; > +#ifndef arch_mutex_cpu_relax > +# define arch_mutex_cpu_relax() cpu_relax() > +#endif > > /* > * In order to acquire the lock, the caller should declare a local node and > @@ -23,11 +28,7 @@ struct mcs_spinlock { > * If the lock has already been acquired, then this will proceed to spin > * on this node->locked until the previous lock holder sets the node->locked > * in mcs_spin_unlock(). > - * > - * We don't inline mcs_spin_lock() so that perf can correctly account for the > - * time spent in this lock function. > */ > -static noinline > void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > { > struct mcs_spinlock *prev; > @@ -47,6 +48,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > while (!ACCESS_ONCE(node->locked)) > arch_mutex_cpu_relax(); > } > +EXPORT_SYMBOL_GPL(mcs_spin_lock); > > /* > * Releases the lock. The caller should pass in the corresponding node that > @@ -69,5 +71,4 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > ACCESS_ONCE(next->locked) = 1; > smp_wmb(); > } > - > -#endif /* __LINUX_MCS_SPINLOCK_H */ > +EXPORT_SYMBOL_GPL(mcs_spin_unlock); > -- > 1.7.4.4 > > > ^ permalink raw reply [flat|nested] 70+ messages in thread
* [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-08 19:52 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-08 19:52 UTC (permalink / raw) To: Ingo Molnar, Andrew Morton, Thomas Gleixner Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton From: Waiman Long <Waiman.Long@hp.com> This patch corrects the way memory barriers are used in the MCS lock with smp_load_acquire and smp_store_release fucnction. It removes ones that are not needed. It uses architecture specific load-acquire and store-release primitives for synchronization, if available. Generic implementations are provided in case they are not defined even though they may not be optimal. These generic implementation could be removed later on once changes are made in all the relevant header files. Suggested-by: Michel Lespinasse <walken@google.com> Signed-off-by: Waiman Long <Waiman.Long@hp.com> Signed-off-by: Jason Low <jason.low2@hp.com> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> --- kernel/locking/mcs_spinlock.c | 48 +++++++++++++++++++++++++++++++++++------ 1 files changed, 41 insertions(+), 7 deletions(-) diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index b6f27f8..df5c167 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c @@ -23,6 +23,31 @@ #endif /* + * Fall back to use the regular atomic operations and memory barrier if + * the acquire/release versions are not defined. + */ +#ifndef xchg_acquire +# define xchg_acquire(p, v) xchg(p, v) +#endif + +#ifndef smp_load_acquire +# define smp_load_acquire(p) \ + ({ \ + typeof(*p) __v = ACCESS_ONCE(*(p)); \ + smp_mb(); \ + __v; \ + }) +#endif + +#ifndef smp_store_release +# define smp_store_release(p, v) \ + do { \ + smp_mb(); \ + ACCESS_ONCE(*(p)) = v; \ + } while (0) +#endif + +/* * In order to acquire the lock, the caller should declare a local node and * pass a reference of the node to this function in addition to the lock. * If the lock has already been acquired, then this will proceed to spin @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) node->locked = 0; node->next = NULL; - prev = xchg(lock, node); + /* xchg() provides a memory barrier */ + prev = xchg_acquire(lock, node); if (likely(prev == NULL)) { /* Lock acquired */ return; } ACCESS_ONCE(prev->next) = node; - smp_wmb(); - /* Wait until the lock holder passes the lock down */ - while (!ACCESS_ONCE(node->locked)) + /* + * Wait until the lock holder passes the lock down. + * Using smp_load_acquire() provides a memory barrier that + * ensures subsequent operations happen after the lock is acquired. + */ + while (!(smp_load_acquire(&node->locked))) arch_mutex_cpu_relax(); } EXPORT_SYMBOL_GPL(mcs_spin_lock); @@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock); * Releases the lock. The caller should pass in the corresponding node that * was used to acquire the lock. */ -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { struct mcs_spinlock *next = ACCESS_ONCE(node->next); @@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod while (!(next = ACCESS_ONCE(node->next))) arch_mutex_cpu_relax(); } - ACCESS_ONCE(next->locked) = 1; - smp_wmb(); + /* + * Pass lock to next waiter. + * smp_store_release() provides a memory barrier to ensure + * all operations in the critical section has been completed + * before unlocking. + */ + smp_store_release(&next->locked , 1); } EXPORT_SYMBOL_GPL(mcs_spin_unlock); -- 1.7.4.4 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-08 19:52 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-08 19:52 UTC (permalink / raw) To: Ingo Molnar, Andrew Morton, Thomas Gleixner Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang From: Waiman Long <Waiman.Long@hp.com> This patch corrects the way memory barriers are used in the MCS lock with smp_load_acquire and smp_store_release fucnction. It removes ones that are not needed. It uses architecture specific load-acquire and store-release primitives for synchronization, if available. Generic implementations are provided in case they are not defined even though they may not be optimal. These generic implementation could be removed later on once changes are made in all the relevant header files. Suggested-by: Michel Lespinasse <walken@google.com> Signed-off-by: Waiman Long <Waiman.Long@hp.com> Signed-off-by: Jason Low <jason.low2@hp.com> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> --- kernel/locking/mcs_spinlock.c | 48 +++++++++++++++++++++++++++++++++++------ 1 files changed, 41 insertions(+), 7 deletions(-) diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index b6f27f8..df5c167 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c @@ -23,6 +23,31 @@ #endif /* + * Fall back to use the regular atomic operations and memory barrier if + * the acquire/release versions are not defined. + */ +#ifndef xchg_acquire +# define xchg_acquire(p, v) xchg(p, v) +#endif + +#ifndef smp_load_acquire +# define smp_load_acquire(p) \ + ({ \ + typeof(*p) __v = ACCESS_ONCE(*(p)); \ + smp_mb(); \ + __v; \ + }) +#endif + +#ifndef smp_store_release +# define smp_store_release(p, v) \ + do { \ + smp_mb(); \ + ACCESS_ONCE(*(p)) = v; \ + } while (0) +#endif + +/* * In order to acquire the lock, the caller should declare a local node and * pass a reference of the node to this function in addition to the lock. * If the lock has already been acquired, then this will proceed to spin @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) node->locked = 0; node->next = NULL; - prev = xchg(lock, node); + /* xchg() provides a memory barrier */ + prev = xchg_acquire(lock, node); if (likely(prev == NULL)) { /* Lock acquired */ return; } ACCESS_ONCE(prev->next) = node; - smp_wmb(); - /* Wait until the lock holder passes the lock down */ - while (!ACCESS_ONCE(node->locked)) + /* + * Wait until the lock holder passes the lock down. + * Using smp_load_acquire() provides a memory barrier that + * ensures subsequent operations happen after the lock is acquired. + */ + while (!(smp_load_acquire(&node->locked))) arch_mutex_cpu_relax(); } EXPORT_SYMBOL_GPL(mcs_spin_lock); @@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock); * Releases the lock. The caller should pass in the corresponding node that * was used to acquire the lock. */ -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { struct mcs_spinlock *next = ACCESS_ONCE(node->next); @@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod while (!(next = ACCESS_ONCE(node->next))) arch_mutex_cpu_relax(); } - ACCESS_ONCE(next->locked) = 1; - smp_wmb(); + /* + * Pass lock to next waiter. + * smp_store_release() provides a memory barrier to ensure + * all operations in the critical section has been completed + * before unlocking. + */ + smp_store_release(&next->locked , 1); } EXPORT_SYMBOL_GPL(mcs_spin_unlock); -- 1.7.4.4 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply related [flat|nested] 70+ messages in thread
* [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-08 19:52 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-08 19:52 UTC (permalink / raw) To: Ingo Molnar, Andrew Morton, Thomas Gleixner Cc: linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Tim Chen, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang From: Waiman Long <Waiman.Long@hp.com> This patch corrects the way memory barriers are used in the MCS lock with smp_load_acquire and smp_store_release fucnction. It removes ones that are not needed. It uses architecture specific load-acquire and store-release primitives for synchronization, if available. Generic implementations are provided in case they are not defined even though they may not be optimal. These generic implementation could be removed later on once changes are made in all the relevant header files. Suggested-by: Michel Lespinasse <walken@google.com> Signed-off-by: Waiman Long <Waiman.Long@hp.com> Signed-off-by: Jason Low <jason.low2@hp.com> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> --- kernel/locking/mcs_spinlock.c | 48 +++++++++++++++++++++++++++++++++++------ 1 files changed, 41 insertions(+), 7 deletions(-) diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index b6f27f8..df5c167 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c @@ -23,6 +23,31 @@ #endif /* + * Fall back to use the regular atomic operations and memory barrier if + * the acquire/release versions are not defined. + */ +#ifndef xchg_acquire +# define xchg_acquire(p, v) xchg(p, v) +#endif + +#ifndef smp_load_acquire +# define smp_load_acquire(p) \ + ({ \ + typeof(*p) __v = ACCESS_ONCE(*(p)); \ + smp_mb(); \ + __v; \ + }) +#endif + +#ifndef smp_store_release +# define smp_store_release(p, v) \ + do { \ + smp_mb(); \ + ACCESS_ONCE(*(p)) = v; \ + } while (0) +#endif + +/* * In order to acquire the lock, the caller should declare a local node and * pass a reference of the node to this function in addition to the lock. * If the lock has already been acquired, then this will proceed to spin @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) node->locked = 0; node->next = NULL; - prev = xchg(lock, node); + /* xchg() provides a memory barrier */ + prev = xchg_acquire(lock, node); if (likely(prev == NULL)) { /* Lock acquired */ return; } ACCESS_ONCE(prev->next) = node; - smp_wmb(); - /* Wait until the lock holder passes the lock down */ - while (!ACCESS_ONCE(node->locked)) + /* + * Wait until the lock holder passes the lock down. + * Using smp_load_acquire() provides a memory barrier that + * ensures subsequent operations happen after the lock is acquired. + */ + while (!(smp_load_acquire(&node->locked))) arch_mutex_cpu_relax(); } EXPORT_SYMBOL_GPL(mcs_spin_lock); @@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock); * Releases the lock. The caller should pass in the corresponding node that * was used to acquire the lock. */ -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { struct mcs_spinlock *next = ACCESS_ONCE(node->next); @@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod while (!(next = ACCESS_ONCE(node->next))) arch_mutex_cpu_relax(); } - ACCESS_ONCE(next->locked) = 1; - smp_wmb(); + /* + * Pass lock to next waiter. + * smp_store_release() provides a memory barrier to ensure + * all operations in the critical section has been completed + * before unlocking. + */ + smp_store_release(&next->locked , 1); } EXPORT_SYMBOL_GPL(mcs_spin_unlock); -- 1.7.4.4 ^ permalink raw reply related [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections 2013-11-08 19:52 ` Tim Chen @ 2013-11-11 18:10 ` Will Deacon -1 siblings, 0 replies; 70+ messages in thread From: Will Deacon @ 2013-11-11 18:10 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T Hello, On Fri, Nov 08, 2013 at 07:52:38PM +0000, Tim Chen wrote: > From: Waiman Long <Waiman.Long@hp.com> > > This patch corrects the way memory barriers are used in the MCS lock > with smp_load_acquire and smp_store_release fucnction. > It removes ones that are not needed. > > It uses architecture specific load-acquire and store-release > primitives for synchronization, if available. Generic implementations > are provided in case they are not defined even though they may not > be optimal. These generic implementation could be removed later on > once changes are made in all the relevant header files. > > Suggested-by: Michel Lespinasse <walken@google.com> > Signed-off-by: Waiman Long <Waiman.Long@hp.com> > Signed-off-by: Jason Low <jason.low2@hp.com> > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > --- > kernel/locking/mcs_spinlock.c | 48 +++++++++++++++++++++++++++++++++++------ > 1 files changed, 41 insertions(+), 7 deletions(-) > > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c > index b6f27f8..df5c167 100644 > --- a/kernel/locking/mcs_spinlock.c > +++ b/kernel/locking/mcs_spinlock.c > @@ -23,6 +23,31 @@ > #endif > > /* > + * Fall back to use the regular atomic operations and memory barrier if > + * the acquire/release versions are not defined. > + */ > +#ifndef xchg_acquire > +# define xchg_acquire(p, v) xchg(p, v) > +#endif > + > +#ifndef smp_load_acquire > +# define smp_load_acquire(p) \ > + ({ \ > + typeof(*p) __v = ACCESS_ONCE(*(p)); \ > + smp_mb(); \ > + __v; \ > + }) > +#endif > + > +#ifndef smp_store_release > +# define smp_store_release(p, v) \ > + do { \ > + smp_mb(); \ > + ACCESS_ONCE(*(p)) = v; \ > + } while (0) > +#endif PeterZ already has a series implementing acquire/release accessors, so you should probably take a look at that rather than rolling your own here. You could then augment that with [cmp]xchg_{acquire,release} as appropriate. > +/* > * In order to acquire the lock, the caller should declare a local node and > * pass a reference of the node to this function in addition to the lock. > * If the lock has already been acquired, then this will proceed to spin > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > node->locked = 0; > node->next = NULL; > > - prev = xchg(lock, node); > + /* xchg() provides a memory barrier */ > + prev = xchg_acquire(lock, node); > if (likely(prev == NULL)) { > /* Lock acquired */ > return; > } > ACCESS_ONCE(prev->next) = node; > - smp_wmb(); > - /* Wait until the lock holder passes the lock down */ > - while (!ACCESS_ONCE(node->locked)) > + /* > + * Wait until the lock holder passes the lock down. > + * Using smp_load_acquire() provides a memory barrier that > + * ensures subsequent operations happen after the lock is acquired. > + */ > + while (!(smp_load_acquire(&node->locked))) > arch_mutex_cpu_relax(); After a chat with some micro-architects, I'm going to have to disagree with Paul here. For architectures where acquire/release are implemented with explicit barriers (similarly for simple microarchitectures), emitting barriers in a loop *is* going to have an affect on overall performance, since those barriers may well result in traffic outside of the core (at least, on ARM). Thinking more about that, the real issue here is that arch_mutex_cpu_relax() doesn't have a corresponding hook on the unlock side. On ARM, for example, we can enter a low-power state using the wfe instruction, but that requires the unlocker to wake up the core when the lock is released. So, although I'm completely in favour of introducing acquire/release accessors, I really think the mcs locking routines would benefit from some arch-specific backend code, even if it's optional (although I would imagine most architectures implementing something to improve power and/or performance). Will -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-11 18:10 ` Will Deacon 0 siblings, 0 replies; 70+ messages in thread From: Will Deacon @ 2013-11-11 18:10 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Figo.zhang Hello, On Fri, Nov 08, 2013 at 07:52:38PM +0000, Tim Chen wrote: > From: Waiman Long <Waiman.Long@hp.com> > > This patch corrects the way memory barriers are used in the MCS lock > with smp_load_acquire and smp_store_release fucnction. > It removes ones that are not needed. > > It uses architecture specific load-acquire and store-release > primitives for synchronization, if available. Generic implementations > are provided in case they are not defined even though they may not > be optimal. These generic implementation could be removed later on > once changes are made in all the relevant header files. > > Suggested-by: Michel Lespinasse <walken@google.com> > Signed-off-by: Waiman Long <Waiman.Long@hp.com> > Signed-off-by: Jason Low <jason.low2@hp.com> > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > --- > kernel/locking/mcs_spinlock.c | 48 +++++++++++++++++++++++++++++++++++------ > 1 files changed, 41 insertions(+), 7 deletions(-) > > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c > index b6f27f8..df5c167 100644 > --- a/kernel/locking/mcs_spinlock.c > +++ b/kernel/locking/mcs_spinlock.c > @@ -23,6 +23,31 @@ > #endif > > /* > + * Fall back to use the regular atomic operations and memory barrier if > + * the acquire/release versions are not defined. > + */ > +#ifndef xchg_acquire > +# define xchg_acquire(p, v) xchg(p, v) > +#endif > + > +#ifndef smp_load_acquire > +# define smp_load_acquire(p) \ > + ({ \ > + typeof(*p) __v = ACCESS_ONCE(*(p)); \ > + smp_mb(); \ > + __v; \ > + }) > +#endif > + > +#ifndef smp_store_release > +# define smp_store_release(p, v) \ > + do { \ > + smp_mb(); \ > + ACCESS_ONCE(*(p)) = v; \ > + } while (0) > +#endif PeterZ already has a series implementing acquire/release accessors, so you should probably take a look at that rather than rolling your own here. You could then augment that with [cmp]xchg_{acquire,release} as appropriate. > +/* > * In order to acquire the lock, the caller should declare a local node and > * pass a reference of the node to this function in addition to the lock. > * If the lock has already been acquired, then this will proceed to spin > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > node->locked = 0; > node->next = NULL; > > - prev = xchg(lock, node); > + /* xchg() provides a memory barrier */ > + prev = xchg_acquire(lock, node); > if (likely(prev == NULL)) { > /* Lock acquired */ > return; > } > ACCESS_ONCE(prev->next) = node; > - smp_wmb(); > - /* Wait until the lock holder passes the lock down */ > - while (!ACCESS_ONCE(node->locked)) > + /* > + * Wait until the lock holder passes the lock down. > + * Using smp_load_acquire() provides a memory barrier that > + * ensures subsequent operations happen after the lock is acquired. > + */ > + while (!(smp_load_acquire(&node->locked))) > arch_mutex_cpu_relax(); After a chat with some micro-architects, I'm going to have to disagree with Paul here. For architectures where acquire/release are implemented with explicit barriers (similarly for simple microarchitectures), emitting barriers in a loop *is* going to have an affect on overall performance, since those barriers may well result in traffic outside of the core (at least, on ARM). Thinking more about that, the real issue here is that arch_mutex_cpu_relax() doesn't have a corresponding hook on the unlock side. On ARM, for example, we can enter a low-power state using the wfe instruction, but that requires the unlocker to wake up the core when the lock is released. So, although I'm completely in favour of introducing acquire/release accessors, I really think the mcs locking routines would benefit from some arch-specific backend code, even if it's optional (although I would imagine most architectures implementing something to improve power and/or performance). Will -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections 2013-11-11 18:10 ` Will Deacon @ 2013-11-11 18:20 ` Peter Zijlstra -1 siblings, 0 replies; 70+ messages in thread From: Peter Zijlstra @ 2013-11-11 18:20 UTC (permalink / raw) To: Will Deacon Cc: Tim Chen, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T, George Spelvin, H. Peter On Mon, Nov 11, 2013 at 06:10:49PM +0000, Will Deacon wrote: > > + /* > > + * Wait until the lock holder passes the lock down. > > + * Using smp_load_acquire() provides a memory barrier that > > + * ensures subsequent operations happen after the lock is acquired. > > + */ > > + while (!(smp_load_acquire(&node->locked))) > > arch_mutex_cpu_relax(); > Thinking more about that, the real issue here is that arch_mutex_cpu_relax() > doesn't have a corresponding hook on the unlock side. On ARM, for example, > we can enter a low-power state using the wfe instruction, but that requires > the unlocker to wake up the core when the lock is released. That said, it would be ever so awesome if we could come to some sort of conclusion on control dependencies here. I _know_ C/C++ doesn't do them, but at the end of the day the compiler still generates ASM and as long as we're relatively certain there's a loop there (there has to be, right? :-), we could maybe rely on it anyway. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-11 18:20 ` Peter Zijlstra 0 siblings, 0 replies; 70+ messages in thread From: Peter Zijlstra @ 2013-11-11 18:20 UTC (permalink / raw) To: Will Deacon Cc: Tim Chen, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Figo.zhang On Mon, Nov 11, 2013 at 06:10:49PM +0000, Will Deacon wrote: > > + /* > > + * Wait until the lock holder passes the lock down. > > + * Using smp_load_acquire() provides a memory barrier that > > + * ensures subsequent operations happen after the lock is acquired. > > + */ > > + while (!(smp_load_acquire(&node->locked))) > > arch_mutex_cpu_relax(); > Thinking more about that, the real issue here is that arch_mutex_cpu_relax() > doesn't have a corresponding hook on the unlock side. On ARM, for example, > we can enter a low-power state using the wfe instruction, but that requires > the unlocker to wake up the core when the lock is released. That said, it would be ever so awesome if we could come to some sort of conclusion on control dependencies here. I _know_ C/C++ doesn't do them, but at the end of the day the compiler still generates ASM and as long as we're relatively certain there's a loop there (there has to be, right? :-), we could maybe rely on it anyway. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections 2013-11-11 18:20 ` Peter Zijlstra @ 2013-11-19 19:23 ` Paul E. McKenney -1 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:23 UTC (permalink / raw) To: Peter Zijlstra Cc: Will Deacon, Tim Chen, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley, Raghavendra On Mon, Nov 11, 2013 at 07:20:59PM +0100, Peter Zijlstra wrote: > On Mon, Nov 11, 2013 at 06:10:49PM +0000, Will Deacon wrote: > > > + /* > > > + * Wait until the lock holder passes the lock down. > > > + * Using smp_load_acquire() provides a memory barrier that > > > + * ensures subsequent operations happen after the lock is acquired. > > > + */ > > > + while (!(smp_load_acquire(&node->locked))) > > > arch_mutex_cpu_relax(); > > > Thinking more about that, the real issue here is that arch_mutex_cpu_relax() > > doesn't have a corresponding hook on the unlock side. On ARM, for example, > > we can enter a low-power state using the wfe instruction, but that requires > > the unlocker to wake up the core when the lock is released. > > That said, it would be ever so awesome if we could come to some sort of > conclusion on control dependencies here. > > I _know_ C/C++ doesn't do them, but at the end of the day the compiler > still generates ASM and as long as we're relatively certain there's a > loop there (there has to be, right? :-), we could maybe rely on it > anyway. I believe that we will be able to rely on control dependencies leading to stores (not loads!), but even then they are not guaranteed to provide transitivity. This lack of transitivity sounds problematic for locking primitives. Thanx, Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-19 19:23 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:23 UTC (permalink / raw) To: Peter Zijlstra Cc: Will Deacon, Tim Chen, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Figo.zhang On Mon, Nov 11, 2013 at 07:20:59PM +0100, Peter Zijlstra wrote: > On Mon, Nov 11, 2013 at 06:10:49PM +0000, Will Deacon wrote: > > > + /* > > > + * Wait until the lock holder passes the lock down. > > > + * Using smp_load_acquire() provides a memory barrier that > > > + * ensures subsequent operations happen after the lock is acquired. > > > + */ > > > + while (!(smp_load_acquire(&node->locked))) > > > arch_mutex_cpu_relax(); > > > Thinking more about that, the real issue here is that arch_mutex_cpu_relax() > > doesn't have a corresponding hook on the unlock side. On ARM, for example, > > we can enter a low-power state using the wfe instruction, but that requires > > the unlocker to wake up the core when the lock is released. > > That said, it would be ever so awesome if we could come to some sort of > conclusion on control dependencies here. > > I _know_ C/C++ doesn't do them, but at the end of the day the compiler > still generates ASM and as long as we're relatively certain there's a > loop there (there has to be, right? :-), we could maybe rely on it > anyway. I believe that we will be able to rely on control dependencies leading to stores (not loads!), but even then they are not guaranteed to provide transitivity. This lack of transitivity sounds problematic for locking primitives. Thanx, Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections 2013-11-11 18:10 ` Will Deacon @ 2013-11-11 21:17 ` Tim Chen -1 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-11 21:17 UTC (permalink / raw) To: Will Deacon Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T, George Spelvin, H. Peter On Mon, 2013-11-11 at 18:10 +0000, Will Deacon wrote: > Hello, > > On Fri, Nov 08, 2013 at 07:52:38PM +0000, Tim Chen wrote: > > From: Waiman Long <Waiman.Long@hp.com> > > > > This patch corrects the way memory barriers are used in the MCS lock > > with smp_load_acquire and smp_store_release fucnction. > > It removes ones that are not needed. > > > > It uses architecture specific load-acquire and store-release > > primitives for synchronization, if available. Generic implementations > > are provided in case they are not defined even though they may not > > be optimal. These generic implementation could be removed later on > > once changes are made in all the relevant header files. > > > > Suggested-by: Michel Lespinasse <walken@google.com> > > Signed-off-by: Waiman Long <Waiman.Long@hp.com> > > Signed-off-by: Jason Low <jason.low2@hp.com> > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > > --- > > kernel/locking/mcs_spinlock.c | 48 +++++++++++++++++++++++++++++++++++------ > > 1 files changed, 41 insertions(+), 7 deletions(-) > > > > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c > > index b6f27f8..df5c167 100644 > > --- a/kernel/locking/mcs_spinlock.c > > +++ b/kernel/locking/mcs_spinlock.c > > @@ -23,6 +23,31 @@ > > #endif > > > > /* > > + * Fall back to use the regular atomic operations and memory barrier if > > + * the acquire/release versions are not defined. > > + */ > > +#ifndef xchg_acquire > > +# define xchg_acquire(p, v) xchg(p, v) > > +#endif > > + > > +#ifndef smp_load_acquire > > +# define smp_load_acquire(p) \ > > + ({ \ > > + typeof(*p) __v = ACCESS_ONCE(*(p)); \ > > + smp_mb(); \ > > + __v; \ > > + }) > > +#endif > > + > > +#ifndef smp_store_release > > +# define smp_store_release(p, v) \ > > + do { \ > > + smp_mb(); \ > > + ACCESS_ONCE(*(p)) = v; \ > > + } while (0) > > +#endif > > PeterZ already has a series implementing acquire/release accessors, so you > should probably take a look at that rather than rolling your own here. Yes, we are using Peter Z's implementation here. The above is for anything where smp_load_acquire and smp_store_release are *not* defined. We can remove this once all architectures implement the acquire and release functions as mentioned in the comments of the patch. > > You could then augment that with [cmp]xchg_{acquire,release} as > appropriate. > > > +/* > > * In order to acquire the lock, the caller should declare a local node and > > * pass a reference of the node to this function in addition to the lock. > > * If the lock has already been acquired, then this will proceed to spin > > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > node->locked = 0; > > node->next = NULL; > > > > - prev = xchg(lock, node); > > + /* xchg() provides a memory barrier */ > > + prev = xchg_acquire(lock, node); > > if (likely(prev == NULL)) { > > /* Lock acquired */ > > return; > > } > > ACCESS_ONCE(prev->next) = node; > > - smp_wmb(); > > - /* Wait until the lock holder passes the lock down */ > > - while (!ACCESS_ONCE(node->locked)) > > + /* > > + * Wait until the lock holder passes the lock down. > > + * Using smp_load_acquire() provides a memory barrier that > > + * ensures subsequent operations happen after the lock is acquired. > > + */ > > + while (!(smp_load_acquire(&node->locked))) > > arch_mutex_cpu_relax(); > An alternate implementation is while (!ACCESS_ONCE(node->locked)) arch_mutex_cpu_relax(); smp_load_acquire(&node->locked); Leaving the smp_load_acquire at the end to provide appropriate barrier. Will that be acceptable? Tim > After a chat with some micro-architects, I'm going to have to disagree with > Paul here. For architectures where acquire/release are implemented with > explicit barriers (similarly for simple microarchitectures), emitting > barriers in a loop *is* going to have an affect on overall performance, > since those barriers may well result in traffic outside of the core (at > least, on ARM). > > Thinking more about that, the real issue here is that arch_mutex_cpu_relax() > doesn't have a corresponding hook on the unlock side. On ARM, for example, > we can enter a low-power state using the wfe instruction, but that requires > the unlocker to wake up the core when the lock is released. > > So, although I'm completely in favour of introducing acquire/release > accessors, I really think the mcs locking routines would benefit from > some arch-specific backend code, even if it's optional (although I would > imagine most architectures implementing something to improve power and/or > performance). > > Will -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-11 21:17 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-11 21:17 UTC (permalink / raw) To: Will Deacon Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Figo.zhang On Mon, 2013-11-11 at 18:10 +0000, Will Deacon wrote: > Hello, > > On Fri, Nov 08, 2013 at 07:52:38PM +0000, Tim Chen wrote: > > From: Waiman Long <Waiman.Long@hp.com> > > > > This patch corrects the way memory barriers are used in the MCS lock > > with smp_load_acquire and smp_store_release fucnction. > > It removes ones that are not needed. > > > > It uses architecture specific load-acquire and store-release > > primitives for synchronization, if available. Generic implementations > > are provided in case they are not defined even though they may not > > be optimal. These generic implementation could be removed later on > > once changes are made in all the relevant header files. > > > > Suggested-by: Michel Lespinasse <walken@google.com> > > Signed-off-by: Waiman Long <Waiman.Long@hp.com> > > Signed-off-by: Jason Low <jason.low2@hp.com> > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > > --- > > kernel/locking/mcs_spinlock.c | 48 +++++++++++++++++++++++++++++++++++------ > > 1 files changed, 41 insertions(+), 7 deletions(-) > > > > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c > > index b6f27f8..df5c167 100644 > > --- a/kernel/locking/mcs_spinlock.c > > +++ b/kernel/locking/mcs_spinlock.c > > @@ -23,6 +23,31 @@ > > #endif > > > > /* > > + * Fall back to use the regular atomic operations and memory barrier if > > + * the acquire/release versions are not defined. > > + */ > > +#ifndef xchg_acquire > > +# define xchg_acquire(p, v) xchg(p, v) > > +#endif > > + > > +#ifndef smp_load_acquire > > +# define smp_load_acquire(p) \ > > + ({ \ > > + typeof(*p) __v = ACCESS_ONCE(*(p)); \ > > + smp_mb(); \ > > + __v; \ > > + }) > > +#endif > > + > > +#ifndef smp_store_release > > +# define smp_store_release(p, v) \ > > + do { \ > > + smp_mb(); \ > > + ACCESS_ONCE(*(p)) = v; \ > > + } while (0) > > +#endif > > PeterZ already has a series implementing acquire/release accessors, so you > should probably take a look at that rather than rolling your own here. Yes, we are using Peter Z's implementation here. The above is for anything where smp_load_acquire and smp_store_release are *not* defined. We can remove this once all architectures implement the acquire and release functions as mentioned in the comments of the patch. > > You could then augment that with [cmp]xchg_{acquire,release} as > appropriate. > > > +/* > > * In order to acquire the lock, the caller should declare a local node and > > * pass a reference of the node to this function in addition to the lock. > > * If the lock has already been acquired, then this will proceed to spin > > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > node->locked = 0; > > node->next = NULL; > > > > - prev = xchg(lock, node); > > + /* xchg() provides a memory barrier */ > > + prev = xchg_acquire(lock, node); > > if (likely(prev == NULL)) { > > /* Lock acquired */ > > return; > > } > > ACCESS_ONCE(prev->next) = node; > > - smp_wmb(); > > - /* Wait until the lock holder passes the lock down */ > > - while (!ACCESS_ONCE(node->locked)) > > + /* > > + * Wait until the lock holder passes the lock down. > > + * Using smp_load_acquire() provides a memory barrier that > > + * ensures subsequent operations happen after the lock is acquired. > > + */ > > + while (!(smp_load_acquire(&node->locked))) > > arch_mutex_cpu_relax(); > An alternate implementation is while (!ACCESS_ONCE(node->locked)) arch_mutex_cpu_relax(); smp_load_acquire(&node->locked); Leaving the smp_load_acquire at the end to provide appropriate barrier. Will that be acceptable? Tim > After a chat with some micro-architects, I'm going to have to disagree with > Paul here. For architectures where acquire/release are implemented with > explicit barriers (similarly for simple microarchitectures), emitting > barriers in a loop *is* going to have an affect on overall performance, > since those barriers may well result in traffic outside of the core (at > least, on ARM). > > Thinking more about that, the real issue here is that arch_mutex_cpu_relax() > doesn't have a corresponding hook on the unlock side. On ARM, for example, > we can enter a low-power state using the wfe instruction, but that requires > the unlocker to wake up the core when the lock is released. > > So, although I'm completely in favour of introducing acquire/release > accessors, I really think the mcs locking routines would benefit from > some arch-specific backend code, even if it's optional (although I would > imagine most architectures implementing something to improve power and/or > performance). > > Will -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections 2013-11-11 21:17 ` Tim Chen @ 2013-11-12 1:57 ` Waiman Long -1 siblings, 0 replies; 70+ messages in thread From: Waiman Long @ 2013-11-12 1:57 UTC (permalink / raw) To: Tim Chen, Peter Zijlstra Cc: Will Deacon, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd On 11/11/2013 04:17 PM, Tim Chen wrote: >> You could then augment that with [cmp]xchg_{acquire,release} as >> appropriate. >> >>> +/* >>> * In order to acquire the lock, the caller should declare a local node and >>> * pass a reference of the node to this function in addition to the lock. >>> * If the lock has already been acquired, then this will proceed to spin >>> @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) >>> node->locked = 0; >>> node->next = NULL; >>> >>> - prev = xchg(lock, node); >>> + /* xchg() provides a memory barrier */ >>> + prev = xchg_acquire(lock, node); >>> if (likely(prev == NULL)) { >>> /* Lock acquired */ >>> return; >>> } >>> ACCESS_ONCE(prev->next) = node; >>> - smp_wmb(); >>> - /* Wait until the lock holder passes the lock down */ >>> - while (!ACCESS_ONCE(node->locked)) >>> + /* >>> + * Wait until the lock holder passes the lock down. >>> + * Using smp_load_acquire() provides a memory barrier that >>> + * ensures subsequent operations happen after the lock is acquired. >>> + */ >>> + while (!(smp_load_acquire(&node->locked))) >>> arch_mutex_cpu_relax(); > An alternate implementation is > while (!ACCESS_ONCE(node->locked)) > arch_mutex_cpu_relax(); > smp_load_acquire(&node->locked); > > Leaving the smp_load_acquire at the end to provide appropriate barrier. > Will that be acceptable? > > Tim I second Tim's opinion. It will be help to have a smp_mb_load_acquire() function that provide a memory barrier with load-acquire semantic. I don't think we need one for store-release as that will not be in a loop. Peter, what do you think about adding that to your patch? -Longman -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-12 1:57 ` Waiman Long 0 siblings, 0 replies; 70+ messages in thread From: Waiman Long @ 2013-11-12 1:57 UTC (permalink / raw) To: Tim Chen, Peter Zijlstra Cc: Will Deacon, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Figo.zhang On 11/11/2013 04:17 PM, Tim Chen wrote: >> You could then augment that with [cmp]xchg_{acquire,release} as >> appropriate. >> >>> +/* >>> * In order to acquire the lock, the caller should declare a local node and >>> * pass a reference of the node to this function in addition to the lock. >>> * If the lock has already been acquired, then this will proceed to spin >>> @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) >>> node->locked = 0; >>> node->next = NULL; >>> >>> - prev = xchg(lock, node); >>> + /* xchg() provides a memory barrier */ >>> + prev = xchg_acquire(lock, node); >>> if (likely(prev == NULL)) { >>> /* Lock acquired */ >>> return; >>> } >>> ACCESS_ONCE(prev->next) = node; >>> - smp_wmb(); >>> - /* Wait until the lock holder passes the lock down */ >>> - while (!ACCESS_ONCE(node->locked)) >>> + /* >>> + * Wait until the lock holder passes the lock down. >>> + * Using smp_load_acquire() provides a memory barrier that >>> + * ensures subsequent operations happen after the lock is acquired. >>> + */ >>> + while (!(smp_load_acquire(&node->locked))) >>> arch_mutex_cpu_relax(); > An alternate implementation is > while (!ACCESS_ONCE(node->locked)) > arch_mutex_cpu_relax(); > smp_load_acquire(&node->locked); > > Leaving the smp_load_acquire at the end to provide appropriate barrier. > Will that be acceptable? > > Tim I second Tim's opinion. It will be help to have a smp_mb_load_acquire() function that provide a memory barrier with load-acquire semantic. I don't think we need one for store-release as that will not be in a loop. Peter, what do you think about adding that to your patch? -Longman -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections 2013-11-12 1:57 ` Waiman Long @ 2013-11-19 19:32 ` Paul E. McKenney -1 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:32 UTC (permalink / raw) To: Waiman Long Cc: Tim Chen, Peter Zijlstra, Will Deacon, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley On Mon, Nov 11, 2013 at 08:57:27PM -0500, Waiman Long wrote: > On 11/11/2013 04:17 PM, Tim Chen wrote: > >>You could then augment that with [cmp]xchg_{acquire,release} as > >>appropriate. > >> > >>>+/* > >>> * In order to acquire the lock, the caller should declare a local node and > >>> * pass a reference of the node to this function in addition to the lock. > >>> * If the lock has already been acquired, then this will proceed to spin > >>>@@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > >>> node->locked = 0; > >>> node->next = NULL; > >>> > >>>- prev = xchg(lock, node); > >>>+ /* xchg() provides a memory barrier */ > >>>+ prev = xchg_acquire(lock, node); > >>> if (likely(prev == NULL)) { > >>> /* Lock acquired */ > >>> return; > >>> } > >>> ACCESS_ONCE(prev->next) = node; > >>>- smp_wmb(); > >>>- /* Wait until the lock holder passes the lock down */ > >>>- while (!ACCESS_ONCE(node->locked)) > >>>+ /* > >>>+ * Wait until the lock holder passes the lock down. > >>>+ * Using smp_load_acquire() provides a memory barrier that > >>>+ * ensures subsequent operations happen after the lock is acquired. > >>>+ */ > >>>+ while (!(smp_load_acquire(&node->locked))) > >>> arch_mutex_cpu_relax(); > >An alternate implementation is > > while (!ACCESS_ONCE(node->locked)) > > arch_mutex_cpu_relax(); > > smp_load_acquire(&node->locked); > > > >Leaving the smp_load_acquire at the end to provide appropriate barrier. > >Will that be acceptable? > > > >Tim > > I second Tim's opinion. It will be help to have a > smp_mb_load_acquire() function that provide a memory barrier with > load-acquire semantic. I don't think we need one for store-release > as that will not be in a loop. Hmmm... I guess the ACCESS_ONCE() in the smp_load_acquire() should prevent it from being optimized away. But yes, you then end up with an extra load on the critical lock hand-off patch. And something like an smp_mb_acquire() could then be useful, although I believe that on all current hardware smp_mb_acquire() emits the same code as would an smp_mb_release(): o barrier() on TSO systems such as x86 and s390. o lwsync instruction on powerpc. (Really old systems would want a different instruction for smp_mb_acquire(), but let's not optimize for really old systems.) o dmb instruction on ARM. o mf instruction on ia64. So how about an smp_mb_acquire_release() to cover both use cases? This could be used to further optimize circular buffers, for example. Thanx, Paul > Peter, what do you think about adding that to your patch? > > -Longman > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-19 19:32 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:32 UTC (permalink / raw) To: Waiman Long Cc: Tim Chen, Peter Zijlstra, Will Deacon, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Figo.zhang On Mon, Nov 11, 2013 at 08:57:27PM -0500, Waiman Long wrote: > On 11/11/2013 04:17 PM, Tim Chen wrote: > >>You could then augment that with [cmp]xchg_{acquire,release} as > >>appropriate. > >> > >>>+/* > >>> * In order to acquire the lock, the caller should declare a local node and > >>> * pass a reference of the node to this function in addition to the lock. > >>> * If the lock has already been acquired, then this will proceed to spin > >>>@@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > >>> node->locked = 0; > >>> node->next = NULL; > >>> > >>>- prev = xchg(lock, node); > >>>+ /* xchg() provides a memory barrier */ > >>>+ prev = xchg_acquire(lock, node); > >>> if (likely(prev == NULL)) { > >>> /* Lock acquired */ > >>> return; > >>> } > >>> ACCESS_ONCE(prev->next) = node; > >>>- smp_wmb(); > >>>- /* Wait until the lock holder passes the lock down */ > >>>- while (!ACCESS_ONCE(node->locked)) > >>>+ /* > >>>+ * Wait until the lock holder passes the lock down. > >>>+ * Using smp_load_acquire() provides a memory barrier that > >>>+ * ensures subsequent operations happen after the lock is acquired. > >>>+ */ > >>>+ while (!(smp_load_acquire(&node->locked))) > >>> arch_mutex_cpu_relax(); > >An alternate implementation is > > while (!ACCESS_ONCE(node->locked)) > > arch_mutex_cpu_relax(); > > smp_load_acquire(&node->locked); > > > >Leaving the smp_load_acquire at the end to provide appropriate barrier. > >Will that be acceptable? > > > >Tim > > I second Tim's opinion. It will be help to have a > smp_mb_load_acquire() function that provide a memory barrier with > load-acquire semantic. I don't think we need one for store-release > as that will not be in a loop. Hmmm... I guess the ACCESS_ONCE() in the smp_load_acquire() should prevent it from being optimized away. But yes, you then end up with an extra load on the critical lock hand-off patch. And something like an smp_mb_acquire() could then be useful, although I believe that on all current hardware smp_mb_acquire() emits the same code as would an smp_mb_release(): o barrier() on TSO systems such as x86 and s390. o lwsync instruction on powerpc. (Really old systems would want a different instruction for smp_mb_acquire(), but let's not optimize for really old systems.) o dmb instruction on ARM. o mf instruction on ia64. So how about an smp_mb_acquire_release() to cover both use cases? This could be used to further optimize circular buffers, for example. Thanx, Paul > Peter, what do you think about adding that to your patch? > > -Longman > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections 2013-11-19 19:32 ` Paul E. McKenney @ 2013-11-19 21:45 ` Tim Chen -1 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-19 21:45 UTC (permalink / raw) To: paulmck Cc: Waiman Long, Peter Zijlstra, Will Deacon, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin On Tue, 2013-11-19 at 11:32 -0800, Paul E. McKenney wrote: > On Mon, Nov 11, 2013 at 08:57:27PM -0500, Waiman Long wrote: > > On 11/11/2013 04:17 PM, Tim Chen wrote: > > >>You could then augment that with [cmp]xchg_{acquire,release} as > > >>appropriate. > > >> > > >>>+/* > > >>> * In order to acquire the lock, the caller should declare a local node and > > >>> * pass a reference of the node to this function in addition to the lock. > > >>> * If the lock has already been acquired, then this will proceed to spin > > >>>@@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > >>> node->locked = 0; > > >>> node->next = NULL; > > >>> > > >>>- prev = xchg(lock, node); > > >>>+ /* xchg() provides a memory barrier */ > > >>>+ prev = xchg_acquire(lock, node); > > >>> if (likely(prev == NULL)) { > > >>> /* Lock acquired */ > > >>> return; > > >>> } > > >>> ACCESS_ONCE(prev->next) = node; > > >>>- smp_wmb(); > > >>>- /* Wait until the lock holder passes the lock down */ > > >>>- while (!ACCESS_ONCE(node->locked)) > > >>>+ /* > > >>>+ * Wait until the lock holder passes the lock down. > > >>>+ * Using smp_load_acquire() provides a memory barrier that > > >>>+ * ensures subsequent operations happen after the lock is acquired. > > >>>+ */ > > >>>+ while (!(smp_load_acquire(&node->locked))) > > >>> arch_mutex_cpu_relax(); > > >An alternate implementation is > > > while (!ACCESS_ONCE(node->locked)) > > > arch_mutex_cpu_relax(); > > > smp_load_acquire(&node->locked); > > > > > >Leaving the smp_load_acquire at the end to provide appropriate barrier. > > >Will that be acceptable? > > > > > >Tim > > > > I second Tim's opinion. It will be help to have a > > smp_mb_load_acquire() function that provide a memory barrier with > > load-acquire semantic. I don't think we need one for store-release > > as that will not be in a loop. > > Hmmm... I guess the ACCESS_ONCE() in the smp_load_acquire() should > prevent it from being optimized away. But yes, you then end up with > an extra load on the critical lock hand-off patch. And something > like an smp_mb_acquire() could then be useful, although I believe > that on all current hardware smp_mb_acquire() emits the same code > as would an smp_mb_release(): > > o barrier() on TSO systems such as x86 and s390. > > o lwsync instruction on powerpc. (Really old systems would > want a different instruction for smp_mb_acquire(), but let's > not optimize for really old systems.) > > o dmb instruction on ARM. > > o mf instruction on ia64. > > So how about an smp_mb_acquire_release() to cover both use cases? I guess we haven't addressed Will's preference to use wfe for ARM instead of doing a spin loop. I'll like some suggestions on how to proceed here. Should we do arch_mcs_lock and arch_mcs_unlock, which defaults to the existing mcs_lock and mcs_unlock code, but allow architecture specific implementation? Tim > This could be used to further optimize circular buffers, for example. > > Thanx, Paul > > > Peter, what do you think about adding that to your patch? > > > > -Longman > > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-19 21:45 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-19 21:45 UTC (permalink / raw) To: paulmck Cc: Waiman Long, Peter Zijlstra, Will Deacon, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Figo.zhang On Tue, 2013-11-19 at 11:32 -0800, Paul E. McKenney wrote: > On Mon, Nov 11, 2013 at 08:57:27PM -0500, Waiman Long wrote: > > On 11/11/2013 04:17 PM, Tim Chen wrote: > > >>You could then augment that with [cmp]xchg_{acquire,release} as > > >>appropriate. > > >> > > >>>+/* > > >>> * In order to acquire the lock, the caller should declare a local node and > > >>> * pass a reference of the node to this function in addition to the lock. > > >>> * If the lock has already been acquired, then this will proceed to spin > > >>>@@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > >>> node->locked = 0; > > >>> node->next = NULL; > > >>> > > >>>- prev = xchg(lock, node); > > >>>+ /* xchg() provides a memory barrier */ > > >>>+ prev = xchg_acquire(lock, node); > > >>> if (likely(prev == NULL)) { > > >>> /* Lock acquired */ > > >>> return; > > >>> } > > >>> ACCESS_ONCE(prev->next) = node; > > >>>- smp_wmb(); > > >>>- /* Wait until the lock holder passes the lock down */ > > >>>- while (!ACCESS_ONCE(node->locked)) > > >>>+ /* > > >>>+ * Wait until the lock holder passes the lock down. > > >>>+ * Using smp_load_acquire() provides a memory barrier that > > >>>+ * ensures subsequent operations happen after the lock is acquired. > > >>>+ */ > > >>>+ while (!(smp_load_acquire(&node->locked))) > > >>> arch_mutex_cpu_relax(); > > >An alternate implementation is > > > while (!ACCESS_ONCE(node->locked)) > > > arch_mutex_cpu_relax(); > > > smp_load_acquire(&node->locked); > > > > > >Leaving the smp_load_acquire at the end to provide appropriate barrier. > > >Will that be acceptable? > > > > > >Tim > > > > I second Tim's opinion. It will be help to have a > > smp_mb_load_acquire() function that provide a memory barrier with > > load-acquire semantic. I don't think we need one for store-release > > as that will not be in a loop. > > Hmmm... I guess the ACCESS_ONCE() in the smp_load_acquire() should > prevent it from being optimized away. But yes, you then end up with > an extra load on the critical lock hand-off patch. And something > like an smp_mb_acquire() could then be useful, although I believe > that on all current hardware smp_mb_acquire() emits the same code > as would an smp_mb_release(): > > o barrier() on TSO systems such as x86 and s390. > > o lwsync instruction on powerpc. (Really old systems would > want a different instruction for smp_mb_acquire(), but let's > not optimize for really old systems.) > > o dmb instruction on ARM. > > o mf instruction on ia64. > > So how about an smp_mb_acquire_release() to cover both use cases? I guess we haven't addressed Will's preference to use wfe for ARM instead of doing a spin loop. I'll like some suggestions on how to proceed here. Should we do arch_mcs_lock and arch_mcs_unlock, which defaults to the existing mcs_lock and mcs_unlock code, but allow architecture specific implementation? Tim > This could be used to further optimize circular buffers, for example. > > Thanx, Paul > > > Peter, what do you think about adding that to your patch? > > > > -Longman > > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections 2013-11-19 21:45 ` Tim Chen @ 2013-11-19 23:30 ` Paul E. McKenney -1 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 23:30 UTC (permalink / raw) To: Tim Chen Cc: Waiman Long, Peter Zijlstra, Will Deacon, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley, Raghavendra On Tue, Nov 19, 2013 at 01:45:03PM -0800, Tim Chen wrote: > On Tue, 2013-11-19 at 11:32 -0800, Paul E. McKenney wrote: > > On Mon, Nov 11, 2013 at 08:57:27PM -0500, Waiman Long wrote: > > > On 11/11/2013 04:17 PM, Tim Chen wrote: > > > >>You could then augment that with [cmp]xchg_{acquire,release} as > > > >>appropriate. > > > >> > > > >>>+/* > > > >>> * In order to acquire the lock, the caller should declare a local node and > > > >>> * pass a reference of the node to this function in addition to the lock. > > > >>> * If the lock has already been acquired, then this will proceed to spin > > > >>>@@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > > >>> node->locked = 0; > > > >>> node->next = NULL; > > > >>> > > > >>>- prev = xchg(lock, node); > > > >>>+ /* xchg() provides a memory barrier */ > > > >>>+ prev = xchg_acquire(lock, node); > > > >>> if (likely(prev == NULL)) { > > > >>> /* Lock acquired */ > > > >>> return; > > > >>> } > > > >>> ACCESS_ONCE(prev->next) = node; > > > >>>- smp_wmb(); > > > >>>- /* Wait until the lock holder passes the lock down */ > > > >>>- while (!ACCESS_ONCE(node->locked)) > > > >>>+ /* > > > >>>+ * Wait until the lock holder passes the lock down. > > > >>>+ * Using smp_load_acquire() provides a memory barrier that > > > >>>+ * ensures subsequent operations happen after the lock is acquired. > > > >>>+ */ > > > >>>+ while (!(smp_load_acquire(&node->locked))) > > > >>> arch_mutex_cpu_relax(); > > > >An alternate implementation is > > > > while (!ACCESS_ONCE(node->locked)) > > > > arch_mutex_cpu_relax(); > > > > smp_load_acquire(&node->locked); > > > > > > > >Leaving the smp_load_acquire at the end to provide appropriate barrier. > > > >Will that be acceptable? > > > > > > > >Tim > > > > > > I second Tim's opinion. It will be help to have a > > > smp_mb_load_acquire() function that provide a memory barrier with > > > load-acquire semantic. I don't think we need one for store-release > > > as that will not be in a loop. > > > > Hmmm... I guess the ACCESS_ONCE() in the smp_load_acquire() should > > prevent it from being optimized away. But yes, you then end up with > > an extra load on the critical lock hand-off patch. And something > > like an smp_mb_acquire() could then be useful, although I believe > > that on all current hardware smp_mb_acquire() emits the same code > > as would an smp_mb_release(): > > > > o barrier() on TSO systems such as x86 and s390. > > > > o lwsync instruction on powerpc. (Really old systems would > > want a different instruction for smp_mb_acquire(), but let's > > not optimize for really old systems.) > > > > o dmb instruction on ARM. > > > > o mf instruction on ia64. > > > > So how about an smp_mb_acquire_release() to cover both use cases? > > I guess we haven't addressed Will's preference to use wfe for ARM > instead of doing a spin loop. I'll like some suggestions on how to > proceed here. Should we do arch_mcs_lock and arch_mcs_unlock, which > defaults to the existing mcs_lock and mcs_unlock code, but allow > architecture specific implementation? It would be nice to confine the architecture-specific pieces if at all possible. For example, can the architecture-specific piece be confined to the actual high-contention lock handoff? Thanx, Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-19 23:30 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 23:30 UTC (permalink / raw) To: Tim Chen Cc: Waiman Long, Peter Zijlstra, Will Deacon, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Figo.zhang On Tue, Nov 19, 2013 at 01:45:03PM -0800, Tim Chen wrote: > On Tue, 2013-11-19 at 11:32 -0800, Paul E. McKenney wrote: > > On Mon, Nov 11, 2013 at 08:57:27PM -0500, Waiman Long wrote: > > > On 11/11/2013 04:17 PM, Tim Chen wrote: > > > >>You could then augment that with [cmp]xchg_{acquire,release} as > > > >>appropriate. > > > >> > > > >>>+/* > > > >>> * In order to acquire the lock, the caller should declare a local node and > > > >>> * pass a reference of the node to this function in addition to the lock. > > > >>> * If the lock has already been acquired, then this will proceed to spin > > > >>>@@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > > >>> node->locked = 0; > > > >>> node->next = NULL; > > > >>> > > > >>>- prev = xchg(lock, node); > > > >>>+ /* xchg() provides a memory barrier */ > > > >>>+ prev = xchg_acquire(lock, node); > > > >>> if (likely(prev == NULL)) { > > > >>> /* Lock acquired */ > > > >>> return; > > > >>> } > > > >>> ACCESS_ONCE(prev->next) = node; > > > >>>- smp_wmb(); > > > >>>- /* Wait until the lock holder passes the lock down */ > > > >>>- while (!ACCESS_ONCE(node->locked)) > > > >>>+ /* > > > >>>+ * Wait until the lock holder passes the lock down. > > > >>>+ * Using smp_load_acquire() provides a memory barrier that > > > >>>+ * ensures subsequent operations happen after the lock is acquired. > > > >>>+ */ > > > >>>+ while (!(smp_load_acquire(&node->locked))) > > > >>> arch_mutex_cpu_relax(); > > > >An alternate implementation is > > > > while (!ACCESS_ONCE(node->locked)) > > > > arch_mutex_cpu_relax(); > > > > smp_load_acquire(&node->locked); > > > > > > > >Leaving the smp_load_acquire at the end to provide appropriate barrier. > > > >Will that be acceptable? > > > > > > > >Tim > > > > > > I second Tim's opinion. It will be help to have a > > > smp_mb_load_acquire() function that provide a memory barrier with > > > load-acquire semantic. I don't think we need one for store-release > > > as that will not be in a loop. > > > > Hmmm... I guess the ACCESS_ONCE() in the smp_load_acquire() should > > prevent it from being optimized away. But yes, you then end up with > > an extra load on the critical lock hand-off patch. And something > > like an smp_mb_acquire() could then be useful, although I believe > > that on all current hardware smp_mb_acquire() emits the same code > > as would an smp_mb_release(): > > > > o barrier() on TSO systems such as x86 and s390. > > > > o lwsync instruction on powerpc. (Really old systems would > > want a different instruction for smp_mb_acquire(), but let's > > not optimize for really old systems.) > > > > o dmb instruction on ARM. > > > > o mf instruction on ia64. > > > > So how about an smp_mb_acquire_release() to cover both use cases? > > I guess we haven't addressed Will's preference to use wfe for ARM > instead of doing a spin loop. I'll like some suggestions on how to > proceed here. Should we do arch_mcs_lock and arch_mcs_unlock, which > defaults to the existing mcs_lock and mcs_unlock code, but allow > architecture specific implementation? It would be nice to confine the architecture-specific pieces if at all possible. For example, can the architecture-specific piece be confined to the actual high-contention lock handoff? Thanx, Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections 2013-11-11 21:17 ` Tim Chen @ 2013-11-12 2:09 ` Waiman Long -1 siblings, 0 replies; 70+ messages in thread From: Waiman Long @ 2013-11-12 2:09 UTC (permalink / raw) To: Tim Chen, Peter Zijlstra Cc: Will Deacon, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd On 11/11/2013 04:17 PM, Tim Chen wrote: >> You could then augment that with [cmp]xchg_{acquire,release} as >> appropriate. >> >>> +/* >>> * In order to acquire the lock, the caller should declare a local node and >>> * pass a reference of the node to this function in addition to the lock. >>> * If the lock has already been acquired, then this will proceed to spin >>> @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) >>> node->locked = 0; >>> node->next = NULL; >>> >>> - prev = xchg(lock, node); >>> + /* xchg() provides a memory barrier */ >>> + prev = xchg_acquire(lock, node); >>> if (likely(prev == NULL)) { >>> /* Lock acquired */ >>> return; >>> } >>> ACCESS_ONCE(prev->next) = node; >>> - smp_wmb(); >>> - /* Wait until the lock holder passes the lock down */ >>> - while (!ACCESS_ONCE(node->locked)) >>> + /* >>> + * Wait until the lock holder passes the lock down. >>> + * Using smp_load_acquire() provides a memory barrier that >>> + * ensures subsequent operations happen after the lock is acquired. >>> + */ >>> + while (!(smp_load_acquire(&node->locked))) >>> arch_mutex_cpu_relax(); > An alternate implementation is > while (!ACCESS_ONCE(node->locked)) > arch_mutex_cpu_relax(); > smp_load_acquire(&node->locked); > > Leaving the smp_load_acquire at the end to provide appropriate barrier. > Will that be acceptable? > > Tim I second Tim's opinion. It will be help to have a smp_mb_load_acquire() function that provide a memory barrier with load-acquire semantic. I don't think we need one for store-release as that will not be in a loop. Peter, what do you think about adding that to your patch? -Longman -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-12 2:09 ` Waiman Long 0 siblings, 0 replies; 70+ messages in thread From: Waiman Long @ 2013-11-12 2:09 UTC (permalink / raw) To: Tim Chen, Peter Zijlstra Cc: Will Deacon, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Figo.zhang On 11/11/2013 04:17 PM, Tim Chen wrote: >> You could then augment that with [cmp]xchg_{acquire,release} as >> appropriate. >> >>> +/* >>> * In order to acquire the lock, the caller should declare a local node and >>> * pass a reference of the node to this function in addition to the lock. >>> * If the lock has already been acquired, then this will proceed to spin >>> @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) >>> node->locked = 0; >>> node->next = NULL; >>> >>> - prev = xchg(lock, node); >>> + /* xchg() provides a memory barrier */ >>> + prev = xchg_acquire(lock, node); >>> if (likely(prev == NULL)) { >>> /* Lock acquired */ >>> return; >>> } >>> ACCESS_ONCE(prev->next) = node; >>> - smp_wmb(); >>> - /* Wait until the lock holder passes the lock down */ >>> - while (!ACCESS_ONCE(node->locked)) >>> + /* >>> + * Wait until the lock holder passes the lock down. >>> + * Using smp_load_acquire() provides a memory barrier that >>> + * ensures subsequent operations happen after the lock is acquired. >>> + */ >>> + while (!(smp_load_acquire(&node->locked))) >>> arch_mutex_cpu_relax(); > An alternate implementation is > while (!ACCESS_ONCE(node->locked)) > arch_mutex_cpu_relax(); > smp_load_acquire(&node->locked); > > Leaving the smp_load_acquire at the end to provide appropriate barrier. > Will that be acceptable? > > Tim I second Tim's opinion. It will be help to have a smp_mb_load_acquire() function that provide a memory barrier with load-acquire semantic. I don't think we need one for store-release as that will not be in a loop. Peter, what do you think about adding that to your patch? -Longman -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-12 14:54 ` Waiman Long 0 siblings, 0 replies; 70+ messages in thread From: Waiman Long @ 2013-11-12 14:54 UTC (permalink / raw) To: Tim Chen, Peter Zijlstra Cc: Will Deacon, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org, Linus Torvalds, Andrea Arcangeli, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott On 11/11/2013 04:17 PM, Tim Chen wrote: >> You could then augment that with [cmp]xchg_{acquire,release} as >> appropriate. >> >>> +/* >>> * In order to acquire the lock, the caller should declare a local node and >>> * pass a reference of the node to this function in addition to the lock. >>> * If the lock has already been acquired, then this will proceed to spin >>> @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) >>> node->locked = 0; >>> node->next = NULL; >>> >>> - prev = xchg(lock, node); >>> + /* xchg() provides a memory barrier */ >>> + prev = xchg_acquire(lock, node); >>> if (likely(prev == NULL)) { >>> /* Lock acquired */ >>> return; >>> } >>> ACCESS_ONCE(prev->next) = node; >>> - smp_wmb(); >>> - /* Wait until the lock holder passes the lock down */ >>> - while (!ACCESS_ONCE(node->locked)) >>> + /* >>> + * Wait until the lock holder passes the lock down. >>> + * Using smp_load_acquire() provides a memory barrier that >>> + * ensures subsequent operations happen after the lock is acquired. >>> + */ >>> + while (!(smp_load_acquire(&node->locked))) >>> arch_mutex_cpu_relax(); > An alternate implementation is > while (!ACCESS_ONCE(node->locked)) > arch_mutex_cpu_relax(); > smp_load_acquire(&node->locked); > > Leaving the smp_load_acquire at the end to provide appropriate barrier. > Will that be acceptable? > > Tim I second Tim's opinion. It will be help to have a smp_mb_load_acquire() function that provide a memory barrier with load-acquire semantic. I don't think we need one for store-release as that will not be in a loop. Peter, what do you think about adding that to your patch? -Longman ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-12 14:54 ` Waiman Long 0 siblings, 0 replies; 70+ messages in thread From: Waiman Long @ 2013-11-12 14:54 UTC (permalink / raw) To: Tim Chen, Peter Zijlstra Cc: Will Deacon, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org, Linus Torvalds, Andrea Arcangeli, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Figo.zhang On 11/11/2013 04:17 PM, Tim Chen wrote: >> You could then augment that with [cmp]xchg_{acquire,release} as >> appropriate. >> >>> +/* >>> * In order to acquire the lock, the caller should declare a local node and >>> * pass a reference of the node to this function in addition to the lock. >>> * If the lock has already been acquired, then this will proceed to spin >>> @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) >>> node->locked = 0; >>> node->next = NULL; >>> >>> - prev = xchg(lock, node); >>> + /* xchg() provides a memory barrier */ >>> + prev = xchg_acquire(lock, node); >>> if (likely(prev == NULL)) { >>> /* Lock acquired */ >>> return; >>> } >>> ACCESS_ONCE(prev->next) = node; >>> - smp_wmb(); >>> - /* Wait until the lock holder passes the lock down */ >>> - while (!ACCESS_ONCE(node->locked)) >>> + /* >>> + * Wait until the lock holder passes the lock down. >>> + * Using smp_load_acquire() provides a memory barrier that >>> + * ensures subsequent operations happen after the lock is acquired. >>> + */ >>> + while (!(smp_load_acquire(&node->locked))) >>> arch_mutex_cpu_relax(); > An alternate implementation is > while (!ACCESS_ONCE(node->locked)) > arch_mutex_cpu_relax(); > smp_load_acquire(&node->locked); > > Leaving the smp_load_acquire at the end to provide appropriate barrier. > Will that be acceptable? > > Tim I second Tim's opinion. It will be help to have a smp_mb_load_acquire() function that provide a memory barrier with load-acquire semantic. I don't think we need one for store-release as that will not be in a loop. Peter, what do you think about adding that to your patch? -Longman ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections 2013-11-11 21:17 ` Tim Chen @ 2013-11-12 16:08 ` Will Deacon -1 siblings, 0 replies; 70+ messages in thread From: Will Deacon @ 2013-11-12 16:08 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T On Mon, Nov 11, 2013 at 09:17:52PM +0000, Tim Chen wrote: > On Mon, 2013-11-11 at 18:10 +0000, Will Deacon wrote: > > On Fri, Nov 08, 2013 at 07:52:38PM +0000, Tim Chen wrote: > > > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c > > > index b6f27f8..df5c167 100644 > > > --- a/kernel/locking/mcs_spinlock.c > > > +++ b/kernel/locking/mcs_spinlock.c > > > @@ -23,6 +23,31 @@ > > > #endif > > > > > > /* > > > + * Fall back to use the regular atomic operations and memory barrier if > > > + * the acquire/release versions are not defined. > > > + */ > > > +#ifndef xchg_acquire > > > +# define xchg_acquire(p, v) xchg(p, v) > > > +#endif > > > + > > > +#ifndef smp_load_acquire > > > +# define smp_load_acquire(p) \ > > > + ({ \ > > > + typeof(*p) __v = ACCESS_ONCE(*(p)); \ > > > + smp_mb(); \ > > > + __v; \ > > > + }) > > > +#endif > > > + > > > +#ifndef smp_store_release > > > +# define smp_store_release(p, v) \ > > > + do { \ > > > + smp_mb(); \ > > > + ACCESS_ONCE(*(p)) = v; \ > > > + } while (0) > > > +#endif > > > > PeterZ already has a series implementing acquire/release accessors, so you > > should probably take a look at that rather than rolling your own here. > > Yes, we are using Peter Z's implementation here. The above is for anything > where smp_load_acquire and smp_store_release are *not* defined. We can > remove this once all architectures implement the acquire and release > functions as mentioned in the comments of the patch. Right, so you can use barrier.h and asm-generic will define generic versions (identical to the above) for you if the architecture doesn't have an optimised variant. You don't need to reproduce that in your .c file. > > > + /* > > > + * Wait until the lock holder passes the lock down. > > > + * Using smp_load_acquire() provides a memory barrier that > > > + * ensures subsequent operations happen after the lock is acquired. > > > + */ > > > + while (!(smp_load_acquire(&node->locked))) > > > arch_mutex_cpu_relax(); > > > > After a chat with some micro-architects, I'm going to have to disagree with > > Paul here. For architectures where acquire/release are implemented with > > explicit barriers (similarly for simple microarchitectures), emitting > > barriers in a loop *is* going to have an affect on overall performance, > > since those barriers may well result in traffic outside of the core (at > > least, on ARM). > > > > Thinking more about that, the real issue here is that arch_mutex_cpu_relax() > > doesn't have a corresponding hook on the unlock side. On ARM, for example, > > we can enter a low-power state using the wfe instruction, but that requires > > the unlocker to wake up the core when the lock is released. > > An alternate implementation is > while (!ACCESS_ONCE(node->locked)) > arch_mutex_cpu_relax(); > smp_load_acquire(&node->locked); > > Leaving the smp_load_acquire at the end to provide appropriate barrier. > Will that be acceptable? It still doesn't solve my problem though: I want a way to avoid that busy loop by some architecture-specific manner. The arch_mutex_cpu_relax() hook is a start, but there is no corresponding hook on the unlock side to issue a wakeup. Given a sensible relax implementation, I don't have an issue with putting a load-acquire in a loop, since it shouldn't be aggresively spinning anymore. Will -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-12 16:08 ` Will Deacon 0 siblings, 0 replies; 70+ messages in thread From: Will Deacon @ 2013-11-12 16:08 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Paul E.McKenney, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Figo.zhang On Mon, Nov 11, 2013 at 09:17:52PM +0000, Tim Chen wrote: > On Mon, 2013-11-11 at 18:10 +0000, Will Deacon wrote: > > On Fri, Nov 08, 2013 at 07:52:38PM +0000, Tim Chen wrote: > > > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c > > > index b6f27f8..df5c167 100644 > > > --- a/kernel/locking/mcs_spinlock.c > > > +++ b/kernel/locking/mcs_spinlock.c > > > @@ -23,6 +23,31 @@ > > > #endif > > > > > > /* > > > + * Fall back to use the regular atomic operations and memory barrier if > > > + * the acquire/release versions are not defined. > > > + */ > > > +#ifndef xchg_acquire > > > +# define xchg_acquire(p, v) xchg(p, v) > > > +#endif > > > + > > > +#ifndef smp_load_acquire > > > +# define smp_load_acquire(p) \ > > > + ({ \ > > > + typeof(*p) __v = ACCESS_ONCE(*(p)); \ > > > + smp_mb(); \ > > > + __v; \ > > > + }) > > > +#endif > > > + > > > +#ifndef smp_store_release > > > +# define smp_store_release(p, v) \ > > > + do { \ > > > + smp_mb(); \ > > > + ACCESS_ONCE(*(p)) = v; \ > > > + } while (0) > > > +#endif > > > > PeterZ already has a series implementing acquire/release accessors, so you > > should probably take a look at that rather than rolling your own here. > > Yes, we are using Peter Z's implementation here. The above is for anything > where smp_load_acquire and smp_store_release are *not* defined. We can > remove this once all architectures implement the acquire and release > functions as mentioned in the comments of the patch. Right, so you can use barrier.h and asm-generic will define generic versions (identical to the above) for you if the architecture doesn't have an optimised variant. You don't need to reproduce that in your .c file. > > > + /* > > > + * Wait until the lock holder passes the lock down. > > > + * Using smp_load_acquire() provides a memory barrier that > > > + * ensures subsequent operations happen after the lock is acquired. > > > + */ > > > + while (!(smp_load_acquire(&node->locked))) > > > arch_mutex_cpu_relax(); > > > > After a chat with some micro-architects, I'm going to have to disagree with > > Paul here. For architectures where acquire/release are implemented with > > explicit barriers (similarly for simple microarchitectures), emitting > > barriers in a loop *is* going to have an affect on overall performance, > > since those barriers may well result in traffic outside of the core (at > > least, on ARM). > > > > Thinking more about that, the real issue here is that arch_mutex_cpu_relax() > > doesn't have a corresponding hook on the unlock side. On ARM, for example, > > we can enter a low-power state using the wfe instruction, but that requires > > the unlocker to wake up the core when the lock is released. > > An alternate implementation is > while (!ACCESS_ONCE(node->locked)) > arch_mutex_cpu_relax(); > smp_load_acquire(&node->locked); > > Leaving the smp_load_acquire at the end to provide appropriate barrier. > Will that be acceptable? It still doesn't solve my problem though: I want a way to avoid that busy loop by some architecture-specific manner. The arch_mutex_cpu_relax() hook is a start, but there is no corresponding hook on the unlock side to issue a wakeup. Given a sensible relax implementation, I don't have an issue with putting a load-acquire in a loop, since it shouldn't be aggresively spinning anymore. Will -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections 2013-11-12 16:08 ` Will Deacon @ 2013-11-12 17:16 ` George Spelvin -1 siblings, 0 replies; 70+ messages in thread From: George Spelvin @ 2013-11-12 17:16 UTC (permalink / raw) To: tim.c.chen, will.deacon Cc: a.p.zijlstra, aarcange, akpm, alex.shi, andi, arnd, aswin, dave.hansen, davidlohr.bueso, figo1802, hpa, linux-arch, linux-kernel, linux-mm, linux, matthew.r.wilcox, mingo, paulmck, peter, raghavendra.kt, riel, scott.norton, tglx, torvalds, waiman.long, walken > On Mon, Nov 11, 2013 at 09:17:52PM +0000, Tim Chen wrote: >> An alternate implementation is >> while (!ACCESS_ONCE(node->locked)) >> arch_mutex_cpu_relax(); >> smp_load_acquire(&node->locked); >> >> Leaving the smp_load_acquire at the end to provide appropriate barrier. >> Will that be acceptable? Will Deacon <will.deacon@arm.com> wrote: > It still doesn't solve my problem though: I want a way to avoid that busy > loop by some architecture-specific manner. The arch_mutex_cpu_relax() hook > is a start, but there is no corresponding hook on the unlock side to issue a > wakeup. Given a sensible relax implementation, I don't have an issue with > putting a load-acquire in a loop, since it shouldn't be aggresively spinning > anymore. So you want something like this? /* * This is a spin-wait with acquire semantics. That is, accesses after * this are not allowed to be reordered before the load that meets * the specified condition. This requires that it end with either a * load-acquire or a full smp_mb(). The optimal way to do this is likely * to be architecture-dependent. E.g. x86 MONITOR/MWAIT instructions. */ #ifndef smp_load_acquire_until #define smp_load_acquire_until(addr, cond) \ while (!(smp_load_acquire(addr) cond)) { \ do { \ arch_mutex_cpu_relax(); \ } while (!(ACCESS_ONCE(*(addr)) cond)); \ } #endif smp_load_acquire_until(&node->locked, != 0); Alternative implementations: #define smp_load_acquire_until(addr, cond) { \ while (!(ACCESS_ONCE(*(addr)) cond)) \ arch_mutex_cpu_relax(); \ smp_mb(); } #define smp_load_acquire_until(addr, cond) \ if (!(smp_load_acquire(addr) cond)) { \ do { \ arch_mutex_cpu_relax(); \ } while (!(ACCESS_ONCE(*(addr)) cond)); \ smp_mb(); \ } ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-12 17:16 ` George Spelvin 0 siblings, 0 replies; 70+ messages in thread From: George Spelvin @ 2013-11-12 17:16 UTC (permalink / raw) To: tim.c.chen, will.deacon Cc: a.p.zijlstra, aarcange, akpm, alex.shi, andi, arnd, aswin, dave.hansen, davidlohr.bueso, figo1802, hpa, linux-arch, linux-kernel, linux-mm, linux, matthew.r.wilcox, mingo, paulmck, peter, raghavendra.kt, riel, scott.norton, tglx, torvalds, waiman.long, walken > On Mon, Nov 11, 2013 at 09:17:52PM +0000, Tim Chen wrote: >> An alternate implementation is >> while (!ACCESS_ONCE(node->locked)) >> arch_mutex_cpu_relax(); >> smp_load_acquire(&node->locked); >> >> Leaving the smp_load_acquire at the end to provide appropriate barrier. >> Will that be acceptable? Will Deacon <will.deacon@arm.com> wrote: > It still doesn't solve my problem though: I want a way to avoid that busy > loop by some architecture-specific manner. The arch_mutex_cpu_relax() hook > is a start, but there is no corresponding hook on the unlock side to issue a > wakeup. Given a sensible relax implementation, I don't have an issue with > putting a load-acquire in a loop, since it shouldn't be aggresively spinning > anymore. So you want something like this? /* * This is a spin-wait with acquire semantics. That is, accesses after * this are not allowed to be reordered before the load that meets * the specified condition. This requires that it end with either a * load-acquire or a full smp_mb(). The optimal way to do this is likely * to be architecture-dependent. E.g. x86 MONITOR/MWAIT instructions. */ #ifndef smp_load_acquire_until #define smp_load_acquire_until(addr, cond) \ while (!(smp_load_acquire(addr) cond)) { \ do { \ arch_mutex_cpu_relax(); \ } while (!(ACCESS_ONCE(*(addr)) cond)); \ } #endif smp_load_acquire_until(&node->locked, != 0); Alternative implementations: #define smp_load_acquire_until(addr, cond) { \ while (!(ACCESS_ONCE(*(addr)) cond)) \ arch_mutex_cpu_relax(); \ smp_mb(); } #define smp_load_acquire_until(addr, cond) \ if (!(smp_load_acquire(addr) cond)) { \ do { \ arch_mutex_cpu_relax(); \ } while (!(ACCESS_ONCE(*(addr)) cond)); \ smp_mb(); \ } -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections 2013-11-12 17:16 ` George Spelvin @ 2013-11-13 17:37 ` Will Deacon -1 siblings, 0 replies; 70+ messages in thread From: Will Deacon @ 2013-11-13 17:37 UTC (permalink / raw) To: George Spelvin Cc: tim.c.chen@linux.intel.com, a.p.zijlstra@chello.nl, aarcange@redhat.com, akpm@linux-foundation.org, alex.shi@linaro.org, andi@firstfloor.org, arnd@arndb.de, aswin@hp.com, dave.hansen@intel.com, davidlohr.bueso@hp.com, figo1802@gmail.com, hpa@zytor.com, linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, matthew.r.wilcox@intel.com, mingo@elte.hu, paulmck@linux.vnet.ibm.com On Tue, Nov 12, 2013 at 05:16:33PM +0000, George Spelvin wrote: > > On Mon, Nov 11, 2013 at 09:17:52PM +0000, Tim Chen wrote: > >> An alternate implementation is > >> while (!ACCESS_ONCE(node->locked)) > >> arch_mutex_cpu_relax(); > >> smp_load_acquire(&node->locked); > >> > >> Leaving the smp_load_acquire at the end to provide appropriate barrier. > >> Will that be acceptable? > > Will Deacon <will.deacon@arm.com> wrote: > > It still doesn't solve my problem though: I want a way to avoid that busy > > loop by some architecture-specific manner. The arch_mutex_cpu_relax() hook > > is a start, but there is no corresponding hook on the unlock side to issue a > > wakeup. Given a sensible relax implementation, I don't have an issue with > > putting a load-acquire in a loop, since it shouldn't be aggresively spinning > > anymore. > > So you want something like this? > > /* > * This is a spin-wait with acquire semantics. That is, accesses after > * this are not allowed to be reordered before the load that meets > * the specified condition. This requires that it end with either a > * load-acquire or a full smp_mb(). The optimal way to do this is likely > * to be architecture-dependent. E.g. x86 MONITOR/MWAIT instructions. > */ > #ifndef smp_load_acquire_until > #define smp_load_acquire_until(addr, cond) \ > while (!(smp_load_acquire(addr) cond)) { \ > do { \ > arch_mutex_cpu_relax(); \ > } while (!(ACCESS_ONCE(*(addr)) cond)); \ > } > #endif > > smp_load_acquire_until(&node->locked, != 0); > > Alternative implementations: > > #define smp_load_acquire_until(addr, cond) { \ > while (!(ACCESS_ONCE(*(addr)) cond)) \ > arch_mutex_cpu_relax(); \ > smp_mb(); } > > #define smp_load_acquire_until(addr, cond) \ > if (!(smp_load_acquire(addr) cond)) { \ > do { \ > arch_mutex_cpu_relax(); \ > } while (!(ACCESS_ONCE(*(addr)) cond)); \ > smp_mb(); \ > } Not really... To be clear: having the load-acquire in a loop is fine, provided that arch_mutex_cpu_relax is something which causes the load to back-off (you mentioned the MONITOR/MWAIT instructions on x86). On ARM, our equivalent of those instructions also has a counterpart instruction that needs to be executed by the CPU doing the unlock. That means we can do one of two things: 1. Add an arch hook in the unlock path to pair with the relax() call on the lock path (arch_mutex_cpu_wake() ?). 2. Move most of the code into arch_mcs_[un]lock, like we do for spinlocks. Whilst (1) would suffice, (2) would allow further optimisation on arm64, where we can play tricks to avoid the explicit wakeup if we can control the way in which the lock value is written. Will -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-13 17:37 ` Will Deacon 0 siblings, 0 replies; 70+ messages in thread From: Will Deacon @ 2013-11-13 17:37 UTC (permalink / raw) To: George Spelvin Cc: tim.c.chen@linux.intel.com, a.p.zijlstra@chello.nl, aarcange@redhat.com, akpm@linux-foundation.org, alex.shi@linaro.org, andi@firstfloor.org, arnd@arndb.de, aswin@hp.com, dave.hansen@intel.com, davidlohr.bueso@hp.com, figo1802@gmail.com, hpa@zytor.com, linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, matthew.r.wilcox@intel.com, mingo@elte.hu, paulmck@linux.vnet.ibm.com, peter@hurleysoftware.com, raghavendra.kt@linux.vnet.ibm.com, riel@redhat.com, scott.norton@hp.com, tglx@linutronix.de, torvalds@linux-foundation.org, waiman.long@hp.com, walken@google.com On Tue, Nov 12, 2013 at 05:16:33PM +0000, George Spelvin wrote: > > On Mon, Nov 11, 2013 at 09:17:52PM +0000, Tim Chen wrote: > >> An alternate implementation is > >> while (!ACCESS_ONCE(node->locked)) > >> arch_mutex_cpu_relax(); > >> smp_load_acquire(&node->locked); > >> > >> Leaving the smp_load_acquire at the end to provide appropriate barrier. > >> Will that be acceptable? > > Will Deacon <will.deacon@arm.com> wrote: > > It still doesn't solve my problem though: I want a way to avoid that busy > > loop by some architecture-specific manner. The arch_mutex_cpu_relax() hook > > is a start, but there is no corresponding hook on the unlock side to issue a > > wakeup. Given a sensible relax implementation, I don't have an issue with > > putting a load-acquire in a loop, since it shouldn't be aggresively spinning > > anymore. > > So you want something like this? > > /* > * This is a spin-wait with acquire semantics. That is, accesses after > * this are not allowed to be reordered before the load that meets > * the specified condition. This requires that it end with either a > * load-acquire or a full smp_mb(). The optimal way to do this is likely > * to be architecture-dependent. E.g. x86 MONITOR/MWAIT instructions. > */ > #ifndef smp_load_acquire_until > #define smp_load_acquire_until(addr, cond) \ > while (!(smp_load_acquire(addr) cond)) { \ > do { \ > arch_mutex_cpu_relax(); \ > } while (!(ACCESS_ONCE(*(addr)) cond)); \ > } > #endif > > smp_load_acquire_until(&node->locked, != 0); > > Alternative implementations: > > #define smp_load_acquire_until(addr, cond) { \ > while (!(ACCESS_ONCE(*(addr)) cond)) \ > arch_mutex_cpu_relax(); \ > smp_mb(); } > > #define smp_load_acquire_until(addr, cond) \ > if (!(smp_load_acquire(addr) cond)) { \ > do { \ > arch_mutex_cpu_relax(); \ > } while (!(ACCESS_ONCE(*(addr)) cond)); \ > smp_mb(); \ > } Not really... To be clear: having the load-acquire in a loop is fine, provided that arch_mutex_cpu_relax is something which causes the load to back-off (you mentioned the MONITOR/MWAIT instructions on x86). On ARM, our equivalent of those instructions also has a counterpart instruction that needs to be executed by the CPU doing the unlock. That means we can do one of two things: 1. Add an arch hook in the unlock path to pair with the relax() call on the lock path (arch_mutex_cpu_wake() ?). 2. Move most of the code into arch_mcs_[un]lock, like we do for spinlocks. Whilst (1) would suffice, (2) would allow further optimisation on arm64, where we can play tricks to avoid the explicit wakeup if we can control the way in which the lock value is written. Will -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections 2013-11-11 18:10 ` Will Deacon @ 2013-11-19 19:26 ` Paul E. McKenney -1 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:26 UTC (permalink / raw) To: Will Deacon Cc: Tim Chen, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley On Mon, Nov 11, 2013 at 06:10:49PM +0000, Will Deacon wrote: > Hello, > > On Fri, Nov 08, 2013 at 07:52:38PM +0000, Tim Chen wrote: > > From: Waiman Long <Waiman.Long@hp.com> > > > > This patch corrects the way memory barriers are used in the MCS lock > > with smp_load_acquire and smp_store_release fucnction. > > It removes ones that are not needed. > > > > It uses architecture specific load-acquire and store-release > > primitives for synchronization, if available. Generic implementations > > are provided in case they are not defined even though they may not > > be optimal. These generic implementation could be removed later on > > once changes are made in all the relevant header files. > > > > Suggested-by: Michel Lespinasse <walken@google.com> > > Signed-off-by: Waiman Long <Waiman.Long@hp.com> > > Signed-off-by: Jason Low <jason.low2@hp.com> > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > > --- > > kernel/locking/mcs_spinlock.c | 48 +++++++++++++++++++++++++++++++++++------ > > 1 files changed, 41 insertions(+), 7 deletions(-) > > > > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c > > index b6f27f8..df5c167 100644 > > --- a/kernel/locking/mcs_spinlock.c > > +++ b/kernel/locking/mcs_spinlock.c > > @@ -23,6 +23,31 @@ > > #endif > > > > /* > > + * Fall back to use the regular atomic operations and memory barrier if > > + * the acquire/release versions are not defined. > > + */ > > +#ifndef xchg_acquire > > +# define xchg_acquire(p, v) xchg(p, v) > > +#endif > > + > > +#ifndef smp_load_acquire > > +# define smp_load_acquire(p) \ > > + ({ \ > > + typeof(*p) __v = ACCESS_ONCE(*(p)); \ > > + smp_mb(); \ > > + __v; \ > > + }) > > +#endif > > + > > +#ifndef smp_store_release > > +# define smp_store_release(p, v) \ > > + do { \ > > + smp_mb(); \ > > + ACCESS_ONCE(*(p)) = v; \ > > + } while (0) > > +#endif > > PeterZ already has a series implementing acquire/release accessors, so you > should probably take a look at that rather than rolling your own here. > > You could then augment that with [cmp]xchg_{acquire,release} as > appropriate. > > > +/* > > * In order to acquire the lock, the caller should declare a local node and > > * pass a reference of the node to this function in addition to the lock. > > * If the lock has already been acquired, then this will proceed to spin > > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > node->locked = 0; > > node->next = NULL; > > > > - prev = xchg(lock, node); > > + /* xchg() provides a memory barrier */ > > + prev = xchg_acquire(lock, node); > > if (likely(prev == NULL)) { > > /* Lock acquired */ > > return; > > } > > ACCESS_ONCE(prev->next) = node; > > - smp_wmb(); > > - /* Wait until the lock holder passes the lock down */ > > - while (!ACCESS_ONCE(node->locked)) > > + /* > > + * Wait until the lock holder passes the lock down. > > + * Using smp_load_acquire() provides a memory barrier that > > + * ensures subsequent operations happen after the lock is acquired. > > + */ > > + while (!(smp_load_acquire(&node->locked))) > > arch_mutex_cpu_relax(); > > After a chat with some micro-architects, I'm going to have to disagree with > Paul here. For architectures where acquire/release are implemented with > explicit barriers (similarly for simple microarchitectures), emitting > barriers in a loop *is* going to have an affect on overall performance, > since those barriers may well result in traffic outside of the core (at > least, on ARM). OK, that is a new one on me... > Thinking more about that, the real issue here is that arch_mutex_cpu_relax() > doesn't have a corresponding hook on the unlock side. On ARM, for example, > we can enter a low-power state using the wfe instruction, but that requires > the unlocker to wake up the core when the lock is released. > > So, although I'm completely in favour of introducing acquire/release > accessors, I really think the mcs locking routines would benefit from > some arch-specific backend code, even if it's optional (although I would > imagine most architectures implementing something to improve power and/or > performance). I would argue that if we have so much lock contention that spinners want to enter low-power states, other fixes are in order. Especially given relatively small CPU counts. Thanx, Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-19 19:26 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:26 UTC (permalink / raw) To: Will Deacon Cc: Tim Chen, Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel@vger.kernel.org, linux-mm, linux-arch@vger.kernel.org, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Figo.zhang On Mon, Nov 11, 2013 at 06:10:49PM +0000, Will Deacon wrote: > Hello, > > On Fri, Nov 08, 2013 at 07:52:38PM +0000, Tim Chen wrote: > > From: Waiman Long <Waiman.Long@hp.com> > > > > This patch corrects the way memory barriers are used in the MCS lock > > with smp_load_acquire and smp_store_release fucnction. > > It removes ones that are not needed. > > > > It uses architecture specific load-acquire and store-release > > primitives for synchronization, if available. Generic implementations > > are provided in case they are not defined even though they may not > > be optimal. These generic implementation could be removed later on > > once changes are made in all the relevant header files. > > > > Suggested-by: Michel Lespinasse <walken@google.com> > > Signed-off-by: Waiman Long <Waiman.Long@hp.com> > > Signed-off-by: Jason Low <jason.low2@hp.com> > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > > --- > > kernel/locking/mcs_spinlock.c | 48 +++++++++++++++++++++++++++++++++++------ > > 1 files changed, 41 insertions(+), 7 deletions(-) > > > > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c > > index b6f27f8..df5c167 100644 > > --- a/kernel/locking/mcs_spinlock.c > > +++ b/kernel/locking/mcs_spinlock.c > > @@ -23,6 +23,31 @@ > > #endif > > > > /* > > + * Fall back to use the regular atomic operations and memory barrier if > > + * the acquire/release versions are not defined. > > + */ > > +#ifndef xchg_acquire > > +# define xchg_acquire(p, v) xchg(p, v) > > +#endif > > + > > +#ifndef smp_load_acquire > > +# define smp_load_acquire(p) \ > > + ({ \ > > + typeof(*p) __v = ACCESS_ONCE(*(p)); \ > > + smp_mb(); \ > > + __v; \ > > + }) > > +#endif > > + > > +#ifndef smp_store_release > > +# define smp_store_release(p, v) \ > > + do { \ > > + smp_mb(); \ > > + ACCESS_ONCE(*(p)) = v; \ > > + } while (0) > > +#endif > > PeterZ already has a series implementing acquire/release accessors, so you > should probably take a look at that rather than rolling your own here. > > You could then augment that with [cmp]xchg_{acquire,release} as > appropriate. > > > +/* > > * In order to acquire the lock, the caller should declare a local node and > > * pass a reference of the node to this function in addition to the lock. > > * If the lock has already been acquired, then this will proceed to spin > > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > node->locked = 0; > > node->next = NULL; > > > > - prev = xchg(lock, node); > > + /* xchg() provides a memory barrier */ > > + prev = xchg_acquire(lock, node); > > if (likely(prev == NULL)) { > > /* Lock acquired */ > > return; > > } > > ACCESS_ONCE(prev->next) = node; > > - smp_wmb(); > > - /* Wait until the lock holder passes the lock down */ > > - while (!ACCESS_ONCE(node->locked)) > > + /* > > + * Wait until the lock holder passes the lock down. > > + * Using smp_load_acquire() provides a memory barrier that > > + * ensures subsequent operations happen after the lock is acquired. > > + */ > > + while (!(smp_load_acquire(&node->locked))) > > arch_mutex_cpu_relax(); > > After a chat with some micro-architects, I'm going to have to disagree with > Paul here. For architectures where acquire/release are implemented with > explicit barriers (similarly for simple microarchitectures), emitting > barriers in a loop *is* going to have an affect on overall performance, > since those barriers may well result in traffic outside of the core (at > least, on ARM). OK, that is a new one on me... > Thinking more about that, the real issue here is that arch_mutex_cpu_relax() > doesn't have a corresponding hook on the unlock side. On ARM, for example, > we can enter a low-power state using the wfe instruction, but that requires > the unlocker to wake up the core when the lock is released. > > So, although I'm completely in favour of introducing acquire/release > accessors, I really think the mcs locking routines would benefit from > some arch-specific backend code, even if it's optional (although I would > imagine most architectures implementing something to improve power and/or > performance). I would argue that if we have so much lock contention that spinners want to enter low-power states, other fixes are in order. Especially given relatively small CPU counts. Thanx, Paul -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-19 19:21 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:21 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott On Fri, Nov 08, 2013 at 11:52:38AM -0800, Tim Chen wrote: > From: Waiman Long <Waiman.Long@hp.com> > > This patch corrects the way memory barriers are used in the MCS lock > with smp_load_acquire and smp_store_release fucnction. > It removes ones that are not needed. > > It uses architecture specific load-acquire and store-release > primitives for synchronization, if available. Generic implementations > are provided in case they are not defined even though they may not > be optimal. These generic implementation could be removed later on > once changes are made in all the relevant header files. > > Suggested-by: Michel Lespinasse <walken@google.com> > Signed-off-by: Waiman Long <Waiman.Long@hp.com> > Signed-off-by: Jason Low <jason.low2@hp.com> > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> Please see comments below. Thanx, Paul > --- > kernel/locking/mcs_spinlock.c | 48 +++++++++++++++++++++++++++++++++++------ > 1 files changed, 41 insertions(+), 7 deletions(-) > > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c > index b6f27f8..df5c167 100644 > --- a/kernel/locking/mcs_spinlock.c > +++ b/kernel/locking/mcs_spinlock.c > @@ -23,6 +23,31 @@ > #endif > > /* > + * Fall back to use the regular atomic operations and memory barrier if > + * the acquire/release versions are not defined. > + */ > +#ifndef xchg_acquire > +# define xchg_acquire(p, v) xchg(p, v) > +#endif > + > +#ifndef smp_load_acquire > +# define smp_load_acquire(p) \ > + ({ \ > + typeof(*p) __v = ACCESS_ONCE(*(p)); \ > + smp_mb(); \ > + __v; \ > + }) > +#endif > + > +#ifndef smp_store_release > +# define smp_store_release(p, v) \ > + do { \ > + smp_mb(); \ > + ACCESS_ONCE(*(p)) = v; \ > + } while (0) > +#endif > + > +/* > * In order to acquire the lock, the caller should declare a local node and > * pass a reference of the node to this function in addition to the lock. > * If the lock has already been acquired, then this will proceed to spin > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > node->locked = 0; > node->next = NULL; > > - prev = xchg(lock, node); > + /* xchg() provides a memory barrier */ > + prev = xchg_acquire(lock, node); But if this is xchg_acquire() with only acquire semantics, it need not ensure that the initializations of node->locked and node->next above will happen before the "ACCESS_ONCE(prev->next) = node" below. This therefore needs to remain xchg(). Or you need an smp_store_release() below instead of an ACCESS_ONCE() assignment. As currently written, the poor CPU doing the unlock can be fatally disappointed by seeing pre-initialized values of ->locked and ->next. This could, among other things, result in a hang where the handoff happens before the initialization. > if (likely(prev == NULL)) { > /* Lock acquired */ > return; > } > ACCESS_ONCE(prev->next) = node; > - smp_wmb(); > - /* Wait until the lock holder passes the lock down */ > - while (!ACCESS_ONCE(node->locked)) > + /* > + * Wait until the lock holder passes the lock down. > + * Using smp_load_acquire() provides a memory barrier that > + * ensures subsequent operations happen after the lock is acquired. > + */ > + while (!(smp_load_acquire(&node->locked))) > arch_mutex_cpu_relax(); OK, this smp_load_acquire() makes sense! > } > EXPORT_SYMBOL_GPL(mcs_spin_lock); > @@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock); > * Releases the lock. The caller should pass in the corresponding node that > * was used to acquire the lock. > */ > -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > { > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > @@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > while (!(next = ACCESS_ONCE(node->next))) > arch_mutex_cpu_relax(); > } > - ACCESS_ONCE(next->locked) = 1; > - smp_wmb(); > + /* > + * Pass lock to next waiter. > + * smp_store_release() provides a memory barrier to ensure > + * all operations in the critical section has been completed > + * before unlocking. > + */ > + smp_store_release(&next->locked , 1); This smp_store_release() makes sense as well! Could you please get rid of the extraneous space before the comma? > } > EXPORT_SYMBOL_GPL(mcs_spin_unlock); > -- > 1.7.4.4 > > ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-19 19:21 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:21 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Fri, Nov 08, 2013 at 11:52:38AM -0800, Tim Chen wrote: > From: Waiman Long <Waiman.Long@hp.com> > > This patch corrects the way memory barriers are used in the MCS lock > with smp_load_acquire and smp_store_release fucnction. > It removes ones that are not needed. > > It uses architecture specific load-acquire and store-release > primitives for synchronization, if available. Generic implementations > are provided in case they are not defined even though they may not > be optimal. These generic implementation could be removed later on > once changes are made in all the relevant header files. > > Suggested-by: Michel Lespinasse <walken@google.com> > Signed-off-by: Waiman Long <Waiman.Long@hp.com> > Signed-off-by: Jason Low <jason.low2@hp.com> > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> Please see comments below. Thanx, Paul > --- > kernel/locking/mcs_spinlock.c | 48 +++++++++++++++++++++++++++++++++++------ > 1 files changed, 41 insertions(+), 7 deletions(-) > > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c > index b6f27f8..df5c167 100644 > --- a/kernel/locking/mcs_spinlock.c > +++ b/kernel/locking/mcs_spinlock.c > @@ -23,6 +23,31 @@ > #endif > > /* > + * Fall back to use the regular atomic operations and memory barrier if > + * the acquire/release versions are not defined. > + */ > +#ifndef xchg_acquire > +# define xchg_acquire(p, v) xchg(p, v) > +#endif > + > +#ifndef smp_load_acquire > +# define smp_load_acquire(p) \ > + ({ \ > + typeof(*p) __v = ACCESS_ONCE(*(p)); \ > + smp_mb(); \ > + __v; \ > + }) > +#endif > + > +#ifndef smp_store_release > +# define smp_store_release(p, v) \ > + do { \ > + smp_mb(); \ > + ACCESS_ONCE(*(p)) = v; \ > + } while (0) > +#endif > + > +/* > * In order to acquire the lock, the caller should declare a local node and > * pass a reference of the node to this function in addition to the lock. > * If the lock has already been acquired, then this will proceed to spin > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > node->locked = 0; > node->next = NULL; > > - prev = xchg(lock, node); > + /* xchg() provides a memory barrier */ > + prev = xchg_acquire(lock, node); But if this is xchg_acquire() with only acquire semantics, it need not ensure that the initializations of node->locked and node->next above will happen before the "ACCESS_ONCE(prev->next) = node" below. This therefore needs to remain xchg(). Or you need an smp_store_release() below instead of an ACCESS_ONCE() assignment. As currently written, the poor CPU doing the unlock can be fatally disappointed by seeing pre-initialized values of ->locked and ->next. This could, among other things, result in a hang where the handoff happens before the initialization. > if (likely(prev == NULL)) { > /* Lock acquired */ > return; > } > ACCESS_ONCE(prev->next) = node; > - smp_wmb(); > - /* Wait until the lock holder passes the lock down */ > - while (!ACCESS_ONCE(node->locked)) > + /* > + * Wait until the lock holder passes the lock down. > + * Using smp_load_acquire() provides a memory barrier that > + * ensures subsequent operations happen after the lock is acquired. > + */ > + while (!(smp_load_acquire(&node->locked))) > arch_mutex_cpu_relax(); OK, this smp_load_acquire() makes sense! > } > EXPORT_SYMBOL_GPL(mcs_spin_lock); > @@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock); > * Releases the lock. The caller should pass in the corresponding node that > * was used to acquire the lock. > */ > -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > { > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > @@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > while (!(next = ACCESS_ONCE(node->next))) > arch_mutex_cpu_relax(); > } > - ACCESS_ONCE(next->locked) = 1; > - smp_wmb(); > + /* > + * Pass lock to next waiter. > + * smp_store_release() provides a memory barrier to ensure > + * all operations in the critical section has been completed > + * before unlocking. > + */ > + smp_store_release(&next->locked , 1); This smp_store_release() makes sense as well! Could you please get rid of the extraneous space before the comma? > } > EXPORT_SYMBOL_GPL(mcs_spin_unlock); > -- > 1.7.4.4 > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-19 19:21 ` Paul E. McKenney 0 siblings, 0 replies; 70+ messages in thread From: Paul E. McKenney @ 2013-11-19 19:21 UTC (permalink / raw) To: Tim Chen Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Fri, Nov 08, 2013 at 11:52:38AM -0800, Tim Chen wrote: > From: Waiman Long <Waiman.Long@hp.com> > > This patch corrects the way memory barriers are used in the MCS lock > with smp_load_acquire and smp_store_release fucnction. > It removes ones that are not needed. > > It uses architecture specific load-acquire and store-release > primitives for synchronization, if available. Generic implementations > are provided in case they are not defined even though they may not > be optimal. These generic implementation could be removed later on > once changes are made in all the relevant header files. > > Suggested-by: Michel Lespinasse <walken@google.com> > Signed-off-by: Waiman Long <Waiman.Long@hp.com> > Signed-off-by: Jason Low <jason.low2@hp.com> > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> Please see comments below. Thanx, Paul > --- > kernel/locking/mcs_spinlock.c | 48 +++++++++++++++++++++++++++++++++++------ > 1 files changed, 41 insertions(+), 7 deletions(-) > > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c > index b6f27f8..df5c167 100644 > --- a/kernel/locking/mcs_spinlock.c > +++ b/kernel/locking/mcs_spinlock.c > @@ -23,6 +23,31 @@ > #endif > > /* > + * Fall back to use the regular atomic operations and memory barrier if > + * the acquire/release versions are not defined. > + */ > +#ifndef xchg_acquire > +# define xchg_acquire(p, v) xchg(p, v) > +#endif > + > +#ifndef smp_load_acquire > +# define smp_load_acquire(p) \ > + ({ \ > + typeof(*p) __v = ACCESS_ONCE(*(p)); \ > + smp_mb(); \ > + __v; \ > + }) > +#endif > + > +#ifndef smp_store_release > +# define smp_store_release(p, v) \ > + do { \ > + smp_mb(); \ > + ACCESS_ONCE(*(p)) = v; \ > + } while (0) > +#endif > + > +/* > * In order to acquire the lock, the caller should declare a local node and > * pass a reference of the node to this function in addition to the lock. > * If the lock has already been acquired, then this will proceed to spin > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > node->locked = 0; > node->next = NULL; > > - prev = xchg(lock, node); > + /* xchg() provides a memory barrier */ > + prev = xchg_acquire(lock, node); But if this is xchg_acquire() with only acquire semantics, it need not ensure that the initializations of node->locked and node->next above will happen before the "ACCESS_ONCE(prev->next) = node" below. This therefore needs to remain xchg(). Or you need an smp_store_release() below instead of an ACCESS_ONCE() assignment. As currently written, the poor CPU doing the unlock can be fatally disappointed by seeing pre-initialized values of ->locked and ->next. This could, among other things, result in a hang where the handoff happens before the initialization. > if (likely(prev == NULL)) { > /* Lock acquired */ > return; > } > ACCESS_ONCE(prev->next) = node; > - smp_wmb(); > - /* Wait until the lock holder passes the lock down */ > - while (!ACCESS_ONCE(node->locked)) > + /* > + * Wait until the lock holder passes the lock down. > + * Using smp_load_acquire() provides a memory barrier that > + * ensures subsequent operations happen after the lock is acquired. > + */ > + while (!(smp_load_acquire(&node->locked))) > arch_mutex_cpu_relax(); OK, this smp_load_acquire() makes sense! > } > EXPORT_SYMBOL_GPL(mcs_spin_lock); > @@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock); > * Releases the lock. The caller should pass in the corresponding node that > * was used to acquire the lock. > */ > -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > { > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > @@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > while (!(next = ACCESS_ONCE(node->next))) > arch_mutex_cpu_relax(); > } > - ACCESS_ONCE(next->locked) = 1; > - smp_wmb(); > + /* > + * Pass lock to next waiter. > + * smp_store_release() provides a memory barrier to ensure > + * all operations in the critical section has been completed > + * before unlocking. > + */ > + smp_store_release(&next->locked , 1); This smp_store_release() makes sense as well! Could you please get rid of the extraneous space before the comma? > } > EXPORT_SYMBOL_GPL(mcs_spin_unlock); > -- > 1.7.4.4 > > ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-19 19:46 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-19 19:46 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott On Tue, 2013-11-19 at 11:21 -0800, Paul E. McKenney wrote: > On Fri, Nov 08, 2013 at 11:52:38AM -0800, Tim Chen wrote: > > From: Waiman Long <Waiman.Long@hp.com> > > > > This patch corrects the way memory barriers are used in the MCS lock > > with smp_load_acquire and smp_store_release fucnction. > > It removes ones that are not needed. > > > > It uses architecture specific load-acquire and store-release > > primitives for synchronization, if available. Generic implementations > > are provided in case they are not defined even though they may not > > be optimal. These generic implementation could be removed later on > > once changes are made in all the relevant header files. > > > > Suggested-by: Michel Lespinasse <walken@google.com> > > Signed-off-by: Waiman Long <Waiman.Long@hp.com> > > Signed-off-by: Jason Low <jason.low2@hp.com> > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > > Please see comments below. > > Thanx, Paul > > > --- > > kernel/locking/mcs_spinlock.c | 48 +++++++++++++++++++++++++++++++++++------ > > 1 files changed, 41 insertions(+), 7 deletions(-) > > > > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c > > index b6f27f8..df5c167 100644 > > --- a/kernel/locking/mcs_spinlock.c > > +++ b/kernel/locking/mcs_spinlock.c > > @@ -23,6 +23,31 @@ > > #endif > > > > /* > > + * Fall back to use the regular atomic operations and memory barrier if > > + * the acquire/release versions are not defined. > > + */ > > +#ifndef xchg_acquire > > +# define xchg_acquire(p, v) xchg(p, v) > > +#endif > > + > > +#ifndef smp_load_acquire > > +# define smp_load_acquire(p) \ > > + ({ \ > > + typeof(*p) __v = ACCESS_ONCE(*(p)); \ > > + smp_mb(); \ > > + __v; \ > > + }) > > +#endif > > + > > +#ifndef smp_store_release > > +# define smp_store_release(p, v) \ > > + do { \ > > + smp_mb(); \ > > + ACCESS_ONCE(*(p)) = v; \ > > + } while (0) > > +#endif > > + > > +/* > > * In order to acquire the lock, the caller should declare a local node and > > * pass a reference of the node to this function in addition to the lock. > > * If the lock has already been acquired, then this will proceed to spin > > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > node->locked = 0; > > node->next = NULL; > > > > - prev = xchg(lock, node); > > + /* xchg() provides a memory barrier */ > > + prev = xchg_acquire(lock, node); > > But if this is xchg_acquire() with only acquire semantics, it need not > ensure that the initializations of node->locked and node->next above > will happen before the "ACCESS_ONCE(prev->next) = node" below. This > therefore needs to remain xchg(). Or you need an smp_store_release() > below instead of an ACCESS_ONCE() assignment. Good point. Will keep it as xchg. > > As currently written, the poor CPU doing the unlock can be fatally > disappointed by seeing pre-initialized values of ->locked and ->next. > This could, among other things, result in a hang where the handoff > happens before the initialization. > > > if (likely(prev == NULL)) { > > /* Lock acquired */ > > return; > > } > > ACCESS_ONCE(prev->next) = node; > > - smp_wmb(); > > - /* Wait until the lock holder passes the lock down */ > > - while (!ACCESS_ONCE(node->locked)) > > + /* > > + * Wait until the lock holder passes the lock down. > > + * Using smp_load_acquire() provides a memory barrier that > > + * ensures subsequent operations happen after the lock is acquired. > > + */ > > + while (!(smp_load_acquire(&node->locked))) > > arch_mutex_cpu_relax(); > > OK, this smp_load_acquire() makes sense! > > > } > > EXPORT_SYMBOL_GPL(mcs_spin_lock); > > @@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock); > > * Releases the lock. The caller should pass in the corresponding node that > > * was used to acquire the lock. > > */ > > -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > { > > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > > > @@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > > while (!(next = ACCESS_ONCE(node->next))) > > arch_mutex_cpu_relax(); > > } > > - ACCESS_ONCE(next->locked) = 1; > > - smp_wmb(); > > + /* > > + * Pass lock to next waiter. > > + * smp_store_release() provides a memory barrier to ensure > > + * all operations in the critical section has been completed > > + * before unlocking. > > + */ > > + smp_store_release(&next->locked , 1); > > This smp_store_release() makes sense as well! > > Could you please get rid of the extraneous space before the comma? Will do. > > > } > > EXPORT_SYMBOL_GPL(mcs_spin_unlock); > > -- > > 1.7.4.4 > > > > > ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-19 19:46 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-19 19:46 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Tue, 2013-11-19 at 11:21 -0800, Paul E. McKenney wrote: > On Fri, Nov 08, 2013 at 11:52:38AM -0800, Tim Chen wrote: > > From: Waiman Long <Waiman.Long@hp.com> > > > > This patch corrects the way memory barriers are used in the MCS lock > > with smp_load_acquire and smp_store_release fucnction. > > It removes ones that are not needed. > > > > It uses architecture specific load-acquire and store-release > > primitives for synchronization, if available. Generic implementations > > are provided in case they are not defined even though they may not > > be optimal. These generic implementation could be removed later on > > once changes are made in all the relevant header files. > > > > Suggested-by: Michel Lespinasse <walken@google.com> > > Signed-off-by: Waiman Long <Waiman.Long@hp.com> > > Signed-off-by: Jason Low <jason.low2@hp.com> > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > > Please see comments below. > > Thanx, Paul > > > --- > > kernel/locking/mcs_spinlock.c | 48 +++++++++++++++++++++++++++++++++++------ > > 1 files changed, 41 insertions(+), 7 deletions(-) > > > > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c > > index b6f27f8..df5c167 100644 > > --- a/kernel/locking/mcs_spinlock.c > > +++ b/kernel/locking/mcs_spinlock.c > > @@ -23,6 +23,31 @@ > > #endif > > > > /* > > + * Fall back to use the regular atomic operations and memory barrier if > > + * the acquire/release versions are not defined. > > + */ > > +#ifndef xchg_acquire > > +# define xchg_acquire(p, v) xchg(p, v) > > +#endif > > + > > +#ifndef smp_load_acquire > > +# define smp_load_acquire(p) \ > > + ({ \ > > + typeof(*p) __v = ACCESS_ONCE(*(p)); \ > > + smp_mb(); \ > > + __v; \ > > + }) > > +#endif > > + > > +#ifndef smp_store_release > > +# define smp_store_release(p, v) \ > > + do { \ > > + smp_mb(); \ > > + ACCESS_ONCE(*(p)) = v; \ > > + } while (0) > > +#endif > > + > > +/* > > * In order to acquire the lock, the caller should declare a local node and > > * pass a reference of the node to this function in addition to the lock. > > * If the lock has already been acquired, then this will proceed to spin > > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > node->locked = 0; > > node->next = NULL; > > > > - prev = xchg(lock, node); > > + /* xchg() provides a memory barrier */ > > + prev = xchg_acquire(lock, node); > > But if this is xchg_acquire() with only acquire semantics, it need not > ensure that the initializations of node->locked and node->next above > will happen before the "ACCESS_ONCE(prev->next) = node" below. This > therefore needs to remain xchg(). Or you need an smp_store_release() > below instead of an ACCESS_ONCE() assignment. Good point. Will keep it as xchg. > > As currently written, the poor CPU doing the unlock can be fatally > disappointed by seeing pre-initialized values of ->locked and ->next. > This could, among other things, result in a hang where the handoff > happens before the initialization. > > > if (likely(prev == NULL)) { > > /* Lock acquired */ > > return; > > } > > ACCESS_ONCE(prev->next) = node; > > - smp_wmb(); > > - /* Wait until the lock holder passes the lock down */ > > - while (!ACCESS_ONCE(node->locked)) > > + /* > > + * Wait until the lock holder passes the lock down. > > + * Using smp_load_acquire() provides a memory barrier that > > + * ensures subsequent operations happen after the lock is acquired. > > + */ > > + while (!(smp_load_acquire(&node->locked))) > > arch_mutex_cpu_relax(); > > OK, this smp_load_acquire() makes sense! > > > } > > EXPORT_SYMBOL_GPL(mcs_spin_lock); > > @@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock); > > * Releases the lock. The caller should pass in the corresponding node that > > * was used to acquire the lock. > > */ > > -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > { > > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > > > @@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > > while (!(next = ACCESS_ONCE(node->next))) > > arch_mutex_cpu_relax(); > > } > > - ACCESS_ONCE(next->locked) = 1; > > - smp_wmb(); > > + /* > > + * Pass lock to next waiter. > > + * smp_store_release() provides a memory barrier to ensure > > + * all operations in the critical section has been completed > > + * before unlocking. > > + */ > > + smp_store_release(&next->locked , 1); > > This smp_store_release() makes sense as well! > > Could you please get rid of the extraneous space before the comma? Will do. > > > } > > EXPORT_SYMBOL_GPL(mcs_spin_unlock); > > -- > > 1.7.4.4 > > > > > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 70+ messages in thread
* Re: [PATCH v5 4/4] MCS Lock: Barrier corrections @ 2013-11-19 19:46 ` Tim Chen 0 siblings, 0 replies; 70+ messages in thread From: Tim Chen @ 2013-11-19 19:46 UTC (permalink / raw) To: paulmck Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, linux-kernel, linux-mm, linux-arch, Linus Torvalds, Waiman Long, Andrea Arcangeli, Alex Shi, Andi Kleen, Michel Lespinasse, Davidlohr Bueso, Matthew R Wilcox, Dave Hansen, Peter Zijlstra, Rik van Riel, Peter Hurley, Raghavendra K T, George Spelvin, H. Peter Anvin, Arnd Bergmann, Aswin Chandramouleeswaran, Scott J Norton, Will Deacon, Figo.zhang On Tue, 2013-11-19 at 11:21 -0800, Paul E. McKenney wrote: > On Fri, Nov 08, 2013 at 11:52:38AM -0800, Tim Chen wrote: > > From: Waiman Long <Waiman.Long@hp.com> > > > > This patch corrects the way memory barriers are used in the MCS lock > > with smp_load_acquire and smp_store_release fucnction. > > It removes ones that are not needed. > > > > It uses architecture specific load-acquire and store-release > > primitives for synchronization, if available. Generic implementations > > are provided in case they are not defined even though they may not > > be optimal. These generic implementation could be removed later on > > once changes are made in all the relevant header files. > > > > Suggested-by: Michel Lespinasse <walken@google.com> > > Signed-off-by: Waiman Long <Waiman.Long@hp.com> > > Signed-off-by: Jason Low <jason.low2@hp.com> > > Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> > > Please see comments below. > > Thanx, Paul > > > --- > > kernel/locking/mcs_spinlock.c | 48 +++++++++++++++++++++++++++++++++++------ > > 1 files changed, 41 insertions(+), 7 deletions(-) > > > > diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c > > index b6f27f8..df5c167 100644 > > --- a/kernel/locking/mcs_spinlock.c > > +++ b/kernel/locking/mcs_spinlock.c > > @@ -23,6 +23,31 @@ > > #endif > > > > /* > > + * Fall back to use the regular atomic operations and memory barrier if > > + * the acquire/release versions are not defined. > > + */ > > +#ifndef xchg_acquire > > +# define xchg_acquire(p, v) xchg(p, v) > > +#endif > > + > > +#ifndef smp_load_acquire > > +# define smp_load_acquire(p) \ > > + ({ \ > > + typeof(*p) __v = ACCESS_ONCE(*(p)); \ > > + smp_mb(); \ > > + __v; \ > > + }) > > +#endif > > + > > +#ifndef smp_store_release > > +# define smp_store_release(p, v) \ > > + do { \ > > + smp_mb(); \ > > + ACCESS_ONCE(*(p)) = v; \ > > + } while (0) > > +#endif > > + > > +/* > > * In order to acquire the lock, the caller should declare a local node and > > * pass a reference of the node to this function in addition to the lock. > > * If the lock has already been acquired, then this will proceed to spin > > @@ -37,15 +62,19 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > node->locked = 0; > > node->next = NULL; > > > > - prev = xchg(lock, node); > > + /* xchg() provides a memory barrier */ > > + prev = xchg_acquire(lock, node); > > But if this is xchg_acquire() with only acquire semantics, it need not > ensure that the initializations of node->locked and node->next above > will happen before the "ACCESS_ONCE(prev->next) = node" below. This > therefore needs to remain xchg(). Or you need an smp_store_release() > below instead of an ACCESS_ONCE() assignment. Good point. Will keep it as xchg. > > As currently written, the poor CPU doing the unlock can be fatally > disappointed by seeing pre-initialized values of ->locked and ->next. > This could, among other things, result in a hang where the handoff > happens before the initialization. > > > if (likely(prev == NULL)) { > > /* Lock acquired */ > > return; > > } > > ACCESS_ONCE(prev->next) = node; > > - smp_wmb(); > > - /* Wait until the lock holder passes the lock down */ > > - while (!ACCESS_ONCE(node->locked)) > > + /* > > + * Wait until the lock holder passes the lock down. > > + * Using smp_load_acquire() provides a memory barrier that > > + * ensures subsequent operations happen after the lock is acquired. > > + */ > > + while (!(smp_load_acquire(&node->locked))) > > arch_mutex_cpu_relax(); > > OK, this smp_load_acquire() makes sense! > > > } > > EXPORT_SYMBOL_GPL(mcs_spin_lock); > > @@ -54,7 +83,7 @@ EXPORT_SYMBOL_GPL(mcs_spin_lock); > > * Releases the lock. The caller should pass in the corresponding node that > > * was used to acquire the lock. > > */ > > -static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) > > { > > struct mcs_spinlock *next = ACCESS_ONCE(node->next); > > > > @@ -68,7 +97,12 @@ static void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *nod > > while (!(next = ACCESS_ONCE(node->next))) > > arch_mutex_cpu_relax(); > > } > > - ACCESS_ONCE(next->locked) = 1; > > - smp_wmb(); > > + /* > > + * Pass lock to next waiter. > > + * smp_store_release() provides a memory barrier to ensure > > + * all operations in the critical section has been completed > > + * before unlocking. > > + */ > > + smp_store_release(&next->locked , 1); > > This smp_store_release() makes sense as well! > > Could you please get rid of the extraneous space before the comma? Will do. > > > } > > EXPORT_SYMBOL_GPL(mcs_spin_unlock); > > -- > > 1.7.4.4 > > > > > ^ permalink raw reply [flat|nested] 70+ messages in thread
end of thread, other threads:[~2013-11-19 23:30 UTC | newest]
Thread overview: 70+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <cover.1383935697.git.tim.c.chen@linux.intel.com>
2013-11-08 19:51 ` [PATCH v5 1/4] MCS Lock: Restructure the MCS lock defines and locking code into its own file Tim Chen
2013-11-08 19:51 ` Tim Chen
2013-11-08 19:51 ` Tim Chen
2013-11-19 19:10 ` Paul E. McKenney
2013-11-19 19:10 ` Paul E. McKenney
2013-11-19 19:10 ` Paul E. McKenney
2013-11-19 19:42 ` Tim Chen
2013-11-19 19:42 ` Tim Chen
2013-11-19 19:42 ` Tim Chen
2013-11-19 19:54 ` Paul E. McKenney
2013-11-19 19:54 ` Paul E. McKenney
2013-11-19 19:54 ` Paul E. McKenney
2013-11-08 19:52 ` [PATCH v5 2/4] MCS Lock: optimizations and extra comments Tim Chen
2013-11-08 19:52 ` Tim Chen
2013-11-08 19:52 ` Tim Chen
2013-11-19 19:13 ` Paul E. McKenney
2013-11-19 19:13 ` Paul E. McKenney
2013-11-19 19:13 ` Paul E. McKenney
2013-11-19 19:42 ` Tim Chen
2013-11-19 19:42 ` Tim Chen
2013-11-19 19:42 ` Tim Chen
2013-11-19 22:57 ` Tim Chen
2013-11-19 22:57 ` Tim Chen
2013-11-19 22:57 ` Tim Chen
2013-11-19 23:05 ` Paul E. McKenney
2013-11-19 23:05 ` Paul E. McKenney
2013-11-19 23:05 ` Paul E. McKenney
2013-11-08 19:52 ` [PATCH v5 3/4] MCS Lock: Move mcs_lock/unlock function into its own file Tim Chen
2013-11-08 19:52 ` Tim Chen
2013-11-08 19:52 ` Tim Chen
2013-11-19 19:15 ` Paul E. McKenney
2013-11-19 19:15 ` Paul E. McKenney
2013-11-19 19:15 ` Paul E. McKenney
2013-11-08 19:52 ` [PATCH v5 4/4] MCS Lock: Barrier corrections Tim Chen
2013-11-08 19:52 ` Tim Chen
2013-11-08 19:52 ` Tim Chen
2013-11-11 18:10 ` Will Deacon
2013-11-11 18:10 ` Will Deacon
2013-11-11 18:20 ` Peter Zijlstra
2013-11-11 18:20 ` Peter Zijlstra
2013-11-19 19:23 ` Paul E. McKenney
2013-11-19 19:23 ` Paul E. McKenney
2013-11-11 21:17 ` Tim Chen
2013-11-11 21:17 ` Tim Chen
2013-11-12 1:57 ` Waiman Long
2013-11-12 1:57 ` Waiman Long
2013-11-19 19:32 ` Paul E. McKenney
2013-11-19 19:32 ` Paul E. McKenney
2013-11-19 21:45 ` Tim Chen
2013-11-19 21:45 ` Tim Chen
2013-11-19 23:30 ` Paul E. McKenney
2013-11-19 23:30 ` Paul E. McKenney
2013-11-12 2:09 ` Waiman Long
2013-11-12 2:09 ` Waiman Long
2013-11-12 14:54 ` Waiman Long
2013-11-12 14:54 ` Waiman Long
2013-11-12 16:08 ` Will Deacon
2013-11-12 16:08 ` Will Deacon
2013-11-12 17:16 ` George Spelvin
2013-11-12 17:16 ` George Spelvin
2013-11-13 17:37 ` Will Deacon
2013-11-13 17:37 ` Will Deacon
2013-11-19 19:26 ` Paul E. McKenney
2013-11-19 19:26 ` Paul E. McKenney
2013-11-19 19:21 ` Paul E. McKenney
2013-11-19 19:21 ` Paul E. McKenney
2013-11-19 19:21 ` Paul E. McKenney
2013-11-19 19:46 ` Tim Chen
2013-11-19 19:46 ` Tim Chen
2013-11-19 19:46 ` Tim Chen
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.