From: Waiman Long <waiman.long@hpe.com>
To: Jason Low <jason.low2@hpe.com>
Cc: Peter Zijlstra <peterz@infradead.org>,
Linus Torvalds <torvalds@linux-foundation.org>,
Ding Tianhong <dingtianhong@huawei.com>,
Thomas Gleixner <tglx@linutronix.de>,
Will Deacon <Will.Deacon@arm.com>, Ingo Molnar <mingo@redhat.com>,
<imre.deak@intel.com>, <linux-kernel@vger.kernel.org>,
Davidlohr Bueso <dave@stgolabs.net>,
Tim Chen <tim.c.chen@linux.intel.com>, <terry.rudd@hpe.com>,
"Paul E. McKenney" <paulmck@us.ibm.com>, <jason.low2@hp.com>
Subject: Re: [PATCH v4] locking/mutex: Prevent lock starvation when spinning is disabled
Date: Fri, 19 Aug 2016 12:57:06 -0400 [thread overview]
Message-ID: <57B73A62.9020901@hpe.com> (raw)
In-Reply-To: <1471567197.4991.41.camel@j-VirtualBox>
On 08/18/2016 08:39 PM, Jason Low wrote:
> Imre reported an issue where threads are getting starved when trying
> to acquire a mutex. Threads acquiring a mutex can get arbitrarily delayed
> sleeping on a mutex because other threads can continually steal the lock
> in the fastpath and/or through optimistic spinning.
>
> Waiman has developed patches that allow waiters to return to optimistic
> spinning, thus reducing the probability that starvation occurs. However,
> Imre still sees this starvation problem in the workloads when optimistic
> spinning is disabled.
>
> This patch adds an additional boolean to the mutex that gets used in
> the CONFIG_SMP && !CONFIG_MUTEX_SPIN_ON_OWNER cases. The flag signifies
> whether or not other threads need to yield to a waiter and gets set
> when a waiter spends too much time waiting for the mutex. The threshold
> is currently set to 16 wakeups, and once the wakeup threshold is exceeded,
> other threads must yield to the top waiter. The flag gets cleared
> immediately after the top waiter acquires the mutex.
>
> This prevents waiters from getting starved without sacrificing
> much performance, as lock stealing is still allowed and only
> temporarily disabled when it is detected that a waiter has been waiting
> for too long.
>
> Reported-by: Imre Deak <imre.deak@intel.com>
> Signed-off-by: Jason Low <jason.low2@hpe.com>
> ---
> include/linux/mutex.h | 2 +
> kernel/locking/mutex.c | 122 +++++++++++++++++++++++++++++++++++++++----------
> 2 files changed, 99 insertions(+), 25 deletions(-)
>
> diff --git a/include/linux/mutex.h b/include/linux/mutex.h
> index f8e91ad..988c020 100644
> --- a/include/linux/mutex.h
> +++ b/include/linux/mutex.h
> @@ -58,6 +58,8 @@ struct mutex {
> #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> struct optimistic_spin_queue osq; /* Spinner MCS lock */
> int waiter_spinning;
> +#elif defined(CONFIG_SMP)
> + int yield_to_waiter;
> #endif
> #ifdef CONFIG_DEBUG_MUTEXES
> void *magic;
> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> index 64a0bfa..e078c49 100644
> --- a/kernel/locking/mutex.c
> +++ b/kernel/locking/mutex.c
> @@ -56,6 +56,8 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
> #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
> osq_lock_init(&lock->osq);
> lock->waiter_spinning = false;
> +#elif defined(CONFIG_SMP)
> + lock->yield_to_waiter = false;
> #endif
>
> debug_mutex_init(lock, name, key);
> @@ -72,6 +74,9 @@ EXPORT_SYMBOL(__mutex_init);
> */
> __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
>
> +
> +static inline bool need_yield_to_waiter(struct mutex *lock);
> +
> /**
> * mutex_lock - acquire the mutex
> * @lock: the mutex to be acquired
> @@ -100,7 +105,10 @@ void __sched mutex_lock(struct mutex *lock)
> * The locking fastpath is the 1->0 transition from
> * 'unlocked' into 'locked' state.
> */
> - __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
> + if (!need_yield_to_waiter(lock))
> + __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
> + else
> + __mutex_lock_slowpath(&lock->count);
> mutex_set_owner(lock);
> }
>
> @@ -449,6 +457,49 @@ static bool mutex_optimistic_spin(struct mutex *lock,
> }
> #endif
>
> +#if !defined(CONFIG_MUTEX_SPIN_ON_OWNER)&& defined(CONFIG_SMP)
> +
> +#define MUTEX_WAKEUP_THRESHOLD 16
> +
> +static inline void update_yield_to_waiter(struct mutex *lock, int *wakeups)
> +{
> + if (++(*wakeups)> MUTEX_WAKEUP_THRESHOLD&& !lock->yield_to_waiter)
> + lock->yield_to_waiter = true;
> +}
> +
> +static inline void clear_yield_to_waiter(struct mutex *lock,
> + struct mutex_waiter *waiter)
> +{
> + /* Only clear yield_to_waiter if we are the top waiter. */
> + if (lock->wait_list.next ==&waiter->list&& lock->yield_to_waiter)
> + lock->yield_to_waiter = false;
> +}
> +
> +static inline bool need_yield_to_waiter(struct mutex *lock)
> +{
> + return unlikely(lock->yield_to_waiter);
> +}
> +
> +#else /* !yield_to_waiter */
> +
> +static inline void update_yield_to_waiter(struct mutex *lock, int *wakeups)
> +{
> + return;
> +}
> +
> +static inline void clear_yield_to_waiter(struct mutex *lock,
> + struct mutex_waiter *waiter)
> +{
> + return;
> +}
> +
> +static inline bool need_yield_to_waiter(struct mutex *lock)
> +{
> + return false;
> +}
> +
> +#endif /* yield_to_waiter */
> +
> __visible __used noinline
> void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
>
> @@ -541,6 +592,12 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
> return 0;
> }
>
> +static inline bool __mutex_trylock_pending(struct mutex *lock)
> +{
> + return atomic_read(&lock->count)>= 0&&
> + atomic_xchg_acquire(&lock->count, -1) == 1;
> +}
> +
Maybe you can make a more general __mutex_trylock function that is used
in all three trylock attempts in the slowpath. For example,
/*
 * Single trylock helper for the trylock attempts in the mutex slowpath.
 *
 * @lock:   the mutex to try to acquire
 * @waiter: selects which of the two trylock flavours to use; intended to be
 *          true for a caller that is already on lock->wait_list.
 *
 * waiter == true:  only attempt the xchg when the count is non-negative, and
 *                  swap in -1 so the lock stays marked as having waiters.
 * waiter == false: back off if need_yield_to_waiter() says so, skip the xchg
 *                  when the mutex is already locked, otherwise swap in 0.
 *
 * Returns true if the previous count was 1, i.e. the lock was acquired.
 */
static inline bool __mutex_trylock(struct mutex *lock, bool waiter)
{
	if (waiter) {
		return atomic_read(&lock->count) >= 0 &&
		       atomic_xchg_acquire(&lock->count, -1) == 1;
	} else {
		return !need_yield_to_waiter(lock) &&
		       !mutex_is_locked(lock) &&
		       (atomic_xchg_acquire(&lock->count, 0) == 1);
	}
}
> /*
> * Lock a mutex (possibly interruptible), slowpath:
> */
> @@ -553,7 +610,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> struct mutex_waiter waiter;
> unsigned long flags;
> bool acquired = false; /* True if the lock is acquired */
> - int ret;
> + int ret, wakeups = 0;
>
> if (use_ww_ctx) {
> struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
> @@ -576,7 +633,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> * Once more, try to acquire the lock. Only try-lock the mutex if
> * it is unlocked to reduce unnecessary xchg() operations.
> */
> - if (!mutex_is_locked(lock)&&
> + if (!need_yield_to_waiter(lock)&& !mutex_is_locked(lock)&&
> (atomic_xchg_acquire(&lock->count, 0) == 1))
> goto skip_wait;
>
> @@ -587,24 +644,18 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> list_add_tail(&waiter.list,&lock->wait_list);
> waiter.task = task;
>
> + /*
> + * If this is the first waiter, mark the lock as having pending
> + * waiters, if we happen to acquire it while doing so, yay!
> + */
> + if (list_is_singular(&lock->wait_list)&&
> + __mutex_trylock_pending(lock))
> + goto remove_waiter;
> +
> lock_contended(&lock->dep_map, ip);
>
> while (!acquired) {
> /*
> - * Lets try to take the lock again - this is needed even if
> - * we get here for the first time (shortly after failing to
> - * acquire the lock), to make sure that we get a wakeup once
> - * it's unlocked. Later on, if we sleep, this is the
> - * operation that gives us the lock. We xchg it to -1, so
> - * that when we release the lock, we properly wake up the
> - * other waiters. We only attempt the xchg if the count is
> - * non-negative in order to avoid unnecessary xchg operations:
> - */
> - if (atomic_read(&lock->count)>= 0&&
> - (atomic_xchg_acquire(&lock->count, -1) == 1))
> - break;
> -
> - /*
> * got a signal? (This code gets eliminated in the
> * TASK_UNINTERRUPTIBLE case.)
> */
> @@ -631,9 +682,21 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> acquired = mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx,
> true);
> spin_lock_mutex(&lock->wait_lock, flags);
> +
> + update_yield_to_waiter(lock,&wakeups);
> +
> + /*
> + * Try-acquire now that we got woken at the head of the queue
> + * or we received a signal.
> + */
> + if (__mutex_trylock_pending(lock))
> + break;
That is not quite right. The lock may have been acquired in the
optimistic spinning loop. You either have to move it back to the top or
add a "!acquired" check before the trylock.
Cheers,
Longman
next prev parent reply other threads:[~2016-08-19 17:12 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-08-19 0:39 [PATCH v4] locking/mutex: Prevent lock starvation when spinning is disabled Jason Low
2016-08-19 4:11 ` Jason Low
2016-08-19 12:13 ` Peter Zijlstra
2016-08-19 16:57 ` Waiman Long [this message]
2016-08-19 19:33 ` Peter Zijlstra
2016-08-19 19:45 ` Linus Torvalds
2016-08-23 12:45 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=57B73A62.9020901@hpe.com \
--to=waiman.long@hpe.com \
--cc=Will.Deacon@arm.com \
--cc=dave@stgolabs.net \
--cc=dingtianhong@huawei.com \
--cc=imre.deak@intel.com \
--cc=jason.low2@hp.com \
--cc=jason.low2@hpe.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=paulmck@us.ibm.com \
--cc=peterz@infradead.org \
--cc=terry.rudd@hpe.com \
--cc=tglx@linutronix.de \
--cc=tim.c.chen@linux.intel.com \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox