From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org,
linux-rt-users <linux-rt-users@vger.kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>,
Carsten Emde <C.Emde@osadl.org>,
Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
John Kacur <jkacur@redhat.com>,
Paul Gortmaker <paul.gortmaker@windriver.com>,
stable-rt@vger.kernel.org
Subject: [PATCH RT 1/6] kernel/futex: dont deboost too early
Date: Sun, 30 Oct 2016 12:45:08 -0400 [thread overview]
Message-ID: <20161030164519.206464333@goodmis.org> (raw)
In-Reply-To: 20161030164507.542995196@goodmis.org
[-- Attachment #1: 0001-kernel-futex-don-t-deboost-too-early.patch --]
[-- Type: text/plain, Size: 5935 bytes --]
4.4.27-rt38-rc1 stable review patch.
If anyone has any objections, please let me know.
------------------
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
The sequence:
T1 holds futex
T2 blocks on futex and boosts T1
T1 unlocks futex and holds hb->lock
T1 unlocks rt mutex, so T1 has no more pi waiters
T3 blocks on hb->lock and adds itself to the pi waiters list of T1
T1 unlocks hb->lock and deboosts itself
T4 preempts T1 so the wakeup of T2 gets delayed
As a workaround I attempt here do unlock the hb->lock without a deboost
and perform the deboost after the wake up of the waiter.
Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
include/linux/spinlock.h | 6 +++++
include/linux/spinlock_rt.h | 2 ++
kernel/futex.c | 2 +-
kernel/locking/rtmutex.c | 53 +++++++++++++++++++++++++++++++++++++++------
4 files changed, 55 insertions(+), 8 deletions(-)
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index b241cc044bd3..02928fa5499d 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -355,6 +355,12 @@ static __always_inline void spin_unlock(spinlock_t *lock)
raw_spin_unlock(&lock->rlock);
}
+static __always_inline int spin_unlock_no_deboost(spinlock_t *lock)
+{
+ raw_spin_unlock(&lock->rlock);
+ return 0;
+}
+
static __always_inline void spin_unlock_bh(spinlock_t *lock)
{
raw_spin_unlock_bh(&lock->rlock);
diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
index 3b2825537531..7eb87584e843 100644
--- a/include/linux/spinlock_rt.h
+++ b/include/linux/spinlock_rt.h
@@ -26,6 +26,7 @@ extern void __lockfunc rt_spin_lock(spinlock_t *lock);
extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
extern void __lockfunc rt_spin_unlock(spinlock_t *lock);
+extern int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock);
extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock);
extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags);
extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock);
@@ -112,6 +113,7 @@ static inline unsigned long spin_lock_trace_flags(spinlock_t *lock)
#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0)
#define spin_unlock(lock) rt_spin_unlock(lock)
+#define spin_unlock_no_deboost(lock) rt_spin_unlock_no_deboost(lock)
#define spin_unlock_bh(lock) \
do { \
diff --git a/kernel/futex.c b/kernel/futex.c
index ad38af0bcff3..059623427b99 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1288,7 +1288,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
* deboost first (and lose our higher priority), then the task might get
* scheduled away before the wake up can take place.
*/
- spin_unlock(&hb->lock);
+ deboost |= spin_unlock_no_deboost(&hb->lock);
wake_up_q(&wake_q);
wake_up_q_sleeper(&wake_sleeper_q);
if (deboost)
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index fde5e54f1096..6759a798c927 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -939,13 +939,14 @@ static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
slowfn(lock, do_mig_dis);
}
-static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
- void (*slowfn)(struct rt_mutex *lock))
+static inline int rt_spin_lock_fastunlock(struct rt_mutex *lock,
+ int (*slowfn)(struct rt_mutex *lock))
{
- if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
+ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
rt_mutex_deadlock_account_unlock(current);
- else
- slowfn(lock);
+ return 0;
+ }
+ return slowfn(lock);
}
#ifdef CONFIG_SMP
/*
@@ -1086,7 +1087,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
/*
* Slow path to release a rt_mutex spin_lock style
*/
-static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
+static int noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
{
unsigned long flags;
WAKE_Q(wake_q);
@@ -1101,7 +1102,7 @@ static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
if (!rt_mutex_has_waiters(lock)) {
lock->owner = NULL;
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
- return;
+ return 0;
}
mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
@@ -1112,6 +1113,33 @@ static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
/* Undo pi boosting.when necessary */
rt_mutex_adjust_prio(current);
+ return 0;
+}
+
+static int noinline __sched rt_spin_lock_slowunlock_no_deboost(struct rt_mutex *lock)
+{
+ unsigned long flags;
+ WAKE_Q(wake_q);
+ WAKE_Q(wake_sleeper_q);
+
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+
+ debug_rt_mutex_unlock(lock);
+
+ rt_mutex_deadlock_account_unlock(current);
+
+ if (!rt_mutex_has_waiters(lock)) {
+ lock->owner = NULL;
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ return 0;
+ }
+
+ mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
+
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ wake_up_q(&wake_q);
+ wake_up_q_sleeper(&wake_sleeper_q);
+ return 1;
}
void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
@@ -1166,6 +1194,17 @@ void __lockfunc rt_spin_unlock(spinlock_t *lock)
}
EXPORT_SYMBOL(rt_spin_unlock);
+int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock)
+{
+ int ret;
+
+ /* NOTE: we always pass in '1' for nested, for simplicity */
+ spin_release(&lock->dep_map, 1, _RET_IP_);
+ ret = rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock_no_deboost);
+ migrate_enable();
+ return ret;
+}
+
void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
{
rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
--
2.9.3
next prev parent reply other threads:[~2016-10-30 16:45 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-10-30 16:45 [PATCH RT 0/6] Linux 4.4.27-rt38-rc1 Steven Rostedt
2016-10-30 16:45 ` Steven Rostedt [this message]
2016-10-30 16:45 ` [PATCH RT 2/6] ftrace: Fix trace header alignment Steven Rostedt
2016-10-30 16:45 ` [PATCH RT 3/6] zsmalloc: turn that get_cpu_light() into a local_lock() Steven Rostedt
2016-10-30 16:45 ` [PATCH RT 4/6] x86/apic: get rid of "warning: acpi_ioapic_lock defined but not used" Steven Rostedt
2016-10-30 16:45 ` [PATCH RT 5/6] kbuild: add -fno-PIE Steven Rostedt
2016-10-30 16:45 ` [PATCH RT 6/6] Linux 4.4.27-rt38-rc1 Steven Rostedt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20161030164519.206464333@goodmis.org \
--to=rostedt@goodmis.org \
--cc=C.Emde@osadl.org \
--cc=bigeasy@linutronix.de \
--cc=jkacur@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rt-users@vger.kernel.org \
--cc=paul.gortmaker@windriver.com \
--cc=stable-rt@vger.kernel.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox