public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: xlpang@redhat.com
Cc: linux-kernel@vger.kernel.org, Juri Lelli <juri.lelli@arm.com>,
	Ingo Molnar <mingo@redhat.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Thomas Gleixner <tglx@linutronix.de>
Subject: Re: [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks
Date: Fri, 1 Apr 2016 23:51:43 +0200	[thread overview]
Message-ID: <20160401215143.GB2906@worktop> (raw)
In-Reply-To: <56FE78E0.5060504@redhat.com>

On Fri, Apr 01, 2016 at 09:34:24PM +0800, Xunlei Pang wrote:

> >> I checked the code, currently only deadline accesses the
> >> pi_waiters/pi_waiters_leftmost
> >> without pi_lock held via rt_mutex_get_top_task(), other cases all have
> >> pi_lock held.

> Any better ideas is welcome.

Something like the below _might_ work; but its late and I haven't looked
at the PI code in a while. This basically caches a pointer to the top
waiter task in the running task_struct, under pi_lock and rq->lock, and
therefore we can use it with only rq->lock held.

Since the task is blocked, and cannot unblock without taking itself from
the block chain -- which would cause rt_mutex_setprio() to set another
top waiter task, the lifetime rules should be good.

Having this top waiter pointer around might also be useful to further
implement bandwidth inheritance or such, but I've not thought about that
too much.

Lots of code deleted because we move the entire task->prio mapping muck
into the scheduler, because we now pass a pi_task, not a prio.

---
 include/linux/sched.h    |  1 +
 include/linux/sched/rt.h | 20 +-------------------
 kernel/locking/rtmutex.c | 45 +++++----------------------------------------
 kernel/sched/core.c      | 34 +++++++++++++++++++++++-----------
 kernel/sched/deadline.c  |  2 +-
 5 files changed, 31 insertions(+), 71 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 52c4847b05e2..30169f38cb24 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1615,6 +1615,7 @@ struct task_struct {
 
 	/* Protection of the PI data structures: */
 	raw_spinlock_t pi_lock;
+	struct task_struct *pi_task;
 
 	struct wake_q_node wake_q;
 
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index a30b172df6e1..0a8f071b16e3 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -16,31 +16,13 @@ static inline int rt_task(struct task_struct *p)
 }
 
 #ifdef CONFIG_RT_MUTEXES
-extern int rt_mutex_getprio(struct task_struct *p);
-extern void rt_mutex_setprio(struct task_struct *p, int prio);
-extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio);
-extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
+extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task);
 extern void rt_mutex_adjust_pi(struct task_struct *p);
 static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
 {
 	return tsk->pi_blocked_on != NULL;
 }
 #else
-static inline int rt_mutex_getprio(struct task_struct *p)
-{
-	return p->normal_prio;
-}
-
-static inline int rt_mutex_get_effective_prio(struct task_struct *task,
-					      int newprio)
-{
-	return newprio;
-}
-
-static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
-{
-	return NULL;
-}
 # define rt_mutex_adjust_pi(p)		do { } while (0)
 static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
 {
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3e746607abe5..13b6b5922d3c 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -257,53 +257,18 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 }
 
 /*
- * Calculate task priority from the waiter tree priority
- *
- * Return task->normal_prio when the waiter tree is empty or when
- * the waiter is not allowed to do priority boosting
- */
-int rt_mutex_getprio(struct task_struct *task)
-{
-	if (likely(!task_has_pi_waiters(task)))
-		return task->normal_prio;
-
-	return min(task_top_pi_waiter(task)->prio,
-		   task->normal_prio);
-}
-
-struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
-{
-	if (likely(!task_has_pi_waiters(task)))
-		return NULL;
-
-	return task_top_pi_waiter(task)->task;
-}
-
-/*
- * Called by sched_setscheduler() to get the priority which will be
- * effective after the change.
- */
-int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
-{
-	if (!task_has_pi_waiters(task))
-		return newprio;
-
-	if (task_top_pi_waiter(task)->task->prio <= newprio)
-		return task_top_pi_waiter(task)->task->prio;
-	return newprio;
-}
-
-/*
  * Adjust the priority of a task, after its pi_waiters got modified.
  *
  * This can be both boosting and unboosting. task->pi_lock must be held.
  */
 static void __rt_mutex_adjust_prio(struct task_struct *task)
 {
-	int prio = rt_mutex_getprio(task);
+	struct task_struct *pi_task = task;
+
+	if (unlikely(task_has_pi_waiters(task)))
+		pi_task = task_top_pi_waiter(task)->task;
 
-	if (task->prio != prio || dl_prio(prio))
-		rt_mutex_setprio(task, prio);
+	rt_mutex_setprio(task, pi_task);
 }
 
 /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8b489fcac37b..7d7e3a0eaeb0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3392,7 +3392,7 @@ EXPORT_SYMBOL(default_wake_function);
 /*
  * rt_mutex_setprio - set the current priority of a task
  * @p: task
- * @prio: prio value (kernel-internal form)
+ * @pi_task: top waiter, donating state
  *
  * This function changes the 'effective' priority of a task. It does
  * not touch ->normal_prio like __setscheduler().
@@ -3400,13 +3400,20 @@ EXPORT_SYMBOL(default_wake_function);
  * Used by the rt_mutex code to implement priority inheritance
  * logic. Call site only calls if the priority of the task changed.
  */
-void rt_mutex_setprio(struct task_struct *p, int prio)
+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 {
-	int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
-	struct rq *rq;
+	int prio, oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
 	const struct sched_class *prev_class;
+	struct rq *rq;
 
-	BUG_ON(prio > MAX_PRIO);
+	/*
+	 * For FIFO/RR we simply donate prio; for DL things are
+	 * more interesting.
+	 */
+	/* XXX used to be waiter->prio, not waiter->task->prio */
+	prio = min(pi_task->prio, p->normal_prio);
+	if (p->prio == prio && !dl_prio(prio))
+		return;
 
 	rq = __task_rq_lock(p);
 
@@ -3442,6 +3449,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	if (running)
 		put_prev_task(rq, p);
 
+	if (pi_task == p)
+		pi_task = NULL;
+	p->pi_task = pi_task;
+
 	/*
 	 * Boosting condition are:
 	 * 1. -rt task is running and holds mutex A
@@ -3452,7 +3463,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	 *          running task
 	 */
 	if (dl_prio(prio)) {
-		struct task_struct *pi_task = rt_mutex_get_top_task(p);
 		if (!dl_prio(p->normal_prio) ||
 		    (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
 			p->dl.dl_boosted = 1;
@@ -3727,10 +3737,9 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
 	 * Keep a potential priority boosting if called from
 	 * sched_setscheduler().
 	 */
-	if (keep_boost)
-		p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
-	else
-		p->prio = normal_prio(p);
+	p->prio = normal_prio(p);
+	if (keep_boost && p->pi_task)
+		p->prio = min(p->prio, p->pi_task->prio);
 
 	if (dl_prio(p->prio))
 		p->sched_class = &dl_sched_class;
@@ -4017,7 +4026,10 @@ static int __sched_setscheduler(struct task_struct *p,
 		 * the runqueue. This will be done when the task deboost
 		 * itself.
 		 */
-		new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+		new_effective_prio = newprio;
+		if (p->pi_task)
+			new_effective_prio = min(new_effective_prio, p->pi_task->prio);
+
 		if (new_effective_prio == oldprio)
 			queue_flags &= ~DEQUEUE_MOVE;
 	}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index affd97ec9f65..6c5aa4612eb6 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -932,7 +932,7 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
 
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
-	struct task_struct *pi_task = rt_mutex_get_top_task(p);
+	struct task_struct *pi_task = p->pi_task;
 	struct sched_dl_entity *pi_se = &p->dl;
 
 	/*

  reply	other threads:[~2016-04-01 21:51 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-01 11:00 [PATCH] sched/deadline/rtmutex: Fix a PI crash for deadline tasks Xunlei Pang
2016-04-01 11:38 ` Peter Zijlstra
2016-04-01 12:23   ` Xunlei Pang
2016-04-01 13:12     ` Peter Zijlstra
2016-04-01 13:34       ` Xunlei Pang
2016-04-01 21:51         ` Peter Zijlstra [this message]
2016-04-02 10:19           ` Xunlei Pang
2016-04-05  8:38           ` Xunlei Pang
2016-04-05  9:19             ` Peter Zijlstra
2016-04-05  9:29               ` Peter Zijlstra
2016-04-05 10:48                 ` Xunlei Pang
2016-04-05 11:32                   ` Peter Zijlstra
2016-04-08 16:25                 ` Steven Rostedt
2016-04-08 17:38                   ` Peter Zijlstra
2016-04-08 18:50                     ` Steven Rostedt
2016-04-08 18:59                       ` Peter Zijlstra
2016-04-08 19:15                         ` Steven Rostedt
2016-04-08 19:28                           ` Steven Rostedt
2016-04-09  3:27                             ` Xunlei Pang
2016-04-09  3:25                         ` Xunlei Pang
2016-04-09 13:29                           ` Peter Zijlstra
2016-04-10  8:22                             ` Xunlei Pang
2016-04-12  3:08                               ` Xunlei Pang
2016-04-12 15:51                                 ` Peter Zijlstra
2016-04-13  2:13                                   ` Xunlei Pang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160401215143.GB2906@worktop \
    --to=peterz@infradead.org \
    --cc=juri.lelli@arm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=xlpang@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox