All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@elte.hu>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: William Weston <weston@sysex.net>, linux-kernel@vger.kernel.org
Subject: Re: Real-Time Preemption, -RT-2.6.12-final-V0.7.51-12
Date: Sun, 10 Jul 2005 17:10:08 +0200	[thread overview]
Message-ID: <20050710151008.GA28194@elte.hu> (raw)
In-Reply-To: <1120994288.14680.0.camel@twins>


* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:

> > I can reproduce priority leakage on my SMP system; any userspace process
> > chrt'ed up and a lot will follow. This makes the system very
> > unresponsive when doing a make -j5. Verified on 51-{6,18,23}.
> > 
> 
> The following patch seems to fix it for me, YMMV.
> 
> --- kernel/sched.c~     2005-07-08 10:27:59.000000000 +0200
> +++ kernel/sched.c      2005-07-10 13:00:42.000000000 +0200
> @@ -780,7 +780,8 @@ static void recalc_task_prio(task_t *p,
>                 }
>         }
> 
> -       p->prio = p->normal_prio = effective_prio(p);
> +       p->prio = effective_prio(p);
> +       p->normal_prio = unlikely(rt_prio(p->normal_prio)) ? p->prio : __effective_prio(p);
>  }

ahh, indeed, this code did not take boosting into account. Good catch!  
I'm wondering why this only showed up on SMP. I've fixed it a bit 
differently in my tree, by making the roles of the various priority 
fields and functions more obvious, see the delta patch below.  I've also 
released the -51-23 patch with these changes included. Does this fix 
priority leakage on your SMP system?

	Ingo

Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h
+++ linux/include/linux/sched.h
@@ -1035,7 +1035,7 @@ extern int idle_cpu(int cpu);
 extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
 extern task_t *idle_task(int cpu);
 extern void mutex_setprio(task_t *p, int prio);
-extern int mutex_getprio(task_t *p);
+extern int normal_prio(task_t *p);
 
 void yield(void);
 void __yield(void);
Index: linux/kernel/rt.c
===================================================================
--- linux.orig/kernel/rt.c
+++ linux/kernel/rt.c
@@ -720,7 +720,7 @@ static void pi_setprio(struct rt_mutex *
 
 #ifdef CONFIG_RT_DEADLOCK_DETECT
 	pi_prio++;
-	if (p->policy != SCHED_NORMAL && prio > mutex_getprio(p)) {
+	if (p->policy != SCHED_NORMAL && prio > normal_prio(p)) {
 		TRACE_OFF();
 
 		printk("huh? (%d->%d??)\n", p->prio, prio);
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -645,7 +645,7 @@ static inline void enqueue_task_head(str
 }
 
 /*
- * effective_prio - return the priority that is based on the static
+ * __normal_prio - return the priority that is based on the static
  * priority but is modified by bonuses/penalties.
  *
  * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
@@ -659,7 +659,7 @@ static inline void enqueue_task_head(str
  * Both properties are important to certain workloads.
  */
 
-static inline int __effective_prio(task_t *p)
+static inline int __normal_prio(task_t *p)
 {
 	int bonus, prio;
 
@@ -670,57 +670,53 @@ static inline int __effective_prio(task_
 		prio = MAX_RT_PRIO;
 	if (prio > MAX_PRIO-1)
 		prio = MAX_PRIO-1;
-	return prio;
-}
 
-static int effective_prio(task_t *p)
-{
-	if (rt_task(p))
-		return p->prio;
-	return __effective_prio(p);
-}
-
-static inline void trace_start_sched_wakeup(task_t *p, runqueue_t *rq)
-{
-	if (TASK_PREEMPTS_CURR(p, rq) && (p != rq->curr))
-		__trace_start_sched_wakeup(p);
+	return prio;
 }
 
 /*
- * __activate_task - move a task to the runqueue.
+ * Calculate the expected normal priority: i.e. priority
+ * without taking RT-inheritance into account. Might be
+ * boosted by interactivity modifiers. Changes upon fork,
+ * setprio syscalls, and whenever the interactivity
+ * estimator recalculates.
  */
-static inline void __activate_task(task_t *p, runqueue_t *rq)
+inline int normal_prio(task_t *p)
 {
-	trace_special_pid(p->pid, p->prio, rq->nr_running);
-	enqueue_task(p, rq->active);
-	rq->nr_running++;
+	int prio;
+
+	if (p->policy != SCHED_NORMAL)
+		prio = MAX_RT_PRIO-1 - p->rt_priority;
+	else
+		prio = __normal_prio(p);
+
+	trace_special_pid(p->pid, p->prio, prio);
+	return prio;
 }
 
 /*
- * __activate_task_after - move a task to the runqueue,
- *                         to execute after a specific task.
+ * Calculate the current priority, i.e. the priority
+ * taken into account by the scheduler. This value might
+ * be boosted by RT tasks, or might be boosted by
+ * interactivity modifiers. Will be RT if the task got
+ * RT-boosted. If not then it returns p->normal_prio.
  */
-static inline
-void __activate_task_after(task_t *p, task_t *parent, runqueue_t *rq)
+static void __recalc_task_prio(task_t *p)
 {
-	// FIXME: to head rather?
-	list_add_tail(&p->run_list, &parent->run_list);
-	p->array = parent->array;
-	p->array->nr_active++;
-	rq->nr_running++;
-	inc_rt_tasks(p, rq);
+	p->normal_prio = normal_prio(p);
+	/*
+	 * If we are RT tasks or we were boosted to RT priority,
+	 * keep the priority unchanged. Otherwise, update priority
+	 * to the normal priority:
+	 */
+	if (!rt_prio(p->prio))
+		p->prio = p->normal_prio;
 }
 
 /*
- * __activate_idle_task - move idle task to the _front_ of runqueue.
+ * Recalculate p->normal_prio and p->prio after having slept,
+ * updating the sleep-average too:
  */
-static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
-{
-	enqueue_task_head(p, rq->active);
-	rq->nr_running++;
-	WARN_ON(rt_task(p));
-}
-
 static void recalc_task_prio(task_t *p, unsigned long long now)
 {
 	/* Caller must always ensure 'now >= p->timestamp' */
@@ -780,7 +776,48 @@ static void recalc_task_prio(task_t *p, 
 		}
 	}
 
-	p->prio = p->normal_prio = effective_prio(p);
+	__recalc_task_prio(p);
+}
+
+static inline void trace_start_sched_wakeup(task_t *p, runqueue_t *rq)
+{
+	if (TASK_PREEMPTS_CURR(p, rq) && (p != rq->curr))
+		__trace_start_sched_wakeup(p);
+}
+
+/*
+ * __activate_task - move a task to the runqueue.
+ */
+static inline void __activate_task(task_t *p, runqueue_t *rq)
+{
+	trace_special_pid(p->pid, p->prio, rq->nr_running);
+	enqueue_task(p, rq->active);
+	rq->nr_running++;
+}
+
+/*
+ * __activate_task_after - move a task to the runqueue,
+ *                         to execute after a specific task.
+ */
+static inline
+void __activate_task_after(task_t *p, task_t *parent, runqueue_t *rq)
+{
+	// FIXME: to head rather?
+	list_add_tail(&p->run_list, &parent->run_list);
+	p->array = parent->array;
+	p->array->nr_active++;
+	rq->nr_running++;
+	inc_rt_tasks(p, rq);
+}
+
+/*
+ * __activate_idle_task - move idle task to the _front_ of runqueue.
+ */
+static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
+{
+	enqueue_task_head(p, rq->active);
+	rq->nr_running++;
+	WARN_ON(rt_task(p));
 }
 
 /*
@@ -1415,7 +1452,7 @@ void fastcall wake_up_new_task(task_t * 
 	p->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(p) *
 		CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS);
 
-	p->prio = p->normal_prio = effective_prio(p);
+	__recalc_task_prio(p);
 
 	if (likely(cpu == this_cpu)) {
 		if (!(clone_flags & CLONE_VM)) {
@@ -1427,7 +1464,8 @@ void fastcall wake_up_new_task(task_t * 
 			if (unlikely(!current->array))
 				__activate_task(p, rq);
 			else {
-				p->prio = p->normal_prio = current->prio;
+				p->prio = current->prio;
+				p->normal_prio = current->normal_prio;
 				__activate_task_after(p, current, rq);
 			}
 			set_need_resched();
@@ -2730,6 +2768,10 @@ void scheduler_tick(void)
 		/*
 		 * RR tasks need a special form of timeslice management.
 		 * FIFO tasks have no timeslices.
+		 *
+		 * On PREEMPT_RT, boosted tasks will also get into this
+		 * branch and wont get their timeslice decreased until
+		 * they have done their work.
 		 */
 		if ((p->policy == SCHED_RR) && !--p->time_slice) {
 			p->time_slice = task_timeslice(p);
@@ -2744,7 +2786,7 @@ void scheduler_tick(void)
 	if (!--p->time_slice) {
 		dequeue_task(p, rq->active);
 		set_tsk_need_resched(p);
-		p->prio = p->normal_prio = effective_prio(p);
+		__recalc_task_prio(p);
 		p->time_slice = task_timeslice(p);
 		p->first_time_slice = 0;
 
@@ -3682,7 +3724,7 @@ void set_user_nice(task_t *p, long nice)
 	new_prio = NICE_TO_PRIO(nice);
 	delta = new_prio - old_prio;
 	p->static_prio = NICE_TO_PRIO(nice);
-	p->prio += delta;
+	__recalc_task_prio(p);
 
 	if (array) {
 		enqueue_task(p, array);
@@ -3712,18 +3754,6 @@ int can_nice(const task_t *p, const int 
 		capable(CAP_SYS_NICE));
 }
 
-int mutex_getprio(task_t *p)
-{
-	int prio;
-
-	if (p->policy != SCHED_NORMAL)
-		prio = MAX_RT_PRIO-1 - p->rt_priority;
-	else
-		prio = __effective_prio(p);
-	trace_special_pid(p->pid, p->prio, prio);
-	return prio;
-}
-
 /*
  * Used by the PREEMPT_RT code to implement
  * priority inheritance logic:
@@ -3880,10 +3910,7 @@ static void __setscheduler(struct task_s
 	BUG_ON(p->array);
 	p->policy = policy;
 	p->rt_priority = prio;
-	if (policy != SCHED_NORMAL)
-		p->prio = p->normal_prio = MAX_RT_PRIO-1 - p->rt_priority;
-	else
-		p->prio = p->normal_prio = p->static_prio;
+	__recalc_task_prio(p);
 }
 
 /**

  reply	other threads:[~2005-07-10 15:10 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-06-28 17:27 Real-Time Preemption, -RT-2.6.12-final-V0.7.50-24 Karsten Wiese
2005-06-28 20:21 ` Ingo Molnar
2005-06-28 20:30   ` Ingo Molnar
2005-06-28 23:51     ` Karsten Wiese
2005-06-29  6:34       ` Ingo Molnar
2005-06-29  7:00         ` Ingo Molnar
2005-06-29  9:15           ` William Weston
2005-06-29 12:56             ` Ingo Molnar
2005-06-30  1:50               ` William Weston
2005-06-29 14:48             ` Karsten Wiese
2005-06-29 19:38               ` Ingo Molnar
2005-06-29 23:36                 ` Karsten Wiese
2005-06-30 17:52                   ` Karsten Wiese
2005-06-30 19:46                     ` Real-Time Preemption, -RT-2.6.12-final-V0.7.50-37 William Weston
2005-06-30 19:52                       ` Ingo Molnar
2005-06-30 20:48                         ` Ingo Molnar
2005-06-30 22:17                           ` Gene Heskett
2005-06-30 22:15                         ` Gene Heskett
2005-06-30 20:38                       ` Real-Time Preemption, -RT-2.6.12-final-V0.7.50-38 Michal Schmidt
2005-06-30 20:47                         ` Ingo Molnar
2005-06-30 20:50                     ` Real-Time Preemption, -RT-2.6.12-final-V0.7.50-24 Ingo Molnar
2005-06-30 22:27                       ` Karsten Wiese
2005-06-30 22:59                         ` William Weston
2005-07-01  1:00                           ` William Weston
2005-07-01  0:15                         ` Karsten Wiese
2005-07-01  7:18                         ` Ingo Molnar
2005-07-01 19:34                           ` Chuck Harding
2005-07-02  1:46                           ` William Weston
2005-07-02  2:02                             ` Lee Revell
2005-07-04  8:53                               ` William Weston
2005-07-04 11:16                                 ` Ingo Molnar
2005-07-13  1:18                                   ` Lee Revell
2005-07-13  1:28                                     ` Lee Revell
2005-07-13  6:35                                       ` Ingo Molnar
2005-07-13 17:20                                         ` Lee Revell
2005-07-03 14:04                             ` Ingo Molnar
2005-07-03 18:12                               ` Ingo Molnar
2005-07-05 20:43                                 ` Real-Time Preemption, -RT-2.6.12-final-V0.7.50-45 William Weston
2005-07-06 10:04                                   ` Ingo Molnar
2005-07-06 21:12                                     ` William Weston
2005-07-07 14:56                                     ` Steven Rostedt
2005-07-07 15:31                                       ` Ingo Molnar
2005-07-07 15:42                                         ` Steven Rostedt
2005-07-07 16:10                                           ` Steven Rostedt
2005-07-07 16:48                                             ` Ingo Molnar
2005-07-08  7:02                                               ` Steven Rostedt
2005-07-08  9:54                                                 ` Steven Rostedt
2005-07-08 10:00                                                   ` Ingo Molnar
2005-07-08 20:55                                               ` Bill Davidsen
2005-07-07 17:51                                             ` Ingo Molnar
2005-07-07 21:18                                               ` Real-Time Preemption, -RT-2.6.12-final-V0.7.51-12 Chuck Harding
2005-07-08  5:41                                                 ` Ingo Molnar
2005-07-08  8:00                                                   ` Ingo Molnar
2005-07-08 18:06                                                     ` Chuck Harding
2005-07-07 19:05                                     ` Real-Time Preemption, -RT-2.6.12-final-V0.7.51-08 William Weston
2005-07-07 19:18                                       ` Ingo Molnar
2005-07-07 19:46                                         ` William Weston
2005-07-07 19:52                                           ` Ingo Molnar
2005-07-08  9:23                                       ` Ingo Molnar
2005-07-08  9:29                                         ` Ingo Molnar
2005-07-08  9:45                                           ` Ingo Molnar
2005-07-08 20:30                                             ` Real-Time Preemption, -RT-2.6.12-final-V0.7.51-17 William Weston
2005-07-07  1:26                                 ` Real-Time Preemption, -RT-2.6.12-final-V0.7.50-24 William Weston
2005-07-07 10:48                                   ` Ingo Molnar
2005-07-07 18:49                                     ` Real-Time Preemption, -RT-2.6.12-final-V0.7.51-06 William Weston
2005-07-07 20:02                                     ` Real-Time Preemption, -RT-2.6.12-final-V0.7.51-12 William Weston
2005-07-08  8:03                                       ` Ingo Molnar
2005-07-08 20:12                                         ` William Weston
2005-07-09 21:24                                           ` Peter Zijlstra
2005-07-10 11:18                                             ` Peter Zijlstra
2005-07-10 15:10                                               ` Ingo Molnar [this message]
2005-07-10 15:43                                                 ` Peter Zijlstra
2005-07-10 16:12                                                   ` Peter Zijlstra
2005-07-10 18:26                                                     ` Ingo Molnar
2005-07-11 18:49                                                     ` William Weston
2005-07-08 22:33                                         ` Real-Time Preemption, -RT-2.6.12-final-V0.7.51-17 William Weston

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20050710151008.GA28194@elte.hu \
    --to=mingo@elte.hu \
    --cc=a.p.zijlstra@chello.nl \
    --cc=linux-kernel@vger.kernel.org \
    --cc=weston@sysex.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.