All of lore.kernel.org
 help / color / mirror / Atom feed
From: Con Kolivas <kernel@kolivas.org>
To: Paolo Ornati <ornati@fastwebnet.it>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Ingo Molnar <mingo@elte.hu>,
	Nick Piggin <nickpiggin@yahoo.com.au>,
	Peter Williams <pwil3058@bigpond.net.au>
Subject: Re: [SCHED] wrong priority calc - SIMPLE test case
Date: Fri, 13 Jan 2006 12:13:11 +1100	[thread overview]
Message-ID: <200601131213.14832.kernel@kolivas.org> (raw)
In-Reply-To: <20051230145221.301faa40@localhost>

[-- Attachment #1: Type: text/plain, Size: 6936 bytes --]

On Saturday 31 December 2005 00:52, Paolo Ornati wrote:
> WAS: [SCHED] Totally WRONG prority calculation with specific test-case
> (since 2.6.10-bk12)
> http://lkml.org/lkml/2005/12/27/114/index.html
>
> On Wed, 28 Dec 2005 10:26:58 +1100
>
> Con Kolivas <kernel@kolivas.org> wrote:
> > The issue is that the scheduler interactivity estimator is a state
> > machine and can be fooled to some degree, and a cpu intensive task that
> > just happens to sleep a little bit gets significantly better priority
> > than one that is fully cpu bound all the time. Reverting that change is
> > not a solution because it can still be fooled by the same process
> > sleeping lots for a few seconds or so at startup and then changing to the
> > mostly-cpu-bound, slightly-sleeping behaviour. This "fluctuating" behaviour
> > is, in my opinion, worse, which is why I removed it.
>
> Trying to find an "as simple as possible" test case for this problem
> (which I consider a BUG in priority calculation), I've come up with this
> very simple program:

Hi Paolo.

Can you try the following patch on 2.6.15 please? I'm interested in how
adversely this affects interactive performance as well as whether it helps
your test case.

Thanks,
Con



---
 include/linux/sched.h |    9 +++++-
 kernel/sched.c        |   72 ++++++++++++++++++++++----------------------------
 2 files changed, 41 insertions(+), 40 deletions(-)

Index: linux-2.6.15/include/linux/sched.h
===================================================================
--- linux-2.6.15.orig/include/linux/sched.h
+++ linux-2.6.15/include/linux/sched.h
@@ -683,6 +683,13 @@ static inline void prefetch_stack(struct
 struct audit_context;		/* See audit.c */
 struct mempolicy;
 
+enum sleep_type {
+	SLEEP_NORMAL,
+	SLEEP_NONINTERACTIVE,
+	SLEEP_INTERACTIVE,
+	SLEEP_INTERRUPTED,
+};
+
 struct task_struct {
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
 	struct thread_info *thread_info;
@@ -704,7 +711,7 @@ struct task_struct {
 	unsigned long sleep_avg;
 	unsigned long long timestamp, last_ran;
 	unsigned long long sched_time; /* sched_clock time spent running */
-	int activated;
+	enum sleep_type sleep_type;
 
 	unsigned long policy;
 	cpumask_t cpus_allowed;
Index: linux-2.6.15/kernel/sched.c
===================================================================
--- linux-2.6.15.orig/kernel/sched.c
+++ linux-2.6.15/kernel/sched.c
@@ -751,31 +751,22 @@ static int recalc_task_prio(task_t *p, u
 		 * prevent them suddenly becoming cpu hogs and starving
 		 * other processes.
 		 */
-		if (p->mm && p->activated != -1 &&
+		if (p->mm && p->sleep_type != SLEEP_NONINTERACTIVE &&
 			sleep_time > INTERACTIVE_SLEEP(p)) {
 				p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG -
 						DEF_TIMESLICE);
 		} else {
+
 			/*
 			 * The lower the sleep avg a task has the more
-			 * rapidly it will rise with sleep time.
+			 * rapidly it will rise with sleep time. This enables
+			 * tasks to rapidly recover to a low latency priority.
+			 * If a task was sleeping with the noninteractive
+			 * label do not apply this non-linear boost
 			 */
-			sleep_time *= (MAX_BONUS - CURRENT_BONUS(p)) ? : 1;
-
-			/*
-			 * Tasks waking from uninterruptible sleep are
-			 * limited in their sleep_avg rise as they
-			 * are likely to be waiting on I/O
-			 */
-			if (p->activated == -1 && p->mm) {
-				if (p->sleep_avg >= INTERACTIVE_SLEEP(p))
-					sleep_time = 0;
-				else if (p->sleep_avg + sleep_time >=
-						INTERACTIVE_SLEEP(p)) {
-					p->sleep_avg = INTERACTIVE_SLEEP(p);
-					sleep_time = 0;
-				}
-			}
+			if (p->sleep_type != SLEEP_NONINTERACTIVE || p->mm)
+				sleep_time *=
+					(MAX_BONUS - CURRENT_BONUS(p)) ? : 1;
 
 			/*
 			 * This code gives a bonus to interactive tasks.
@@ -818,11 +809,7 @@ static void activate_task(task_t *p, run
 	if (!rt_task(p))
 		p->prio = recalc_task_prio(p, now);
 
-	/*
-	 * This checks to make sure it's not an uninterruptible task
-	 * that is now waking up.
-	 */
-	if (!p->activated) {
+	if (p->sleep_type != SLEEP_NONINTERACTIVE) {
 		/*
 		 * Tasks which were woken up by interrupts (ie. hw events)
 		 * are most likely of interactive nature. So we give them
@@ -831,13 +818,13 @@ static void activate_task(task_t *p, run
 		 * on a CPU, first time around:
 		 */
 		if (in_interrupt())
-			p->activated = 2;
+			p->sleep_type = SLEEP_INTERRUPTED;
 		else {
 			/*
 			 * Normal first-time wakeups get a credit too for
 			 * on-runqueue time, but it will be weighted down:
 			 */
-			p->activated = 1;
+			p->sleep_type = SLEEP_INTERACTIVE;
 		}
 	}
 	p->timestamp = now;
@@ -1356,22 +1343,23 @@ out_activate:
 	if (old_state == TASK_UNINTERRUPTIBLE) {
 		rq->nr_uninterruptible--;
 		/*
-		 * Tasks on involuntary sleep don't earn
-		 * sleep_avg beyond just interactive state.
+		 * Tasks waking from uninterruptible sleep are likely
+		 * to be sleeping involuntarily on I/O and are otherwise
+		 * cpu bound so label them as noninteractive.
 		 */
-		p->activated = -1;
-	}
+		p->sleep_type = SLEEP_NONINTERACTIVE;
+	} else
 
 	/*
 	 * Tasks that have marked their sleep as noninteractive get
-	 * woken up without updating their sleep average. (i.e. their
-	 * sleep is handled in a priority-neutral manner, no priority
-	 * boost and no penalty.)
+	 * woken up with their sleep average not weighted in an
+	 * interactive way.
 	 */
-	if (old_state & TASK_NONINTERACTIVE)
-		__activate_task(p, rq);
-	else
-		activate_task(p, rq, cpu == this_cpu);
+		if (old_state & TASK_NONINTERACTIVE)
+			p->sleep_type = SLEEP_NONINTERACTIVE;
+
+
+	activate_task(p, rq, cpu == this_cpu);
 	/*
 	 * Sync wakeups (i.e. those types of wakeups where the waker
 	 * has indicated that it will leave the CPU in short order)
@@ -2938,6 +2926,12 @@ EXPORT_SYMBOL(sub_preempt_count);
 
 #endif
 
+static inline int interactive_sleep(enum sleep_type sleep_type)
+{
+	return (sleep_type == SLEEP_INTERACTIVE ||
+		sleep_type == SLEEP_INTERRUPTED);
+}
+
 /*
  * schedule() is the main scheduler function.
  */
@@ -3063,12 +3057,12 @@ go_idle:
 	queue = array->queue + idx;
 	next = list_entry(queue->next, task_t, run_list);
 
-	if (!rt_task(next) && next->activated > 0) {
+	if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
 		unsigned long long delta = now - next->timestamp;
 		if (unlikely((long long)(now - next->timestamp) < 0))
 			delta = 0;
 
-		if (next->activated == 1)
+		if (next->sleep_type == SLEEP_INTERACTIVE)
 			delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;
 
 		array = next->array;
@@ -3081,7 +3075,7 @@ go_idle:
 		} else
 			requeue_task(next, array);
 	}
-	next->activated = 0;
+	next->sleep_type = SLEEP_NORMAL;
 switch_tasks:
 	if (next == rq->idle)
 		schedstat_inc(rq, sched_goidle);

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

  parent reply	other threads:[~2006-01-13  1:13 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-12-27 18:09 [SCHED] Totally WRONG prority calculation with specific test-case (since 2.6.10-bk12) Paolo Ornati
2005-12-27 21:48 ` Paolo Ornati
2005-12-27 23:26   ` Con Kolivas
2005-12-28 11:01     ` Paolo Ornati
2005-12-28 11:19       ` Con Kolivas
2005-12-28 11:35         ` Paolo Ornati
2005-12-28 17:23           ` Paolo Ornati
2005-12-28 17:39             ` Paolo Ornati
2005-12-30 13:52     ` [SCHED] wrong priority calc - SIMPLE test case Paolo Ornati
2005-12-31  2:06       ` Peter Williams
2005-12-31 10:34         ` Paolo Ornati
2005-12-31 10:52           ` Paolo Ornati
2005-12-31 11:12             ` Con Kolivas
2005-12-31 13:44             ` Peter Williams
2005-12-31 16:31               ` Paolo Ornati
2005-12-31 22:04                 ` Peter Williams
2005-12-31  8:13       ` Mike Galbraith
2005-12-31 11:00         ` Paolo Ornati
2005-12-31 15:11         ` Paolo Ornati
2005-12-31 16:37           ` Mike Galbraith
2005-12-31 17:24             ` Paolo Ornati
2005-12-31 17:42               ` Paolo Ornati
2006-01-01 11:39             ` Paolo Ornati
2006-01-02  9:15               ` Mike Galbraith
2006-01-02  9:50                 ` Paolo Ornati
2006-01-09 11:11                 ` Mike Galbraith
2006-01-09 15:52                   ` Mike Galbraith
2006-01-09 16:08                     ` Con Kolivas
2006-01-09 18:14                       ` Mike Galbraith
2006-01-09 20:00                     ` Paolo Ornati
2006-01-09 20:23                       ` Paolo Ornati
2006-01-10  7:08                       ` Mike Galbraith
2006-01-10 12:07                         ` Mike Galbraith
2006-01-10 12:56                           ` Paolo Ornati
2006-01-10 13:01                             ` Mike Galbraith
2006-01-10 13:53                               ` Paolo Ornati
2006-01-10 15:18                                 ` Mike Galbraith
2006-01-13  1:13       ` Con Kolivas [this message]
2006-01-13  1:32         ` Con Kolivas
2006-01-13 10:46         ` Paolo Ornati
2006-01-13 10:51           ` Con Kolivas
2006-01-13 13:01             ` Mike Galbraith
2006-01-13 14:34               ` Con Kolivas
2006-01-13 16:15                 ` Mike Galbraith
2006-01-14  2:05                   ` Con Kolivas
2006-01-14  2:56                     ` Mike Galbraith
2005-12-27 23:59   ` [SCHED] Totally WRONG prority calculation with specific test-case (since 2.6.10-bk12) Peter Williams
2005-12-28 10:20     ` Paolo Ornati
2005-12-28 13:38       ` Peter Williams
2005-12-28 19:45         ` Paolo Ornati
2005-12-29  3:13         ` Nick Piggin
2005-12-29  3:35           ` Peter Williams
2005-12-29  8:11             ` Nick Piggin
  -- strict thread matches above, loose matches on Subject: below --
2006-01-27 16:57 [SCHED] wrong priority calc - SIMPLE test case Con Kolivas
2006-01-27 20:06 ` MIke Galbraith
2006-01-27 23:18   ` Con Kolivas
2006-01-28  0:01     ` Peter Williams
2006-01-28  3:43     ` MIke Galbraith

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200601131213.14832.kernel@kolivas.org \
    --to=kernel@kolivas.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=nickpiggin@yahoo.com.au \
    --cc=ornati@fastwebnet.it \
    --cc=pwil3058@bigpond.net.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.