From: Peter Williams <pwil3058@bigpond.net.au>
To: Paolo Ornati <ornati@fastwebnet.it>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
Chris Han <xiphux@gmail.com>, Con Kolivas <kernel@kolivas.org>,
William Lee Irwin III <wli@holomorphy.com>,
Jake Moilanen <moilanen@austin.ibm.com>
Subject: Re: [ANNOUNCE][RFC] PlugSched-6.2 for 2.6.16-rc1 and 2.6.16-rc1-mm1
Date: Thu, 26 Jan 2006 12:09:53 +1100 [thread overview]
Message-ID: <43D82161.6000809@bigpond.net.au> (raw)
In-Reply-To: <20060123212158.3fba71d5@localhost>
[-- Attachment #1: Type: text/plain, Size: 2011 bytes --]
Paolo Ornati wrote:
> On Mon, 23 Jan 2006 11:49:33 +1100
> Peter Williams <pwil3058@bigpond.net.au> wrote:
>
>
>>>However, in spite of the above, the fairness mechanism should have been
>>>able to generate enough bonus points to get dd's priority back to less
>>>than 34. I'm still investigating why this didn't happen.
>>
>>Problem solved. It was a scaling issue during the calculation of
>>expected delay. The attached patch should fix both the CPU hog problem
>>and the fairness problem. Could you give it a try?
>>
>
>
> Mmmm... it doesn't work:
>
> PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
> 5516 paolo 34 0 115m 18m 2432 S 87.5 3.7 0:23.72 transcode
> 5530 paolo 34 0 51000 4472 1872 S 8.0 0.9 0:02.29 tcdecode
> 5523 paolo 34 0 19840 1088 880 R 2.0 0.2 0:00.21 tcdemux
> 5522 paolo 34 0 22156 1204 972 R 0.7 0.2 0:00.02 tccat
> 5539 paolo 34 0 4952 1468 372 D 0.7 0.3 0:00.04 dd
> 5350 root 28 0 167m 16m 3228 S 0.3 3.4 0:03.64 X
>
> PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
> 5456 paolo 34 0 115m 18m 2432 D 63.9 3.7 0:48.21 transcode
> 5470 paolo 37 0 50996 4472 1872 R 6.2 0.9 0:05.20 tcdecode
> 5493 paolo 34 0 4952 1472 372 R 1.5 0.3 0:00.22 dd
> 5441 paolo 28 0 86656 21m 15m S 0.2 4.4 0:00.77 konsole
> 5468 paolo 34 0 19840 1088 880 S 0.2 0.2 0:00.23 tcdemux
>
I know that I've said this before but I've found the problem.
Embarrassingly, it was a basic book keeping error (recently introduced
and equivalent to getting nr_running wrong for each CPU) in the
gathering of the statistics that I use. :-(
The attached patch (applied on top of the PlugSched patch) should fix
things. Could you test it please?
Thanks
Peter
--
Peter Williams pwil3058@bigpond.net.au
"Learning, n. The kind of ignorance distinguishing the studious."
-- Ambrose Bierce
[-- Attachment #2: fix-spa_ws-scheduler-v2 --]
[-- Type: text/plain, Size: 7444 bytes --]
Index: MM-2.6.16/kernel/sched_spa_ws.c
===================================================================
--- MM-2.6.16.orig/kernel/sched_spa_ws.c 2006-01-21 16:42:45.000000000 +1100
+++ MM-2.6.16/kernel/sched_spa_ws.c 2006-01-26 11:44:14.000000000 +1100
@@ -44,7 +44,8 @@ static unsigned int initial_ia_bonus = D
#define LSHARES_AVG_OFFSET 7
#define LSHARES_AVG_ALPHA ((1 << LSHARES_AVG_OFFSET) - 2)
#define LSHARES_AVG_INCR(a) ((a) << 1)
-#define LSHARES_AVG_ONE (1UL << LSHARES_AVG_OFFSET)
+#define LSHARES_AVG_REAL(s) ((s) << LSHARES_AVG_OFFSET)
+#define LSHARES_AVG_ONE LSAHRES_AVG_REAL(1UL)
#define LSHARES_AVG_MUL(a, b) (((a) * (b)) >> LSHARES_AVG_OFFSET)
static unsigned int max_fairness_bonus = DEF_MAX_FAIRNESS_BONUS;
@@ -121,32 +122,9 @@ static inline void zero_interactive_bonu
p->sdu.spa.interactive_bonus = 0;
}
-static inline int current_fairness_bonus(const struct task_struct *p)
-{
- return p->sdu.spa.auxilary_bonus >> FAIRNESS_BONUS_OFFSET;
-}
-
-static inline int current_fairness_bonus_rnd(const struct task_struct *p)
-{
- return (p->sdu.spa.auxilary_bonus + (1UL << (FAIRNESS_BONUS_OFFSET - 1)))
- >> FAIRNESS_BONUS_OFFSET;
-}
-
-static inline void decr_fairness_bonus(struct task_struct *p)
-{
- p->sdu.spa.auxilary_bonus *= ((1UL << FAIRNESS_BONUS_OFFSET) - 2);
- p->sdu.spa.auxilary_bonus >>= FAIRNESS_BONUS_OFFSET;
-}
-
-static inline void incr_fairness_bonus(struct task_struct *p)
-{
- decr_fairness_bonus(p);
- p->sdu.spa.auxilary_bonus += (max_fairness_bonus << 1);
-}
-
static inline int bonuses(const struct task_struct *p)
{
- return current_ia_bonus_rnd(p) + current_fairness_bonus_rnd(p);
+ return current_ia_bonus_rnd(p) + p->sdu.spa.auxilary_bonus;
}
static int spa_ws_effective_prio(const struct task_struct *p)
@@ -211,43 +189,37 @@ static inline unsigned int map_ratio(uns
static void spa_ws_reassess_fairness_bonus(struct task_struct *p)
{
- unsigned long long expected_delay;
+ unsigned long long expected_delay, adjusted_delay;
unsigned long long avg_lshares;
+ unsigned long pshares;
-#if 0
p->sdu.spa.auxilary_bonus = 0;
if (max_fairness_bonus == 0)
return;
-#endif
+ pshares = LSHARES_AVG_REAL(p->sdu.spa.eb_shares);
avg_lshares = per_cpu(rq_avg_lshares, task_cpu(p));
- if (avg_lshares <= p->sdu.spa.eb_shares)
+ if (avg_lshares <= pshares)
expected_delay = 0;
else {
- expected_delay = LSHARES_AVG_MUL(p->sdu.spa.avg_cpu_per_cycle,
- (avg_lshares - p->sdu.spa.eb_shares));
- (void)do_div(expected_delay, p->sdu.spa.eb_shares);
+ expected_delay = p->sdu.spa.avg_cpu_per_cycle *
+ (avg_lshares - pshares);
+ (void)do_div(expected_delay, pshares);
}
-#if 1
- if (p->sdu.spa.avg_delay_per_cycle > expected_delay)
- incr_fairness_bonus(p);
- else
- decr_fairness_bonus(p);
-#else
+
/*
* No delay means no bonus, but
* NB this test also avoids a possible divide by zero error if
* cpu is also zero and negative bonuses
*/
- lhs = p->sdu.spa.avg_delay_per_cycle;
- if (lhs <= rhs)
+ if (p->sdu.spa.avg_delay_per_cycle <= expected_delay)
return;
- lhs -= rhs;
+ adjusted_delay = p->sdu.spa.avg_delay_per_cycle - expected_delay;
p->sdu.spa.auxilary_bonus =
- map_ratio(lhs, lhs + p->sdu.spa.avg_cpu_per_cycle,
+ map_ratio(adjusted_delay,
+ adjusted_delay + p->sdu.spa.avg_cpu_per_cycle,
max_fairness_bonus);
-#endif
}
static inline int spa_ws_eligible(struct task_struct *p)
@@ -255,6 +227,15 @@ static inline int spa_ws_eligible(struct
return p->sdu.spa.avg_sleep_per_cycle < WS_BIG_SLEEP;
}
+static inline int spa_sleepiness_exceeds_ppt(const struct task_struct *p,
+ unsigned int ppt)
+{
+ return RATIO_EXCEEDS_PPT(p->sdu.spa.avg_sleep_per_cycle,
+ p->sdu.spa.avg_sleep_per_cycle +
+ p->sdu.spa.avg_cpu_per_cycle,
+ ppt);
+}
+
static void spa_ws_reassess_at_activation(struct task_struct *p)
{
spa_ws_reassess_fairness_bonus(p);
@@ -264,7 +245,7 @@ static void spa_ws_reassess_at_activatio
else
partial_incr_interactive_bonus(p);
}
- else if (!spa_ia_sleepiness_exceeds_ppt(p, iab_decr_threshold))
+ else if (!spa_sleepiness_exceeds_ppt(p, iab_decr_threshold))
decr_interactive_bonus(p);
else if (!spa_ia_sleepiness_exceeds_ppt(p, (iab_decr_threshold + iab_incr_threshold) / 2))
partial_decr_interactive_bonus(p);
@@ -284,7 +265,7 @@ static void spa_ws_reassess_at_end_of_ts
/* Don't punish tasks that have done a lot of sleeping for the
* occasional run of short sleeps unless they become a cpu hog.
*/
- if (!spa_ia_sleepiness_exceeds_ppt(p, iab_decr_threshold))
+ if (!spa_sleepiness_exceeds_ppt(p, iab_decr_threshold))
decr_interactive_bonus(p);
else if (!spa_ia_sleepiness_exceeds_ppt(p, (iab_decr_threshold + iab_incr_threshold) / 2))
partial_decr_interactive_bonus(p);
Index: MM-2.6.16/kernel/sched_spa.c
===================================================================
--- MM-2.6.16.orig/kernel/sched_spa.c 2006-01-21 16:41:32.000000000 +1100
+++ MM-2.6.16/kernel/sched_spa.c 2006-01-26 11:43:20.000000000 +1100
@@ -490,18 +490,29 @@ static inline int effective_prio(const t
return spa_sched_child->normal_effective_prio(p);
}
+static inline void spa_inc_nr_running(task_t *p, runqueue_t *rq)
+{
+ inc_nr_running(p, rq);
+ check_restart_promotions(rq);
+ if (!rt_task(p))
+ rq->qu.spa.nr_active_eb_shares += p->sdu.spa.eb_shares;
+}
+
+static inline void spa_dec_nr_running(task_t *p, runqueue_t *rq)
+{
+ dec_nr_running(p, rq);
+ check_stop_promotions(rq);
+ if (!rt_task(p))
+ rq->qu.spa.nr_active_eb_shares -= p->sdu.spa.eb_shares;
+}
+
/*
* __activate_task - move a task to the runqueue.
*/
static inline void __activate_task(task_t *p, runqueue_t *rq)
{
- struct spa_runqueue_queue *rqq = &rq->qu.spa;
-
- enqueue_task(p, rqq);
- inc_nr_running(p, rq);
- check_restart_promotions(rq);
- if (!rt_task(p))
- rqq->nr_active_eb_shares += p->sdu.spa.eb_shares;
+ enqueue_task(p, &rq->qu.spa);
+ spa_inc_nr_running(p, rq);
}
static inline void do_nothing_to_task(task_t *p) {}
@@ -536,11 +547,8 @@ static inline void deactivate_task(struc
{
struct spa_runqueue_queue *rqq = &rq->qu.spa;
- dec_nr_running(p, rq);
+ spa_dec_nr_running(p, rq);
dequeue_task(p, rqq);
- check_stop_promotions(rq);
- if (!rt_task(p))
- rqq->nr_active_eb_shares -= p->sdu.spa.eb_shares;
}
/*
@@ -648,7 +656,7 @@ void spa_wake_up_new_task(task_t * p, un
} else {
p->prio = current->prio;
list_add_tail(&p->run_list, ¤t->run_list);
- inc_nr_running(p, rq);
+ spa_inc_nr_running(p, rq);
check_restart_promotions(rq);
}
set_need_resched();
@@ -678,13 +686,11 @@ static inline
void pull_task(runqueue_t *src_rq, task_t *p, runqueue_t *this_rq, int this_cpu)
{
dequeue_task(p, &src_rq->qu.spa);
- dec_nr_running(p, src_rq);
- check_stop_promotions(src_rq);
+ spa_dec_nr_running(p, src_rq);
set_task_cpu(p, this_cpu);
adjust_timestamp(p, this_rq, src_rq);
- inc_nr_running(p, this_rq);
+ spa_inc_nr_running(p, this_rq);
enqueue_task(p, &this_rq->qu.spa);
- check_restart_promotions(this_rq);
preempt_if_warranted(p, this_rq);
}
@@ -1333,7 +1339,7 @@ void spa_set_select_idle_first(struct ru
__setscheduler(rq->idle, SCHED_FIFO, MAX_RT_PRIO - 1);
/* Add idle task to _front_ of it's priority queue */
enqueue_task_head(rq->idle, &rq->qu.spa);
- inc_nr_running(rq->idle, rq);
+ spa_inc_nr_running(rq->idle, rq);
}
void spa_set_select_idle_last(struct runqueue *rq)
next prev parent reply other threads:[~2006-01-26 1:09 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-01-19 21:45 [ANNOUNCE][RFC] PlugSched-6.2 for 2.6.16-rc1 and 2.6.16-rc1-mm1 Peter Williams
2006-01-21 6:48 ` Peter Williams
2006-01-21 10:46 ` Paolo Ornati
2006-01-21 23:06 ` Peter Williams
2006-01-22 22:47 ` Peter Williams
2006-01-23 0:49 ` Peter Williams
2006-01-23 20:21 ` Paolo Ornati
2006-01-24 0:00 ` Peter Williams
2006-01-26 1:09 ` Peter Williams [this message]
2006-01-26 8:11 ` Paolo Ornati
2006-01-26 22:34 ` Peter Williams
2006-01-28 23:44 ` Peter Williams
2006-01-31 17:44 ` Paolo Ornati
2006-01-23 20:09 ` Paolo Ornati
2006-01-23 20:25 ` Lee Revell
2006-01-23 20:52 ` Paolo Ornati
2006-01-23 20:59 ` Lee Revell
2006-01-23 21:10 ` Paolo Ornati
2006-01-23 21:11 ` Lee Revell
2006-01-23 23:32 ` Peter Williams
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=43D82161.6000809@bigpond.net.au \
--to=pwil3058@bigpond.net.au \
--cc=kernel@kolivas.org \
--cc=linux-kernel@vger.kernel.org \
--cc=moilanen@austin.ibm.com \
--cc=ornati@fastwebnet.it \
--cc=wli@holomorphy.com \
--cc=xiphux@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.