From: Con Kolivas <kernel@kolivas.org>
To: Andrew Morton <akpm@osdl.org>, Ingo Molnar <mingo@elte.hu>
Cc: ck@vds.kolivas.org, "Markus Törnqvist" <mjt@nysv.org>,
"Carlos Carvalho" <carlos@fisica.ufpr.br>,
linux-kernel@vger.kernel.org
Subject: [PATCH] SCHED: Implement nice support across physical cpus on SMP
Date: Sat, 21 May 2005 15:00:39 +1000 [thread overview]
Message-ID: <200505211500.42527.kernel@kolivas.org> (raw)
In-Reply-To: <200505182345.22614.kernel@kolivas.org>
[-- Attachment #1.1: Type: text/plain, Size: 169 bytes --]
Ok I've respun the smp nice support for the cpu scheduler modifications that
are in current -mm. Tested on 2.6.12-rc4-mm2 on 4x and seems to work fine.
Con
---
[-- Attachment #1.2: sched-implement_smp_nice_support.diff --]
[-- Type: text/x-diff, Size: 7254 bytes --]
This patch implements 'nice' support across physical cpus on SMP.
It introduces an extra runqueue variable prio_bias which is the sum of the
(inverted) static priorities of all the tasks on the runqueue. This is then used
to bias busy rebalancing between runqueues to obtain good distribution of tasks
of different nice values. By biasing the balancing only during busy rebalancing
we can avoid having any significant loss of throughput by not affecting the
carefully tuned idle balancing already in place. If all tasks are running at the
same nice level this code should also have minimal effect. The code is optimised
out in the !CONFIG_SMP case.
Signed-off-by: Con Kolivas <kernel@kolivas.org>
Index: linux-2.6.12-rc4-mm2/kernel/sched.c
===================================================================
--- linux-2.6.12-rc4-mm2.orig/kernel/sched.c 2005-05-21 11:14:49.000000000 +1000
+++ linux-2.6.12-rc4-mm2/kernel/sched.c 2005-05-21 14:25:07.000000000 +1000
@@ -208,6 +208,7 @@ struct runqueue {
*/
unsigned long nr_running;
#ifdef CONFIG_SMP
+ unsigned long prio_bias;
unsigned long cpu_load[3];
#endif
unsigned long long nr_switches;
@@ -657,13 +658,45 @@ static int effective_prio(task_t *p)
return prio;
}
+#ifdef CONFIG_SMP
+static inline void inc_prio_bias(runqueue_t *rq, int static_prio)
+{
+ rq->prio_bias += MAX_PRIO - static_prio;
+}
+
+static inline void dec_prio_bias(runqueue_t *rq, int static_prio)
+{
+ rq->prio_bias -= MAX_PRIO - static_prio;
+}
+#else
+static inline void inc_prio_bias(runqueue_t *rq, int static_prio)
+{
+}
+
+static inline void dec_prio_bias(runqueue_t *rq, int static_prio)
+{
+}
+#endif
+
+static inline void inc_nr_running(task_t *p, runqueue_t *rq)
+{
+ rq->nr_running++;
+ inc_prio_bias(rq, p->static_prio);
+}
+
+static inline void dec_nr_running(task_t *p, runqueue_t *rq)
+{
+ rq->nr_running--;
+ dec_prio_bias(rq, p->static_prio);
+}
+
/*
* __activate_task - move a task to the runqueue.
*/
static inline void __activate_task(task_t *p, runqueue_t *rq)
{
enqueue_task(p, rq->active);
- rq->nr_running++;
+ inc_nr_running(p, rq);
}
/*
@@ -672,7 +705,7 @@ static inline void __activate_task(task_
static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
{
enqueue_task_head(p, rq->active);
- rq->nr_running++;
+ inc_nr_running(p, rq);
}
static void recalc_task_prio(task_t *p, unsigned long long now)
@@ -791,7 +824,7 @@ static void activate_task(task_t *p, run
*/
static void deactivate_task(struct task_struct *p, runqueue_t *rq)
{
- rq->nr_running--;
+ dec_nr_running(p, rq);
dequeue_task(p, p->array);
p->array = NULL;
}
@@ -928,27 +961,54 @@ void kick_process(task_t *p)
* We want to under-estimate the load of migration sources, to
* balance conservatively.
*/
-static inline unsigned long source_load(int cpu, int type)
+static inline unsigned long __source_load(int cpu, int type, enum idle_type idle)
{
runqueue_t *rq = cpu_rq(cpu);
- unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+ unsigned long cpu_load = rq->cpu_load[type-1],
+ load_now = rq->nr_running * SCHED_LOAD_SCALE;
+
+ if (idle == NOT_IDLE) {
+ /*
+ * If we are balancing busy runqueues the load is biased by
+ * priority to create 'nice' support across cpus.
+ */
+ cpu_load *= rq->prio_bias;
+ load_now *= rq->prio_bias;
+ }
+
if (type == 0)
return load_now;
- return min(rq->cpu_load[type-1], load_now);
+ return min(cpu_load, load_now);
+}
+
+static inline unsigned long source_load(int cpu, int type)
+{
+ return __source_load(cpu, type, NOT_IDLE);
}
/*
* Return a high guess at the load of a migration-target cpu
*/
-static inline unsigned long target_load(int cpu, int type)
+static inline unsigned long __target_load(int cpu, int type, enum idle_type idle)
{
runqueue_t *rq = cpu_rq(cpu);
- unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+ unsigned long cpu_load = rq->cpu_load[type-1],
+ load_now = rq->nr_running * SCHED_LOAD_SCALE;
+
if (type == 0)
return load_now;
- return max(rq->cpu_load[type-1], load_now);
+ if (idle == NOT_IDLE) {
+ cpu_load *= rq->prio_bias;
+ load_now *= rq->prio_bias;
+ }
+ return max(cpu_load, load_now);
+}
+
+static inline unsigned long target_load(int cpu, int type)
+{
+ return __target_load(cpu, type, NOT_IDLE);
}
/*
@@ -1389,7 +1449,7 @@ void fastcall wake_up_new_task(task_t *
list_add_tail(&p->run_list, &current->run_list);
p->array = current->array;
p->array->nr_active++;
- rq->nr_running++;
+ inc_nr_running(p, rq);
}
set_need_resched();
} else
@@ -1733,9 +1793,9 @@ void pull_task(runqueue_t *src_rq, prio_
runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
{
dequeue_task(p, src_array);
- src_rq->nr_running--;
+ dec_nr_running(p, src_rq);
set_task_cpu(p, this_cpu);
- this_rq->nr_running++;
+ inc_nr_running(p, this_rq);
enqueue_task(p, this_array);
p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
+ this_rq->timestamp_last_tick;
@@ -1909,9 +1969,9 @@ find_busiest_group(struct sched_domain *
for_each_cpu_mask(i, group->cpumask) {
/* Bias balancing toward cpus of our domain */
if (local_group)
- load = target_load(i, load_idx);
+ load = __target_load(i, load_idx, idle);
else
- load = source_load(i, load_idx);
+ load = __source_load(i, load_idx, idle);
avg_load += load;
}
@@ -2012,14 +2072,15 @@ out_balanced:
/*
* find_busiest_queue - find the busiest runqueue among the cpus in group.
*/
-static runqueue_t *find_busiest_queue(struct sched_group *group)
+static runqueue_t *find_busiest_queue(struct sched_group *group,
+ enum idle_type idle)
{
unsigned long load, max_load = 0;
runqueue_t *busiest = NULL;
int i;
for_each_cpu_mask(i, group->cpumask) {
- load = source_load(i, 0);
+ load = __source_load(i, 0, idle);
if (load > max_load) {
max_load = load;
@@ -2060,7 +2121,7 @@ static int load_balance(int this_cpu, ru
goto out_balanced;
}
- busiest = find_busiest_queue(group);
+ busiest = find_busiest_queue(group, idle);
if (!busiest) {
schedstat_inc(sd, lb_nobusyq[idle]);
goto out_balanced;
@@ -2168,7 +2229,7 @@ static int load_balance_newidle(int this
goto out_balanced;
}
- busiest = find_busiest_queue(group);
+ busiest = find_busiest_queue(group, NEWLY_IDLE);
if (!busiest) {
schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
goto out_balanced;
@@ -3338,7 +3399,9 @@ void set_user_nice(task_t *p, long nice)
* not SCHED_NORMAL:
*/
if (rt_task(p)) {
+ dec_prio_bias(rq, p->static_prio);
p->static_prio = NICE_TO_PRIO(nice);
+ inc_prio_bias(rq, p->static_prio);
goto out_unlock;
}
array = p->array;
@@ -3348,7 +3411,9 @@ void set_user_nice(task_t *p, long nice)
old_prio = p->prio;
new_prio = NICE_TO_PRIO(nice);
delta = new_prio - old_prio;
+ dec_prio_bias(rq, p->static_prio);
p->static_prio = NICE_TO_PRIO(nice);
+ inc_prio_bias(rq, p->static_prio);
p->prio += delta;
if (array) {
[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]
next prev parent reply other threads:[~2005-05-21 5:01 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-05-07 13:42 [PATCH] implement nice support across physical cpus on SMP Con Kolivas
2005-05-07 17:59 ` Carlos Carvalho
2005-05-07 21:45 ` Con Kolivas
2005-05-09 11:24 ` Markus Törnqvist
2005-05-09 11:28 ` [ck] " Markus Törnqvist
2005-05-09 11:47 ` Con Kolivas
2005-05-09 18:55 ` Markus Törnqvist
2005-05-09 23:54 ` Carlos Carvalho
2005-05-11 2:56 ` Con Kolivas
2005-05-11 3:04 ` [SMP NICE] [PATCH 1/2] SCHED: Implement " Con Kolivas
2005-05-11 3:05 ` [SMP NICE] [PATCH 2/2] SCHED: Make SMP nice a config option Con Kolivas
2005-05-11 7:20 ` Ingo Molnar
2005-05-12 10:49 ` Con Kolivas
2005-05-16 11:33 ` [SMP NICE] [PATCH] SCHED: Implement nice support across physical cpus on SMP Con Kolivas
2005-05-16 18:31 ` Markus Törnqvist
2005-05-17 13:39 ` Carlos Carvalho
2005-05-18 11:30 ` Markus Törnqvist
2005-05-18 13:45 ` Con Kolivas
2005-05-21 5:00 ` Con Kolivas [this message]
2005-05-23 9:28 ` [PATCH] SCHED: change_prio_bias_only_if_queued Con Kolivas
2005-05-23 10:07 ` [PATCH] SCHED: account_rt_tasks_in_prio_bias Con Kolivas
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200505211500.42527.kernel@kolivas.org \
--to=kernel@kolivas.org \
--cc=akpm@osdl.org \
--cc=carlos@fisica.ufpr.br \
--cc=ck@vds.kolivas.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=mjt@nysv.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox