From: chen Shang <shangcs@gmail.com>
To: linux-kernel@vger.kernel.org, rml@tech9.net
Cc: shangcs@gmail.com
Subject: [PATCH] kernel <linux-2.6.11.10> kernel/sched.c
Date: Thu, 19 May 2005 09:56:44 -0700 [thread overview]
Message-ID: <855e4e4605051909561f47351@mail.gmail.com> (raw)
Given how frequently schedule() is called, there is room to improve
it. In the task-priority recalculation step, the task is first dequeued
from one priority queue, recalc_task_prio() is called, and then the
task is enqueued into another priority queue. However, statistics show
that only around 0.5% of recalculations actually change the task's
priority (see below). Since the remaining 99.5% of recalculations do
not change the priority, it is reasonable to use requeue_task() to
avoid the overhead of a dequeue and enqueue on the same priority queue.
The patch implements the above idea. Note that a new helper function,
change_queue_task(), combines dequeue() and enqueue() to save one
function call's worth of overhead. Two statistics fields, sched_prio_changed
and sched_prio_unchanged, are added to provide statistical data on
priority recalculation.
Thanks,
chen
shangcs@gmail.com
/*===== Statistics ===== */
The statistics were collected on an Intel x86 machine with two 1.8 GHz
Xeon processors with hyperthreading enabled.
prio_changed prio_unchanged sched_cnt
CPU0 109 22743 59123 CPU1 120 23733 60407
CPU2 73 29981 86153 CPU3 96 22050 53094
/*===== Patch <linux-2.6.11.10> kernel/sched.c =====*/
--- linux-2.6.11.10.orig/kernel/sched.c 2005-05-16 10:51:53.000000000 -0700
+++ linux-2.6.11.10/kernel/sched.c 2005-05-18 22:31:32.000000000 -0700
@@ -249,6 +249,8 @@
unsigned long sched_noswitch;
unsigned long sched_switch;
unsigned long sched_cnt;
+ unsigned long sched_prio_changed;
+ unsigned long sched_prio_unchanged;
unsigned long sched_goidle;
/* pull_task() stats */
@@ -347,12 +349,20 @@
/* runqueue-specific stats */
seq_printf(seq,
- "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu "
- "%lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
+ "cpu%d \n\tyld: both(%lu) act(%lu) both(%lu) cnt(%lu) "
+ "\n\tsched: noswitch(%lu) switch(%lu) "
+ "\n\t\t cnt(%lu) prio_changed(%lu) prio_unchanged(%lu)"
+ "\n\t\t goidle(%lu) "
+ "\n\talb: cnt(%lu) gained(%lu) lost(%lu) failed(%lu) "
+ "\n\tttwu:cnt(%lu) moved(%lu) attempts(%lu) "
+ "\n\twunt: cnt(%lu) moved(%lu) "
+ "\n\tsmt: cnt(%lu) \n\tsbe: cnt(%lu) "
+ "\n\trq_sched_info: cpu_time(%lu) run_delay(%lu) pcnt(%lu)",
cpu, rq->yld_both_empty,
rq->yld_act_empty, rq->yld_exp_empty,
rq->yld_cnt, rq->sched_noswitch,
- rq->sched_switch, rq->sched_cnt, rq->sched_goidle,
+ rq->sched_switch, rq->sched_cnt, rq->sched_prio_changed,
+ rq->sched_prio_unchanged, rq->sched_goidle,
rq->alb_cnt, rq->alb_gained, rq->alb_lost,
rq->alb_failed,
rq->ttwu_cnt, rq->ttwu_moved, rq->ttwu_attempts,
@@ -374,14 +384,14 @@
seq_printf(seq, "domain%d %s", dcnt++, mask_str);
for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES;
itype++) {
- seq_printf(seq, " %lu %lu %lu %lu %lu",
+ seq_printf(seq, "lb: cnt(%lu) failed(%lu) imbl(%lu) nobzq(%lu) %lu",
sd->lb_cnt[itype],
sd->lb_failed[itype],
sd->lb_imbalance[itype],
sd->lb_nobusyq[itype],
sd->lb_nobusyg[itype]);
}
- seq_printf(seq, " %lu %lu %lu %lu\n",
+ seq_printf(seq, "sbe: pushed(%lu) attempts(%lu) %lu %lu\n",
sd->sbe_pushed, sd->sbe_attempts,
sd->ttwu_wake_affine, sd->ttwu_wake_balance);
}
@@ -580,6 +590,18 @@
p->array = array;
}
+static void change_queue_task(struct task_struct *p, prio_array_t *array,
+ int old_prio)
+{
+ list_del(&p->run_list);
+ if (list_empty(array->queue + old_prio))
+ __clear_bit(old_prio, array->bitmap);
+
+ sched_info_queued(p);
+ list_add_tail(&p->run_list, array->queue + p->prio);
+ __set_bit(p->prio, array->bitmap);
+ p->array = array;
+}
/*
* Put task to the end of the run list without the overhead of dequeue
* followed by enqueue.
@@ -2668,7 +2690,7 @@
struct list_head *queue;
unsigned long long now;
unsigned long run_time;
- int cpu, idx;
+ int cpu, idx, prio;
/*
* Test if we are atomic. Since do_exit() needs to call into
@@ -2787,9 +2809,19 @@
delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;
array = next->array;
- dequeue_task(next, array);
+ prio = next->prio;
recalc_task_prio(next, next->timestamp + delta);
- enqueue_task(next, array);
+
+ if (unlikely(prio != next->prio))
+ {
+ change_queue_task(next, array, prio);
+ schedstat_inc(rq, sched_prio_changed);
+ }
+ else
+ {
+ requeue_task(next, array);
+ schedstat_inc(rq, sched_prio_unchanged);
+ }
}
next->activated = 0;
switch_tasks:
next reply other threads:[~2005-05-19 16:56 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-05-19 16:56 chen Shang [this message]
2005-05-20 3:26 ` [PATCH] kernel <linux-2.6.11.10> kernel/sched.c Nick Piggin
2005-05-20 4:17 ` chen Shang
2005-05-20 4:32 ` Lee Revell
2005-05-20 5:13 ` Nick Piggin
2005-05-20 7:12 ` chen Shang
2005-05-20 7:21 ` Nick Piggin
2005-05-20 7:36 ` Con Kolivas
2005-05-20 13:41 ` chen Shang
2005-05-20 9:49 ` Ingo Molnar
2005-05-20 10:40 ` Con Kolivas
2005-05-20 11:34 ` Ingo Molnar
2005-05-22 4:41 ` Chen Shang
2005-05-23 7:11 ` Ingo Molnar
2005-05-23 14:45 ` Chen Shang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=855e4e4605051909561f47351@mail.gmail.com \
--to=shangcs@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=rml@tech9.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox