From: chen Shang <shangcs@gmail.com>
To: linux-kernel@vger.kernel.org, rml@tech9.net
Cc: shangcs@gmail.com
Subject: [PATCH] kernel <linux-2.6.11.10> kernel/sched.c
Date: Thu, 19 May 2005 09:56:44 -0700 [thread overview]
Message-ID: <855e4e4605051909561f47351@mail.gmail.com> (raw)
Given how frequently schedule() is called, there is room to
improve it. In the step that recalculates task priority, it first
dequeues the task from one priority queue, calls recalc_task_prio(),
and then enqueues the task into another priority queue. However,
statistics show that only around 0.5% of recalculations change the
task priority (see below). Since the remaining 99.5% of recalculations
do not change the priority, it is reasonable to use requeue_task() to
avoid the overhead of a dequeue and an enqueue on the same priority
queue.
The patch implements the above idea. Note that a new helper function,
change_queue_task(), combines dequeue_task() and enqueue_task() to
save the overhead of one function call. Two statistics fields,
sched_prio_changed and sched_prio_unchanged, are added to provide
statistical data on priority recalculation.
Thanks,
chen
shangcs@gmail.com
/*===== Statistics ===== */
The statistics are based on an Intel x86 machine with two 1.8 GHz Xeon
processors with hyperthreading enabled.
        prio_changed  prio_unchanged  sched_cnt
CPU0    109           22743           59123
CPU1    120           23733           60407
CPU2    73            29981           86153
CPU3    96            22050           53094
/*===== Patch <linux-2.6.11.10> kernel/sched.c =====*/
--- linux-2.6.11.10.orig/kernel/sched.c 2005-05-16 10:51:53.000000000 -0700
+++ linux-2.6.11.10/kernel/sched.c 2005-05-18 22:31:32.000000000 -0700
@@ -249,6 +249,8 @@
unsigned long sched_noswitch;
unsigned long sched_switch;
unsigned long sched_cnt;
+ unsigned long sched_prio_changed;
+ unsigned long sched_prio_unchanged;
unsigned long sched_goidle;
/* pull_task() stats */
@@ -347,12 +349,20 @@
/* runqueue-specific stats */
seq_printf(seq,
- "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu "
- "%lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
+ "cpu%d \n\tyld: both(%lu) act(%lu) both(%lu) cnt(%lu) "
+ "\n\tsched: noswitch(%lu) switch(%lu) "
+ "\n\t\t cnt(%lu) prio_changed(%lu) prio_unchanged(%lu)"
+ "\n\t\t goidle(%lu) "
+ "\n\talb: cnt(%lu) gained(%lu) lost(%lu) failed(%lu) "
+ "\n\tttwu:cnt(%lu) moved(%lu) attempts(%lu) "
+ "\n\twunt: cnt(%lu) moved(%lu) "
+ "\n\tsmt: cnt(%lu) \n\tsbe: cnt(%lu) "
+ "\n\trq_sched_info: cpu_time(%lu) run_delay(%lu) pcnt(%lu)",
cpu, rq->yld_both_empty,
rq->yld_act_empty, rq->yld_exp_empty,
rq->yld_cnt, rq->sched_noswitch,
- rq->sched_switch, rq->sched_cnt, rq->sched_goidle,
+ rq->sched_switch, rq->sched_cnt, rq->sched_prio_changed,
+ rq->sched_prio_unchanged, rq->sched_goidle,
rq->alb_cnt, rq->alb_gained, rq->alb_lost,
rq->alb_failed,
rq->ttwu_cnt, rq->ttwu_moved, rq->ttwu_attempts,
@@ -374,14 +384,14 @@
seq_printf(seq, "domain%d %s", dcnt++, mask_str);
for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES;
itype++) {
- seq_printf(seq, " %lu %lu %lu %lu %lu",
+ seq_printf(seq, "lb: cnt(%lu) failed(%lu) imbl(%lu) nobzq(%lu) %lu",
sd->lb_cnt[itype],
sd->lb_failed[itype],
sd->lb_imbalance[itype],
sd->lb_nobusyq[itype],
sd->lb_nobusyg[itype]);
}
- seq_printf(seq, " %lu %lu %lu %lu\n",
+ seq_printf(seq, "sbe: pushed(%lu) attempts(%lu) %lu %lu\n",
sd->sbe_pushed, sd->sbe_attempts,
sd->ttwu_wake_affine, sd->ttwu_wake_balance);
}
@@ -580,6 +590,18 @@
p->array = array;
}
+static void change_queue_task(struct task_struct *p, prio_array_t *array,
+ int old_prio)
+{
+ list_del(&p->run_list);
+ if (list_empty(array->queue + old_prio))
+ __clear_bit(old_prio, array->bitmap);
+
+ sched_info_queued(p);
+ list_add_tail(&p->run_list, array->queue + p->prio);
+ __set_bit(p->prio, array->bitmap);
+ p->array = array;
+}
/*
* Put task to the end of the run list without the overhead of dequeue
* followed by enqueue.
@@ -2668,7 +2690,7 @@
struct list_head *queue;
unsigned long long now;
unsigned long run_time;
- int cpu, idx;
+ int cpu, idx, prio;
/*
* Test if we are atomic. Since do_exit() needs to call into
@@ -2787,9 +2809,19 @@
delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;
array = next->array;
- dequeue_task(next, array);
+ prio = next->prio;
recalc_task_prio(next, next->timestamp + delta);
- enqueue_task(next, array);
+
+ if (unlikely(prio != next->prio))
+ {
+ change_queue_task(next, array, prio);
+ schedstat_inc(rq, sched_prio_changed);
+ }
+ else
+ {
+ requeue_task(next, array);
+ schedstat_inc(rq, sched_prio_unchanged);
+ }
}
next->activated = 0;
switch_tasks:
next reply other threads:[~2005-05-19 16:56 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-05-19 16:56 chen Shang [this message]
2005-05-20 3:26 ` [PATCH] kernel <linux-2.6.11.10> kernel/sched.c Nick Piggin
2005-05-20 4:17 ` chen Shang
2005-05-20 4:32 ` Lee Revell
2005-05-20 5:13 ` Nick Piggin
2005-05-20 7:12 ` chen Shang
2005-05-20 7:21 ` Nick Piggin
2005-05-20 7:36 ` Con Kolivas
2005-05-20 13:41 ` chen Shang
2005-05-20 9:49 ` Ingo Molnar
2005-05-20 10:40 ` Con Kolivas
2005-05-20 11:34 ` Ingo Molnar
2005-05-22 4:41 ` Chen Shang
2005-05-23 7:11 ` Ingo Molnar
2005-05-23 14:45 ` Chen Shang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=855e4e4605051909561f47351@mail.gmail.com \
--to=shangcs@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=rml@tech9.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.