From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-kernel@vger.kernel.org
Cc: mingo@elte.hu, efault@gmx.de, vatsa@in.ibm.com,
Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 4/8] sched: re-instate vruntime based wakeup preemption
Date: Fri, 24 Oct 2008 11:06:15 +0200 [thread overview]
Message-ID: <20081024091109.667495895@chello.nl> (raw)
In-Reply-To: 20081024090611.936552213@chello.nl
[-- Attachment #1: sched-fix-wakeup-preempt.patch --]
[-- Type: text/plain, Size: 3737 bytes --]
The advantage is that vruntime based wakeup preemption has a better
conceptual model. Here wakeup_gran = 0 means: preempt when 'fair'.
Therefore wakeup_gran is the granularity of unfairness we allow in order
to make progress.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/sched_fair.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 92 insertions(+), 6 deletions(-)
Index: linux-2.6/kernel/sched_fair.c
===================================================================
--- linux-2.6.orig/kernel/sched_fair.c
+++ linux-2.6/kernel/sched_fair.c
@@ -143,6 +143,49 @@ static inline struct sched_entity *paren
return se->parent;
}
+/* return depth at which a sched entity is present in the hierarchy */
+static inline int depth_se(struct sched_entity *se)
+{
+ int depth = 0;
+
+ for_each_sched_entity(se)
+ depth++;
+
+ return depth;
+}
+
+static void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+ int se_depth, pse_depth;
+
+ /*
+ * preemption test can be made between sibling entities who are in the
+ * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
+ * both tasks until we find their ancestors who are siblings of common
+ * parent.
+ */
+
+ /* First walk up until both entities are at same depth */
+ se_depth = depth_se(*se);
+ pse_depth = depth_se(*pse);
+
+ while (se_depth > pse_depth) {
+ se_depth--;
+ *se = parent_entity(*se);
+ }
+
+ while (pse_depth > se_depth) {
+ pse_depth--;
+ *pse = parent_entity(*pse);
+ }
+
+ while (!is_same_group(*se, *pse)) {
+ *se = parent_entity(*se);
+ *pse = parent_entity(*pse);
+ }
+}
+
#else /* CONFIG_FAIR_GROUP_SCHED */
static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
@@ -193,6 +236,11 @@ static inline struct sched_entity *paren
return NULL;
}
+static inline void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+}
+
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -1244,13 +1292,42 @@ static unsigned long wakeup_gran(struct
* More easily preempt - nice tasks, while not making it harder for
* + nice tasks.
*/
- if (sched_feat(ASYM_GRAN))
- gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
+ if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
+ gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
return gran;
}
/*
+ * Should 'se' preempt 'curr'.
+ *
+ * |s1
+ * |s2
+ * |s3
+ * g
+ * |<--->|c
+ *
+ * w(c, s1) = -1
+ * w(c, s2) = 0
+ * w(c, s3) = 1
+ *
+ */
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
+{
+ s64 gran, vdiff = curr->vruntime - se->vruntime;
+
+ if (vdiff <= 0)
+ return -1;
+
+ gran = wakeup_gran(curr);
+ if (vdiff > gran)
+ return 1;
+
+ return 0;
+}
+
+/*
* Preempt the current task with a newly woken task if needed:
*/
static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
@@ -1258,7 +1335,6 @@ static void check_preempt_wakeup(struct
struct task_struct *curr = rq->curr;
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
struct sched_entity *se = &curr->se, *pse = &p->se;
- s64 delta_exec;
if (unlikely(rt_prio(p->prio))) {
update_rq_clock(rq);
@@ -1296,9 +1372,19 @@ static void check_preempt_wakeup(struct
return;
}
- delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
- if (delta_exec > wakeup_gran(pse))
- resched_task(curr);
+ find_matching_se(&se, &pse);
+
+ while (se) {
+ BUG_ON(!pse);
+
+ if (wakeup_preempt_entity(se, pse) == 1) {
+ resched_task(curr);
+ break;
+ }
+
+ se = parent_entity(se);
+ pse = parent_entity(pse);
+ }
}
static struct task_struct *pick_next_task_fair(struct rq *rq)
--
next prev parent reply other threads:[~2008-10-24 10:00 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-10-24 9:06 [PATCH 0/8] scheduler patches Peter Zijlstra
2008-10-24 9:06 ` [PATCH 1/8] sched: fix a find_busiest_group buglet Peter Zijlstra
2008-10-24 9:06 ` [PATCH 2/8] sched: more accurate min_vruntime accounting Peter Zijlstra
2008-10-24 9:06 ` [PATCH 3/8] sched: weaken sync hint Peter Zijlstra
2008-10-24 9:06 ` Peter Zijlstra [this message]
2008-10-24 9:06 ` [PATCH 5/8] sched: virtual time buddy preemption Peter Zijlstra
2008-10-24 9:06 ` [PATCH 6/8] sched: avg_vruntime Peter Zijlstra
2008-10-29 15:48 ` Peter Zijlstra
2008-11-01 18:13 ` Fabio Checconi
2008-10-24 9:06 ` [PATCH 7/8] sched: non-zero lag renice Peter Zijlstra
2008-10-24 17:47 ` Chris Friesen
2008-10-24 20:28 ` Peter Zijlstra
2008-10-24 21:13 ` Chris Friesen
2008-10-24 9:06 ` [PATCH 8/8] use avg_vruntime for task placement Peter Zijlstra
2008-10-24 10:26 ` [PATCH 0/8] scheduler patches Ingo Molnar
2008-10-24 10:29 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20081024091109.667495895@chello.nl \
--to=a.p.zijlstra@chello.nl \
--cc=efault@gmx.de \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=vatsa@in.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.