All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Mike Galbraith <efault@gmx.de>
Cc: Ingo Molnar <mingo@elte.hu>,
	Srivatsa Vaddagiri <vatsa@in.ibm.com>,
	LKML <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH 0/4] pending scheduler updates
Date: Wed, 22 Oct 2008 19:38:14 +0200	[thread overview]
Message-ID: <1224697094.20069.9.camel@twins> (raw)
In-Reply-To: <1224671528.7511.11.camel@marge.simson.net>

On Wed, 2008-10-22 at 12:32 +0200, Mike Galbraith wrote:
> On Wed, 2008-10-22 at 12:03 +0200, Mike Galbraith wrote:
> 
> > It has positive effects too, but IMHO, the bad outweigh the good.
> 
> BTW, most dramatic on the other end of the spectrum is pgsql+oltp.  With
> preemption as is, it collapses as load climbs to heavy with preemption
> knobs at stock.  Postgres uses user-land spinlocks and _appears_ to wake
> others while these are still held.  For this load, there is such a thing
> as too much short-term fairness, preempting lock holder creates nasty
> gaggle of contended lock spinners.  It's curable with knobs, and I think
> it's postgres's own fault, but may be wrong.
> 
> With that patch, pgsql+oltp scales perfectly.

Are we talking about this patch, which re-instates the vruntime based
wakeup-preemption ?

---
Subject: sched: fix wakeup preemption
From: Peter Zijlstra <a.p.zijlstra@chello.nl>

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/sched_fair.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 92 insertions(+), 6 deletions(-)

Index: linux-2.6/kernel/sched_fair.c
===================================================================
--- linux-2.6.orig/kernel/sched_fair.c
+++ linux-2.6/kernel/sched_fair.c
@@ -143,6 +143,49 @@ static inline struct sched_entity *paren
 	return se->parent;
 }
 
+/* return depth at which a sched entity is present in the hierarchy */
+static inline int depth_se(struct sched_entity *se)
+{
+	int depth = 0;
+
+	for_each_sched_entity(se)
+		depth++;
+
+	return depth;
+}
+
+static void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+	int se_depth, pse_depth;
+
+	/*
+	 * preemption test can be made between sibling entities who are in the
+	 * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
+	 * both tasks until we find their ancestors who are siblings of common
+	 * parent.
+	 */
+
+	/* First walk up until both entities are at same depth */
+	se_depth = depth_se(*se);
+	pse_depth = depth_se(*pse);
+
+	while (se_depth > pse_depth) {
+		se_depth--;
+		*se = parent_entity(*se);
+	}
+
+	while (pse_depth > se_depth) {
+		pse_depth--;
+		*pse = parent_entity(*pse);
+	}
+
+	while (!is_same_group(*se, *pse)) {
+		*se = parent_entity(*se);
+		*pse = parent_entity(*pse);
+	}
+}
+
 #else	/* CONFIG_FAIR_GROUP_SCHED */
 
 static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
@@ -193,6 +236,11 @@ static inline struct sched_entity *paren
 	return NULL;
 }
 
+static inline void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+}
+
 #endif	/* CONFIG_FAIR_GROUP_SCHED */
 
 
@@ -1244,13 +1292,42 @@ static unsigned long wakeup_gran(struct 
 	 * More easily preempt - nice tasks, while not making it harder for
 	 * + nice tasks.
 	 */
-	if (sched_feat(ASYM_GRAN))
-		gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
+	if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
+		gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
 
 	return gran;
 }
 
 /*
+ * Should 'se' preempt 'curr'.
+ *
+ *             |s1
+ *        |s2
+ *   |s3
+ *         g
+ *      |<--->|c
+ *
+ *  w(c, s1) = -1
+ *  w(c, s2) =  0
+ *  w(c, s3) =  1
+ *
+ */
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
+{
+	s64 gran, vdiff = curr->vruntime - se->vruntime;
+
+	if (vdiff < 0)
+		return -1;
+
+	gran = wakeup_gran(curr);
+	if (vdiff > gran)
+		return 1;
+
+	return 0;
+}
+
+/*
  * Preempt the current task with a newly woken task if needed:
  */
 static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
@@ -1258,7 +1335,6 @@ static void check_preempt_wakeup(struct 
 	struct task_struct *curr = rq->curr;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	struct sched_entity *se = &curr->se, *pse = &p->se;
-	s64 delta_exec;
 
 	if (unlikely(rt_prio(p->prio))) {
 		update_rq_clock(rq);
@@ -1296,9 +1372,19 @@ static void check_preempt_wakeup(struct 
 		return;
 	}
 
-	delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
-	if (delta_exec > wakeup_gran(pse))
-		resched_task(curr);
+	find_matching_se(&se, &pse);
+
+	while (se) {
+		BUG_ON(!pse);
+
+		if (wakeup_preempt_entity(se, pse) == 1) {
+			resched_task(curr);
+			break;
+		}
+
+		se = parent_entity(se);
+		pse = parent_entity(pse);
+	}
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)



  parent reply	other threads:[~2008-10-22 17:49 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-17 17:27 [PATCH 0/4] pending scheduler updates Peter Zijlstra
2008-10-17 17:27 ` [PATCH 1/4] sched: optimize group load balancer Peter Zijlstra
2008-10-17 17:27 ` [PATCH 2/4] sched: fair scheduler should not resched rt tasks Peter Zijlstra
2008-10-17 17:27 ` [PATCH 3/4] sched: revert back to per-rq vruntime Peter Zijlstra
2008-10-17 17:27 ` [PATCH 4/4] sched: fix wakeup preemption Peter Zijlstra
2008-10-20 21:57   ` Chris Friesen
2008-10-20 12:05 ` [PATCH 0/4] pending scheduler updates Ingo Molnar
2008-10-21 17:35   ` Srivatsa Vaddagiri
2008-10-22  9:40     ` Ingo Molnar
2008-10-22 10:03       ` Mike Galbraith
2008-10-22 10:32         ` Mike Galbraith
2008-10-22 12:10           ` Ingo Molnar
2008-10-22 12:38             ` Mike Galbraith
2008-10-22 12:42               ` Ingo Molnar
2008-10-22 13:05                 ` Mike Galbraith
2008-10-22 17:38           ` Peter Zijlstra [this message]
2008-10-22 17:56             ` Mike Galbraith

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1224697094.20069.9.camel@twins \
    --to=a.p.zijlstra@chello.nl \
    --cc=efault@gmx.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=vatsa@in.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.