All of lore.kernel.org
 help / color / mirror / Atom feed
* Re: [git] CFS-devel, group scheduler, fixes
@ 2007-09-18 19:36 dimm
  2007-09-18 20:16 ` Ingo Molnar
  2007-09-18 20:22 ` Ingo Molnar
  0 siblings, 2 replies; 37+ messages in thread
From: dimm @ 2007-09-18 19:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar, Srivatsa Vaddagiri


[ well, don't expect to find anything like RDCFS here
(no, 'D' does not stand for 'dumb'!). I was focused
on more prosaic things in the meantime, so I just
didn't have time to write it.. ]

here are a few cleanups/simplifications/optimizations
based on the recent modifications in the sched-dev tree.

(1) optimize task_new_fair()
(2) simplify yield_task()
(3) rework enqueue/dequeue_entity() to get rid of
sched_class::set_curr_task()

additionally, the changes somewhat decrease code size:

   text    data     bss     dec     hex filename
  43538    5398      48   48984    bf58 build/kernel/sched.o.before
  43250    5390      48   48688    be30 build/kernel/sched.o

(SMP + lots of debugging options but, I guess, in this case the diff
should remain visible for any combination).

---

(1)

due to the fact that we no longer keep the 'current' within the tree,
dequeue/enqueue_entity() is useless for the 'current' in task_new_fair().
We are about to reschedule and sched_class->put_prev_task() will put
the 'current' back into the tree, based on its new key.

Signed-off-by: Dmitry Adamushko <dmitry.adamushko@gmail.com>

---
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6e52d5a..5a244e2 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -969,10 +969,11 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 
 	if (sysctl_sched_child_runs_first &&
 			curr->vruntime < se->vruntime) {
-
-		dequeue_entity(cfs_rq, curr, 0);
+		/*
+ 		 * Upon rescheduling, sched_class::put_prev_task() will place
+ 		 * 'current' within the tree based on its new key value.
+ 		 */
 		swap(curr->vruntime, se->vruntime);
-		enqueue_entity(cfs_rq, curr, 0);
 	}
 
 	update_stats_enqueue(cfs_rq, se);

---

Dmitry




^ permalink raw reply related	[flat|nested] 37+ messages in thread
* Re: [git] CFS-devel, group scheduler, fixes
@ 2007-09-18 19:56 Dmitry Adamushko
  2007-09-18 20:18 ` Ingo Molnar
  0 siblings, 1 reply; 37+ messages in thread
From: Dmitry Adamushko @ 2007-09-18 19:56 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar, Srivatsa Vaddagiri


(3)

rework enqueue/dequeue_entity() to get rid of sched_class::set_curr_task().
This simplifies sched_setscheduler(), rt_mutex_setprio() and sched_move_task().

Signed-off-by: Dmitry Adamushko <dmitry.adamushko@gmail.com>
Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>

---
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3728cd6..1094804 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -870,7 +870,6 @@ struct sched_class {
 			struct sched_domain *sd, enum cpu_idle_type idle,
 			int *all_pinned, int *this_best_prio);
 
-	void (*set_curr_task) (struct rq *rq);
 	void (*task_tick) (struct rq *rq, struct task_struct *p);
 	void (*task_new) (struct rq *rq, struct task_struct *p);
 };
diff --git a/kernel/sched.c b/kernel/sched.c
index 361fad8..bc1a625 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3910,8 +3910,8 @@ EXPORT_SYMBOL(sleep_on_timeout);
  */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
-	int oldprio, on_rq, running;
 	unsigned long flags;
+	int oldprio, on_rq;
 	struct rq *rq;
 
 	BUG_ON(prio < 0 || prio > MAX_PRIO);
@@ -3921,12 +3921,8 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
 	oldprio = p->prio;
 	on_rq = p->se.on_rq;
-	running = task_running(rq, p);
-	if (on_rq) {
+	if (on_rq)
 		dequeue_task(rq, p, 0);
-		if (running)
-			p->sched_class->put_prev_task(rq, p);
-	}
 
 	if (rt_prio(prio))
 		p->sched_class = &rt_sched_class;
@@ -3936,15 +3932,13 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	p->prio = prio;
 
 	if (on_rq) {
-		if (running)
-			p->sched_class->set_curr_task(rq);
 		enqueue_task(rq, p, 0);
 		/*
 		 * Reschedule if we are currently running on this runqueue and
 		 * our priority decreased, or if we are not currently running on
 		 * this runqueue and our priority is higher than the current's
 		 */
-		if (running) {
+		if (task_running(rq, p)) {
 			if (p->prio > oldprio)
 				resched_task(rq->curr);
 		} else {
@@ -4150,7 +4144,7 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
 int sched_setscheduler(struct task_struct *p, int policy,
 		       struct sched_param *param)
 {
-	int retval, oldprio, oldpolicy = -1, on_rq, running;
+	int retval, oldprio, oldpolicy = -1, on_rq;
 	unsigned long flags;
 	struct rq *rq;
 
@@ -4232,24 +4226,20 @@ recheck:
 	}
 	update_rq_clock(rq);
 	on_rq = p->se.on_rq;
-	running = task_running(rq, p);
-	if (on_rq) {
+	if (on_rq)
 		deactivate_task(rq, p, 0);
-		if (running)
-			p->sched_class->put_prev_task(rq, p);
-	}
+
 	oldprio = p->prio;
 	__setscheduler(rq, p, policy, param->sched_priority);
+
 	if (on_rq) {
-		if (running)
-			p->sched_class->set_curr_task(rq);
 		activate_task(rq, p, 0);
 		/*
 		 * Reschedule if we are currently running on this runqueue and
 		 * our priority decreased, or if we are not currently running on
 		 * this runqueue and our priority is higher than the current's
 		 */
-		if (running) {
+		if (task_running(rq, p)) {
 			if (p->prio > oldprio)
 				resched_task(rq->curr);
 		} else {
@@ -6853,19 +6843,13 @@ static void sched_move_task(struct container_subsys *ss, struct container *cont,
 	running = task_running(rq, tsk);
 	on_rq = tsk->se.on_rq;
 
-	if (on_rq) {
+	if (on_rq)
 		dequeue_task(rq, tsk, 0);
-		if (unlikely(running))
-			tsk->sched_class->put_prev_task(rq, tsk);
-	}
 
 	set_task_cfs_rq(tsk);
 
-	if (on_rq) {
-		if (unlikely(running))
-			tsk->sched_class->set_curr_task(rq);
+	if (on_rq)
 		enqueue_task(rq, tsk, 0);
-	}
 
 done:
 	task_rq_unlock(rq, &flags);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index e65af8c..6539377 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -475,9 +475,20 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 }
 
 static void
-enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
+enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
+		int wakeup, int set_curr)
 {
 	/*
+ 	 * In case of the 'current'.
+ 	 */
+	if (unlikely(set_curr)) {
+		update_stats_curr_start(cfs_rq, se);
+		cfs_rq->curr = se;
+		account_entity_enqueue(cfs_rq, se);
+		return;
+	}
+
+	/*
 	 * Update the fair clock.
 	 */
 	update_curr(cfs_rq);
@@ -488,8 +499,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 	}
 
 	update_stats_enqueue(cfs_rq, se);
-	if (se != cfs_rq->curr)
-		__enqueue_entity(cfs_rq, se);
+	__enqueue_entity(cfs_rq, se);
 	account_entity_enqueue(cfs_rq, se);
 }
 
@@ -509,8 +519,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 		}
 	}
 #endif
-	if (se != cfs_rq->curr)
+	if (likely(se != cfs_rq->curr))
 		__dequeue_entity(cfs_rq, se);
+	else {
+		update_stats_curr_end(cfs_rq, se);
+		cfs_rq->curr = NULL;
+	}
 	account_entity_dequeue(cfs_rq, se);
 }
 
@@ -692,12 +706,17 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
+	int set_curr = 0;
+
+	/* Are we enqueuing the current task? */
+	if (unlikely(task_running(rq, p)))
+		set_curr = 1;
 
 	for_each_sched_entity(se) {
 		if (se->on_rq)
 			break;
 		cfs_rq = cfs_rq_of(se);
-		enqueue_entity(cfs_rq, se, wakeup);
+		enqueue_entity(cfs_rq, se, wakeup, set_curr);
 	}
 }
 
@@ -983,29 +1002,6 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 	resched_task(rq->curr);
 }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-/* Account for a task changing its policy or group.
- *
- * This routine is mostly called to set cfs_rq->curr field when a task
- * migrates between groups/classes.
- */
-static void set_curr_task_fair(struct rq *rq)
-{
-	struct sched_entity *se = &rq->curr->se;
-
-	for_each_sched_entity(se)
-		set_next_entity(cfs_rq_of(se), se);
-}
-#else
-static void set_curr_task_fair(struct rq *rq)
-{
-	struct sched_entity *se = &rq->curr->se;
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-
-	cfs_rq->curr = se;
-}
-#endif
-
 /*
  * All the scheduling class methods:
  */
@@ -1021,7 +1017,6 @@ struct sched_class fair_sched_class __read_mostly = {
 
 	.load_balance		= load_balance_fair,
 
-	.set_curr_task          = set_curr_task_fair,
 	.task_tick		= task_tick_fair,
 	.task_new		= task_new_fair,
 };
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 5ebf829..3503fb2 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -50,10 +50,6 @@ static void task_tick_idle(struct rq *rq, struct task_struct *curr)
 {
 }
 
-static void set_curr_task_idle(struct rq *rq)
-{
-}
-
 /*
  * Simple, special scheduling class for the per-CPU idle tasks:
  */
@@ -70,7 +66,6 @@ static struct sched_class idle_sched_class __read_mostly = {
 
 	.load_balance		= load_balance_idle,
 
-	.set_curr_task          = set_curr_task_idle,
 	.task_tick		= task_tick_idle,
 	/* no .task_new for idle tasks */
 };
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index b86944c..3c77c03 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -218,10 +218,6 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p)
 	}
 }
 
-static void set_curr_task_rt(struct rq *rq)
-{
-}
-
 static struct sched_class rt_sched_class __read_mostly = {
 	.enqueue_task		= enqueue_task_rt,
 	.dequeue_task		= dequeue_task_rt,
@@ -234,6 +230,5 @@ static struct sched_class rt_sched_class __read_mostly = {
 
 	.load_balance		= load_balance_rt,
 
-	.set_curr_task          = set_curr_task_rt,
 	.task_tick		= task_tick_rt,
 };

---

Dmitry



^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [git] CFS-devel, group scheduler, fixes
@ 2007-09-18 19:46 Dmitry Adamushko
  2007-09-18 20:17 ` Ingo Molnar
  0 siblings, 1 reply; 37+ messages in thread
From: Dmitry Adamushko @ 2007-09-18 19:46 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar, Srivatsa Vaddagiri


(2)

the 'p' (task_struct) parameter in the sched_class :: yield_task()
is redundant as the caller is always the 'current'. Get rid of it.

Signed-off-by: Dmitry Adamushko <dmitry.adamushko@gmail.com>

---
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9fd936f..3728cd6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -857,7 +857,7 @@ struct sched_class {
 
 	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
 	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
-	void (*yield_task) (struct rq *rq, struct task_struct *p);
+	void (*yield_task) (struct rq *rq);
 
 	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 046dae1..361fad8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4535,7 +4535,7 @@ asmlinkage long sys_sched_yield(void)
 	if (unlikely(rq->nr_running == 1))
 		schedstat_inc(rq, yld_act_empty);
 	else
-		current->sched_class->yield_task(rq, current);
+		current->sched_class->yield_task(rq);
 
 	/*
 	 * Since we are going to call schedule() anyway, there's
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5a244e2..9b982ef 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -723,10 +723,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
 /*
  * sched_yield() support is very simple - we dequeue and enqueue
  */
-static void yield_task_fair(struct rq *rq, struct task_struct *p)
+static void yield_task_fair(struct rq *rq)
 {
+	struct task_struct *curr = rq->curr;
+	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+
 	if (!sysctl_sched_yield_bug_workaround) {
-		struct cfs_rq *cfs_rq = task_cfs_rq(p);
 		__update_rq_clock(rq);
 
 		/*
@@ -737,7 +739,6 @@ static void yield_task_fair(struct rq *rq, struct task_struct *p)
 	}
 
 	if (sysctl_sched_yield_bug_workaround == 1) {
-		struct cfs_rq *cfs_rq = task_cfs_rq(p);
 		struct sched_entity *next;
 
 		/*
@@ -757,7 +758,7 @@ static void yield_task_fair(struct rq *rq, struct task_struct *p)
 		/*
 		 * Minimally necessary key value to be the second in the tree:
 		 */
-		p->se.vruntime = next->vruntime +
+		curr->se.vruntime = next->vruntime +
 					sysctl_sched_yield_granularity;
 
 		/*
@@ -770,7 +771,7 @@ static void yield_task_fair(struct rq *rq, struct task_struct *p)
 	/*
 	 * Just reschedule, do nothing else:
 	 */
-	resched_task(p);
+	resched_task(curr);
 }
 
 /*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 45b339f..b86944c 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -59,9 +59,9 @@ static void requeue_task_rt(struct rq *rq, struct task_struct *p)
 }
 
 static void
-yield_task_rt(struct rq *rq, struct task_struct *p)
+yield_task_rt(struct rq *rq)
 {
-	requeue_task_rt(rq, p);
+	requeue_task_rt(rq, rq->curr);
 }
 
 /*

---

Dmitry





^ permalink raw reply related	[flat|nested] 37+ messages in thread
* [git] CFS-devel, group scheduler, fixes
@ 2007-09-15 13:06 Ingo Molnar
  0 siblings, 0 replies; 37+ messages in thread
From: Ingo Molnar @ 2007-09-15 13:06 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Mike Galbraith, Srivatsa Vaddagiri, Dhaval Giani,
	Dmitry Adamushko


The latest sched-devel.git tree can be pulled from:

   git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched-devel.git

people were busy sending patches, so there are lots of updates since the
first announcement of the cfs-devel.git tree four days ago:

  include/linux/sched.h   |    4 
  init/Kconfig            |    9 +
  kernel/sched.c          |  374 +++++++++++++++++++++++++++++++++++++++++++-----
  kernel/sched_debug.c    |   22 +-
  kernel/sched_fair.c     |  191 ++++++++++++++++++++----
  kernel/sched_idletask.c |    5 
  kernel/sched_rt.c       |    5 
  kernel/sysctl.c         |   19 ++
  8 files changed, 552 insertions(+), 77 deletions(-)

the most user-noticeable improvement should be the latency/interactivity 
fixes from Mike Galbraith and Peter Zijlstra. The biggest (and most 
complex) item is the new group scheduler code from Srivatsa Vaddagiri. 
There's also speedups from Dmitry Adamushko and various cleanups and 
fixes. Also added in the yield workaround that should address the 
regression reported by Antoine Martin.

Changes:

- the biggest item is the addition of the group scheduler from Srivatsa 
  Vaddagiri - this is not configurable yet, it depends on
  CONFIG_CONTAINERS. It causes no overhead on !CONFIG_CONTAINERS. This
  code clearly seems mature now, hopefully the container bits go
  upstream in 2.6.24 too. Srivatsa did lots of heavy lifting in the past 
  few months, and this final bit of code that moves in all the 
  infrastructure changes almost nothing in the core scheduler.

- a triplet of nice simplifications from Dmitry Adamushko. Most notably
  Dmitry got rid of se->fair_key which shaves 8 bytes of task_struct and
  gives further speedup. Thanks Dmitry!

- continued refinements to the SMP ->vruntime code and timeslicing by
  Peter Zijlstra and Mike Galbraith.

As usual, bugreports, fixes and suggestions are welcome and please 
holler if some patch went missing in action.

	Ingo

------------------>
Dmitry Adamushko (3):
      sched: clean up struct load_stat
      sched: clean up schedstat block in dequeue_entity()
      sched: optimize away ->fair_key

Matthias Kaehlcke (1):
      sched: use list_for_each_entry_safe() in __wake_up_common()

Mike Galbraith (1):
      sched: fix SMP migration latencies

Peter Zijlstra (7):
      sched: simplify SCHED_FEAT_* code
      sched: new task placement for vruntime
      sched: simplify adaptive latency
      sched: clean up new task placement
      sched: add tree based averages
      sched: handle vruntime overflow
      sched: better min_vruntime tracking

Srivatsa Vaddagiri (1):
      sched: group-scheduler core

Ingo Molnar (27):
      sched: fix new-task method
      sched: resched task in task_new_fair()
      sched: small sched_debug cleanup
      sched: debug: track maximum 'slice'
      sched: uniform tunings
      sched: use constants if !CONFIG_SCHED_DEBUG
      sched: remove stat_gran
      sched: remove precise CPU load
      sched: remove precise CPU load calculations #2
      sched: track cfs_rq->curr on !group-scheduling too
      sched: cleanup: simplify cfs_rq_curr() methods
      sched: uninline __enqueue_entity()/__dequeue_entity()
      sched: speed up update_load_add/_sub()
      sched: clean up calc_weighted()
      sched: introduce se->vruntime
      sched: move sched_feat() definitions
      sched: optimize vruntime based scheduling
      sched: simplify check_preempt() methods
      sched: wakeup granularity fix
      sched: add se->vruntime debugging
      sched: sync up ->min_vruntime when going idle
      sched: add more vruntime statistics
      sched: debug: update exec_clock only when SCHED_DEBUG
      sched: remove wait_runtime limit
      sched: remove wait_runtime fields and features
      sched: x86: allow single-depth wchan output
      sched: yield workaround

 arch/i386/Kconfig       |   11 
 include/linux/sched.h   |   21 -
 init/Kconfig            |    9 
 kernel/sched.c          |  558 +++++++++++++++++++++++++------------
 kernel/sched_debug.c    |  100 +++---
 kernel/sched_fair.c     |  716 +++++++++++++++++++-----------------------------
 kernel/sched_idletask.c |    5 
 kernel/sched_rt.c       |    5 
 kernel/sysctl.c         |   33 +-
 9 files changed, 765 insertions(+), 693 deletions(-)

^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2007-09-24 16:48 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-09-18 19:36 [git] CFS-devel, group scheduler, fixes dimm
2007-09-18 20:16 ` Ingo Molnar
2007-09-19  6:03   ` Tong Li
2007-09-19  6:28     ` Mike Galbraith
2007-09-19  7:51       ` Mike Galbraith
2007-09-19  8:42         ` Mike Galbraith
2007-09-19 17:06           ` Tong Li
2007-09-20  4:55             ` Mike Galbraith
2007-09-20  7:15               ` Mike Galbraith
2007-09-20  7:51                 ` Ingo Molnar
2007-09-20  8:11                   ` Mike Galbraith
2007-09-22  3:27                     ` Tong Li
2007-09-22 10:01                       ` Mike Galbraith
2007-09-23  7:14                         ` Mike Galbraith
2007-09-23 11:37                           ` Mike Galbraith
     [not found]                           ` <20070923115847.GA13061@elte.hu>
2007-09-23 15:53                             ` [git] CFS-devel, updates Mike Galbraith
2007-09-24  6:21                           ` [git] CFS-devel, group scheduler, fixes Tong Li
2007-09-24 10:10                             ` Mike Galbraith
2007-09-24 10:24                               ` Peter Zijlstra
2007-09-24 10:42                                 ` Mike Galbraith
2007-09-24 11:08                                   ` Peter Zijlstra
2007-09-24 11:43                                     ` Mike Galbraith
2007-09-24 11:22                                   ` Mike Galbraith
2007-09-24 11:51                                     ` Peter Zijlstra
2007-09-24 16:43                                       ` Tong Li
2007-09-20 19:48                 ` Willy Tarreau
2007-09-21  2:40                   ` Mike Galbraith
2007-09-21  3:11                     ` Willy Tarreau
2007-09-19 19:35     ` Siddha, Suresh B
2007-09-19 20:58       ` Tong Li
2007-09-18 20:22 ` Ingo Molnar
2007-09-19  3:55   ` Srivatsa Vaddagiri
  -- strict thread matches above, loose matches on Subject: below --
2007-09-18 19:56 Dmitry Adamushko
2007-09-18 20:18 ` Ingo Molnar
2007-09-18 19:46 Dmitry Adamushko
2007-09-18 20:17 ` Ingo Molnar
2007-09-15 13:06 Ingo Molnar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.