All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Ingo Molnar <mingo@elte.hu>
Cc: linux-kernel@vger.kernel.org, Gautham R Shenoy <ego@in.ibm.com>,
	Andreas Herrmann <andreas.herrmann3@amd.com>,
	Balbir Singh <balbir@in.ibm.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [RFC][PATCH 06/14] sched: scale down cpu_power due to RT tasks
Date: Thu, 03 Sep 2009 15:21:51 +0200	[thread overview]
Message-ID: <20090903132212.601077313@chello.nl> (raw)
In-Reply-To: 20090903132145.482814810@chello.nl

[-- Attachment #1: sched-lb-5.patch --]
[-- Type: text/plain, Size: 5311 bytes --]

Keep an average of the amount of time spent on RT tasks and use that
fraction to scale down the cpu_power for regular tasks.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/sched.h |    1 
 kernel/sched.c        |   64 +++++++++++++++++++++++++++++++++++++++++++++++---
 kernel/sched_rt.c     |    6 +---
 kernel/sysctl.c       |    8 ++++++
 4 files changed, 72 insertions(+), 7 deletions(-)

Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -1863,6 +1863,7 @@ extern unsigned int sysctl_sched_child_r
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_time_avg;
 extern unsigned int sysctl_timer_migration;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -627,6 +627,9 @@ struct rq {
 
 	struct task_struct *migration_thread;
 	struct list_head migration_queue;
+
+	u64 rt_avg;
+	u64 age_stamp;
 #endif
 
 	/* calc_load related fields */
@@ -863,6 +866,14 @@ unsigned int sysctl_sched_shares_ratelim
 unsigned int sysctl_sched_shares_thresh = 4;
 
 /*
+ * period over which we average the RT time consumption, measured
+ * in ms.
+ *
+ * default: 1s
+ */
+const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;
+
+/*
  * period over which we measure -rt task cpu usage in us.
  * default: 1s
  */
@@ -1280,12 +1291,37 @@ void wake_up_idle_cpu(int cpu)
 }
 #endif /* CONFIG_NO_HZ */
 
+static u64 sched_avg_period(void)
+{
+	return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
+}
+
+static void sched_avg_update(struct rq *rq)
+{
+	s64 period = sched_avg_period();
+
+	while ((s64)(rq->clock - rq->age_stamp) > period) {
+		rq->age_stamp += period;
+		rq->rt_avg /= 2;
+	}
+}
+
+static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
+{
+	rq->rt_avg += rt_delta;
+	sched_avg_update(rq);
+}
+
 #else /* !CONFIG_SMP */
 static void resched_task(struct task_struct *p)
 {
 	assert_spin_locked(&task_rq(p)->lock);
 	set_tsk_need_resched(p);
 }
+
+static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
+{
+}
 #endif /* CONFIG_SMP */
 
 #if BITS_PER_LONG == 32
@@ -3699,7 +3735,7 @@ static inline int check_power_save_busie
 }
 #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 
-unsigned long __weak arch_smt_gain(struct sched_domain *sd, int cpu)
+unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
 {
 	unsigned long weight = cpumask_weight(sched_domain_span(sd));
 	unsigned long smt_gain = sd->smt_gain;
@@ -3709,6 +3745,24 @@ unsigned long __weak arch_smt_gain(struc
 	return smt_gain;
 }
 
+unsigned long scale_rt_power(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	u64 total, available;
+
+	sched_avg_update(rq);
+
+	total = sched_avg_period() + (rq->clock - rq->age_stamp);
+	available = total - rq->rt_avg;
+
+	if (unlikely((s64)total < SCHED_LOAD_SCALE))
+		total = SCHED_LOAD_SCALE;
+
+	total >>= SCHED_LOAD_SHIFT;
+
+	return div_u64(available, total);
+}
+
 static void update_cpu_power(struct sched_domain *sd, int cpu)
 {
 	unsigned long weight = cpumask_weight(sched_domain_span(sd));
@@ -3719,11 +3773,15 @@ static void update_cpu_power(struct sche
 	/* here we could scale based on cpufreq */
 
 	if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
-		power *= arch_smt_gain(sd, cpu);
+		power *= arch_scale_smt_power(sd, cpu);
 		power >>= SCHED_LOAD_SHIFT;
 	}
 
-	/* here we could scale based on RT time */
+	power *= scale_rt_power(cpu);
+	power >>= SCHED_LOAD_SHIFT;
+
+	if (!power)
+		power = 1;
 
 	if (power != old) {
 		sdg->__cpu_power = power;
Index: linux-2.6/kernel/sched_rt.c
===================================================================
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -615,6 +615,8 @@ static void update_curr_rt(struct rq *rq
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);
 
+	sched_rt_avg_update(rq, delta_exec);
+
 	if (!rt_bandwidth_enabled())
 		return;
 
@@ -887,8 +889,6 @@ static void enqueue_task_rt(struct rq *r
 
 	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
-
-	inc_cpu_load(rq, p->se.load.weight);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
@@ -899,8 +899,6 @@ static void dequeue_task_rt(struct rq *r
 	dequeue_rt_entity(rt_se);
 
 	dequeue_pushable_task(rq, p);
-
-	dec_cpu_load(rq, p->se.load.weight);
 }
 
 /*
Index: linux-2.6/kernel/sysctl.c
===================================================================
--- linux-2.6.orig/kernel/sysctl.c
+++ linux-2.6/kernel/sysctl.c
@@ -332,6 +332,14 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_time_avg",
+		.data		= &sysctl_sched_time_avg,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "timer_migration",
 		.data		= &sysctl_timer_migration,
 		.maxlen		= sizeof(unsigned int),

-- 


  parent reply	other threads:[~2009-09-03 13:24 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-03 13:21 [RFC][PATCH 00/14] load-balancing and cpu_power -v3 Peter Zijlstra
2009-09-03 13:21 ` [RFC][PATCH 01/14] sched: restore __cpu_power to a straight sum of power Peter Zijlstra
2009-09-03 13:21 ` [RFC][PATCH 02/14] sched: SD_PREFER_SIBLING Peter Zijlstra
2009-09-03 13:21 ` [RFC][PATCH 03/14] sched: update the cpu_power sum during load-balance Peter Zijlstra
2009-09-03 13:21 ` [RFC][PATCH 04/14] sched: add smt_gain Peter Zijlstra
2009-09-03 13:21 ` [RFC][PATCH 05/14] sched: dynamic cpu_power Peter Zijlstra
2009-09-03 13:21 ` Peter Zijlstra [this message]
2009-09-03 13:21 ` [RFC][PATCH 07/14] sched: try to deal with low capacity Peter Zijlstra
2009-09-03 13:21 ` [RFC][PATCH 08/14] sched: remove reciprocal for cpu_power Peter Zijlstra
2009-09-03 13:59   ` Peter Zijlstra
2009-09-03 13:21 ` [RFC][PATCH 09/14] x86: move APERF/MPERF into a X86_FEATURE Peter Zijlstra
2009-09-03 13:21 ` [RFC][PATCH 10/14] x86: generic aperf/mperf code Peter Zijlstra
2009-09-04  9:19   ` Thomas Renninger
2009-09-04  9:25     ` Peter Zijlstra
2009-09-04  9:27       ` Peter Zijlstra
2009-09-04  9:34         ` Thomas Renninger
2009-09-04 14:22         ` Dave Jones
2009-09-04 14:42           ` Peter Zijlstra
2009-09-04 17:45             ` H. Peter Anvin
2009-09-03 13:21 ` [RFC][PATCH 11/14] sched: provide arch_scale_freq_power Peter Zijlstra
2009-09-03 13:21 ` [RFC][PATCH 12/14] x86: sched: provide arch implementations using aperf/mperf Peter Zijlstra
2009-09-03 13:21 ` [RFC][PATCH 13/14] sched: cleanup wake_idle power saving Peter Zijlstra
2009-09-03 13:21 ` [RFC][PATCH 14/14] sched: cleanup wake_idle Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090903132212.601077313@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=andreas.herrmann3@amd.com \
    --cc=balbir@in.ibm.com \
    --cc=ego@in.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.