From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
To: Linux Kernel <linux-kernel@vger.kernel.org>,
Suresh B Siddha <suresh.b.siddha@intel.com>,
Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>,
Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@elte.hu>, Dipankar Sarma <dipankar@in.ibm.com>,
Balbir Singh <balbir@linux.vnet.ibm.com>,
Vatsa <vatsa@linux.vnet.ibm.com>,
Gautham R Shenoy <ego@in.ibm.com>,
Andi Kleen <andi@firstfloor.org>,
David Collier-Brown <davecb@sun.com>,
Tim Connors <tconnors@astro.swin.edu.au>,
Max Krasnyansky <maxk@qualcomm.com>,
Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Subject: [RFC PATCH v2 1/5] sched: load calculation for each group in sched domain
Date: Thu, 09 Oct 2008 17:39:25 +0530 [thread overview]
Message-ID: <20081009120924.27010.59999.stgit@drishya.in.ibm.com> (raw)
In-Reply-To: <20081009120705.27010.12857.stgit@drishya.in.ibm.com>
Add data structures for per group stats, and function
to calculate the required per group stats.
Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
---
kernel/sched.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 114 insertions(+), 0 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index ad1962d..ab77937 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3064,6 +3064,120 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
return 0;
}
+/* Helper functions for find_busiest_group */
+
+/*
+ * Pick the load index of @sd that matches the idle state of the balancing
+ * cpu: busy_idx when it is not idle, newidle_idx when it has just become
+ * idle, and idle_idx for every other state.
+ */
+int get_load_idx(struct sched_domain *sd, enum cpu_idle_type idle)
+{
+	switch (idle) {
+	case CPU_NOT_IDLE:
+		return sd->busy_idx;
+	case CPU_NEWLY_IDLE:
+		return sd->newidle_idx;
+	default:
+		return sd->idle_idx;
+	}
+}
+
+/*
+ * Struct to return group stats.
+ *
+ * Filled in by get_group_loads() for one sched group. The sums below run
+ * over the cpus of the group that are also set in the valid_cpus mask
+ * handed to get_group_loads().
+ */
+
+struct group_loads {
+ struct sched_group *group; /* Group these stats describe */
+ unsigned long nr_running; /* Sum of rq->nr_running */
+ unsigned long load; /* Decay average load: sum of target_load() (local group) or source_load() (other groups) */
+ unsigned long load_per_cpu; /* Decay load / cpu_power: sg_div_cpu_power(group, load * SCHED_LOAD_SCALE) */
+ unsigned long weighted_load; /* Instantaneous load (load.weight): sum of weighted_cpuload() */
+ unsigned long avg_load_per_task; /* Instantaneous load / nr_running: sum of cpu_avg_load_per_task() */
+ unsigned int group_imbalance; /* 1 if, in a non-local group, max - min cpu load exceeds twice the scaled avg task load */
+ int local_group; /* Non-zero if this_cpu is in group->cpumask */
+ int balance_cpu; /* Initialised to -1; presumably the cpu picked to balance for the local group - confirm against get_group_loads() */
+};
+
+/*
+ * Helper function to calculate basic group level stats.
+ *
+ * @group:      sched group whose cpus are scanned
+ * @this_cpu:   cpu on whose behalf the balance is evaluated
+ * @valid_cpus: cpus of @group that are not set in this mask are skipped
+ * @idle:       idle state of the caller
+ * @load_idx:   index handed to target_load()/source_load()
+ * @gl:         output; every field is (re)initialised by this function
+ *
+ * Returns 0 when @group is the local group, @idle is not CPU_NEWLY_IDLE and
+ * the cpu selected in gl->balance_cpu is not @this_cpu, i.e. another cpu of
+ * the group is the one eligible to balance. Returns 1 in all other cases.
+ */
+
+int get_group_loads(struct sched_group *group, int this_cpu,
+		    const cpumask_t *valid_cpus, enum cpu_idle_type idle,
+		    int load_idx,
+		    struct group_loads *gl)
+{
+	struct rq *rq;
+	unsigned long load, min_load, max_load, avg_load_per_task_per_cpu;
+	int cpu;
+	int first_idle_cpu = -1;
+	int need_balance = 1;
+
+	gl->group = group;
+	gl->nr_running = 0;
+	gl->load = 0;
+	gl->weighted_load = 0;
+	gl->avg_load_per_task = 0;
+	gl->group_imbalance = 0;
+	gl->balance_cpu = -1;
+	max_load = 0;
+	min_load = ~0UL;
+
+	gl->local_group = cpu_isset(this_cpu, group->cpumask);
+
+	for_each_cpu_mask_nr(cpu, group->cpumask) {
+		if (!cpu_isset(cpu, *valid_cpus))
+			continue;
+
+		rq = cpu_rq(cpu);
+
+		/* Bias balancing toward cpus of our domain */
+		if (gl->local_group) {
+			/* Remember only the first idle cpu of the local group */
+			if (idle_cpu(cpu) && first_idle_cpu == -1)
+				first_idle_cpu = cpu;
+
+			load = target_load(cpu, load_idx);
+		} else {
+			load = source_load(cpu, load_idx);
+			/* Track the per-cpu load spread for the imbalance test */
+			if (load > max_load)
+				max_load = load;
+			if (load < min_load)
+				min_load = load;
+		}
+		gl->nr_running += rq->nr_running;
+		gl->load += load;
+		gl->weighted_load += weighted_cpuload(cpu);
+		gl->avg_load_per_task += cpu_avg_load_per_task(cpu);
+	}
+
+	/*
+	 * Consider the group unbalanced when the imbalance is larger
+	 * than the average weight of two tasks.
+	 *
+	 * APZ: with cgroup the avg task weight can vary wildly and
+	 *      might not be a suitable number - should we keep a
+	 *      normalized nr_running number somewhere that negates
+	 *      the hierarchy?
+	 */
+
+	avg_load_per_task_per_cpu = sg_div_cpu_power(group,
+			gl->avg_load_per_task * SCHED_LOAD_SCALE);
+
+	if (!gl->local_group &&
+	    ((max_load - min_load) > 2*avg_load_per_task_per_cpu))
+		gl->group_imbalance = 1;
+
+	/*
+	 * The local-group flag is recorded only in gl->local_group, so that
+	 * is what must be tested here; otherwise balance_cpu is never chosen
+	 * and need_balance can never be cleared.
+	 */
+	if (gl->local_group) {
+		if (first_idle_cpu != -1)
+			gl->balance_cpu = first_idle_cpu;
+		else
+			gl->balance_cpu = first_cpu(group->cpumask);
+
+		/*
+		 * First idle cpu or the first cpu(busiest) in this sched group
+		 * is eligible for doing load balancing at this and above
+		 * domains. In the newly idle case, we will allow all the cpu's
+		 * to do the newly idle load balance.
+		 */
+		if (idle != CPU_NEWLY_IDLE && gl->balance_cpu != this_cpu)
+			need_balance = 0;
+	}
+	gl->load_per_cpu = sg_div_cpu_power(group, gl->load * SCHED_LOAD_SCALE);
+
+	return need_balance;
+}
+
/*
* find_busiest_group finds and returns the busiest CPU group within the
* domain. It calculates and returns the amount of weighted load which
next prev parent reply other threads:[~2008-10-09 12:05 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-10-09 12:09 [RFC PATCH v2 0/5] sched: modular find_busiest_group() Vaidyanathan Srinivasan
2008-10-09 12:09 ` Vaidyanathan Srinivasan [this message]
2008-10-09 12:09 ` [RFC PATCH v2 2/5] sched: calculate statistics for current load balance domain Vaidyanathan Srinivasan
2008-10-09 12:09 ` [RFC PATCH v2 3/5] sched: collect statistics required for powersave balance Vaidyanathan Srinivasan
2008-10-09 12:09 ` [RFC PATCH v2 4/5] sched: small imbalance corrections Vaidyanathan Srinivasan
2008-10-09 12:09 ` [RFC PATCH v2 5/5] sched: split find_busiest_group() Vaidyanathan Srinivasan
2008-10-09 14:19 ` [RFC PATCH v2 0/5] sched: modular find_busiest_group() Peter Zijlstra
2008-10-10 1:36 ` Vaidyanathan Srinivasan
2008-10-14 12:09 ` Peter Zijlstra
2008-10-14 13:07 ` Vaidyanathan Srinivasan
2008-10-14 13:25 ` Peter Zijlstra
2008-10-24 10:04 ` Vaidyanathan Srinivasan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20081009120924.27010.59999.stgit@drishya.in.ibm.com \
--to=svaidy@linux.vnet.ibm.com \
--cc=a.p.zijlstra@chello.nl \
--cc=andi@firstfloor.org \
--cc=balbir@linux.vnet.ibm.com \
--cc=davecb@sun.com \
--cc=dipankar@in.ibm.com \
--cc=ego@in.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=maxk@qualcomm.com \
--cc=mingo@elte.hu \
--cc=suresh.b.siddha@intel.com \
--cc=tconnors@astro.swin.edu.au \
--cc=vatsa@linux.vnet.ibm.com \
--cc=venkatesh.pallipadi@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox