From: Heiko Carstens <heiko.carstens@de.ibm.com>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>,
Mike Galbraith <efault@gmx.de>, Ingo Molnar <mingo@elte.hu>,
Suresh Siddha <suresh.b.siddha@intel.com>,
Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: linux-kernel@vger.kernel.org,
Martin Schwidefsky <schwidefsky@de.ibm.com>,
Heiko Carstens <heiko.carstens@de.ibm.com>
Subject: [PATCH/RFC 3/5] [PATCH] sched: add book scheduling domain
Date: Thu, 12 Aug 2010 19:25:47 +0200 [thread overview]
Message-ID: <20100812172623.140578409@de.ibm.com> (raw)
In-Reply-To: 20100812172544.655648128@de.ibm.com
[-- Attachment #1: 03-sched-book.diff --]
[-- Type: text/plain, Size: 12431 bytes --]
From: Heiko Carstens <heiko.carstens@de.ibm.com>
On top of the SMT and MC scheduling domains this adds the BOOK scheduling
domain. This is useful for machines that have a four-level cache hierarchy
but do not fall into the NUMA category.
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
arch/s390/defconfig | 1
include/linux/sched.h | 19 +++++++
include/linux/topology.h | 6 ++
kernel/sched.c | 112 ++++++++++++++++++++++++++++++++++++++++++++---
kernel/sched_fair.c | 11 ++--
5 files changed, 137 insertions(+), 12 deletions(-)
diff -urpN linux-2.6/arch/s390/defconfig linux-2.6-patched/arch/s390/defconfig
--- linux-2.6/arch/s390/defconfig 2010-08-02 00:11:14.000000000 +0200
+++ linux-2.6-patched/arch/s390/defconfig 2010-08-11 13:47:23.000000000 +0200
@@ -248,6 +248,7 @@ CONFIG_64BIT=y
CONFIG_SMP=y
CONFIG_NR_CPUS=32
CONFIG_HOTPLUG_CPU=y
+# CONFIG_SCHED_BOOK is not set
CONFIG_COMPAT=y
CONFIG_SYSVIPC_COMPAT=y
CONFIG_AUDIT_ARCH=y
diff -urpN linux-2.6/include/linux/sched.h linux-2.6-patched/include/linux/sched.h
--- linux-2.6/include/linux/sched.h 2010-08-11 13:47:16.000000000 +0200
+++ linux-2.6-patched/include/linux/sched.h 2010-08-11 13:47:23.000000000 +0200
@@ -807,7 +807,9 @@ enum powersavings_balance_level {
MAX_POWERSAVINGS_BALANCE_LEVELS
};
-extern int sched_mc_power_savings, sched_smt_power_savings;
+extern int sched_smt_power_savings;
+extern int sched_mc_power_savings;
+extern int sched_book_power_savings;
static inline int sd_balance_for_mc_power(void)
{
@@ -820,11 +822,23 @@ static inline int sd_balance_for_mc_powe
return 0;
}
-static inline int sd_balance_for_package_power(void)
+static inline int sd_balance_for_book_power(void)
{
if (sched_mc_power_savings | sched_smt_power_savings)
return SD_POWERSAVINGS_BALANCE;
+ if (!sched_book_power_savings)
+ return SD_PREFER_SIBLING;
+
+ return 0;
+}
+
+static inline int sd_balance_for_package_power(void)
+{
+ if (sched_book_power_savings | sched_mc_power_savings |
+ sched_smt_power_savings)
+ return SD_POWERSAVINGS_BALANCE;
+
return SD_PREFER_SIBLING;
}
@@ -875,6 +889,7 @@ enum sched_domain_level {
SD_LV_NONE = 0,
SD_LV_SIBLING,
SD_LV_MC,
+ SD_LV_BOOK,
SD_LV_CPU,
SD_LV_NODE,
SD_LV_ALLNODES,
diff -urpN linux-2.6/include/linux/topology.h linux-2.6-patched/include/linux/topology.h
--- linux-2.6/include/linux/topology.h 2010-08-11 13:47:16.000000000 +0200
+++ linux-2.6-patched/include/linux/topology.h 2010-08-11 13:47:23.000000000 +0200
@@ -201,6 +201,12 @@ int arch_update_cpu_topology(void);
.balance_interval = 64, \
}
+#ifdef CONFIG_SCHED_BOOK
+#ifndef SD_BOOK_INIT
+#error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!!
+#endif
+#endif /* CONFIG_SCHED_BOOK */
+
#ifdef CONFIG_NUMA
#ifndef SD_NODE_INIT
#error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!!
diff -urpN linux-2.6/kernel/sched.c linux-2.6-patched/kernel/sched.c
--- linux-2.6/kernel/sched.c 2010-08-11 13:47:23.000000000 +0200
+++ linux-2.6-patched/kernel/sched.c 2010-08-11 13:47:23.000000000 +0200
@@ -6472,7 +6472,9 @@ static void sched_domain_node_span(int n
}
#endif /* CONFIG_NUMA */
-int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
+int sched_smt_power_savings;
+int sched_mc_power_savings;
+int sched_book_power_savings;
/*
* The cpus mask in sched_group and sched_domain hangs off the end.
@@ -6500,6 +6502,7 @@ struct s_data {
cpumask_var_t nodemask;
cpumask_var_t this_sibling_map;
cpumask_var_t this_core_map;
+ cpumask_var_t this_book_map;
cpumask_var_t send_covered;
cpumask_var_t tmpmask;
struct sched_group **sched_group_nodes;
@@ -6511,6 +6514,7 @@ enum s_alloc {
sa_rootdomain,
sa_tmpmask,
sa_send_covered,
+ sa_this_book_map,
sa_this_core_map,
sa_this_sibling_map,
sa_nodemask,
@@ -6564,6 +6568,31 @@ cpu_to_core_group(int cpu, const struct
}
#endif /* CONFIG_SCHED_MC */
+/*
+ * book sched-domains:
+ */
+#ifdef CONFIG_SCHED_BOOK
+static DEFINE_PER_CPU(struct static_sched_domain, book_domains);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_book);
+
+static int
+cpu_to_book_group(int cpu, const struct cpumask *cpu_map,
+ struct sched_group **sg, struct cpumask *mask)
+{
+ int group = cpu;
+#ifdef CONFIG_SCHED_MC
+ cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
+ group = cpumask_first(mask);
+#elif defined(CONFIG_SCHED_SMT)
+ cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
+ group = cpumask_first(mask);
+#endif
+ if (sg)
+ *sg = &per_cpu(sched_group_book, group).sg;
+ return group;
+}
+#endif /* CONFIG_SCHED_BOOK */
+
static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
@@ -6572,7 +6601,10 @@ cpu_to_phys_group(int cpu, const struct
struct sched_group **sg, struct cpumask *mask)
{
int group;
-#ifdef CONFIG_SCHED_MC
+#ifdef CONFIG_SCHED_BOOK
+ cpumask_and(mask, cpu_book_mask(cpu), cpu_map);
+ group = cpumask_first(mask);
+#elif defined(CONFIG_SCHED_MC)
cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
group = cpumask_first(mask);
#elif defined(CONFIG_SCHED_SMT)
@@ -6833,6 +6865,9 @@ SD_INIT_FUNC(CPU)
#ifdef CONFIG_SCHED_MC
SD_INIT_FUNC(MC)
#endif
+#ifdef CONFIG_SCHED_BOOK
+ SD_INIT_FUNC(BOOK)
+#endif
static int default_relax_domain_level = -1;
@@ -6882,6 +6917,8 @@ static void __free_domain_allocs(struct
free_cpumask_var(d->tmpmask); /* fall through */
case sa_send_covered:
free_cpumask_var(d->send_covered); /* fall through */
+ case sa_this_book_map:
+ free_cpumask_var(d->this_book_map); /* fall through */
case sa_this_core_map:
free_cpumask_var(d->this_core_map); /* fall through */
case sa_this_sibling_map:
@@ -6928,8 +6965,10 @@ static enum s_alloc __visit_domain_alloc
return sa_nodemask;
if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL))
return sa_this_sibling_map;
- if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
+ if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL))
return sa_this_core_map;
+ if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
+ return sa_this_book_map;
if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
return sa_send_covered;
d->rd = alloc_rootdomain();
@@ -6987,6 +7026,23 @@ static struct sched_domain *__build_cpu_
return sd;
}
+static struct sched_domain *__build_book_sched_domain(struct s_data *d,
+ const struct cpumask *cpu_map, struct sched_domain_attr *attr,
+ struct sched_domain *parent, int i)
+{
+ struct sched_domain *sd = parent;
+#ifdef CONFIG_SCHED_BOOK
+ sd = &per_cpu(book_domains, i).sd;
+ SD_INIT(sd, BOOK);
+ set_domain_attribute(sd, attr);
+ cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i));
+ sd->parent = parent;
+ parent->child = sd;
+ cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask);
+#endif
+ return sd;
+}
+
static struct sched_domain *__build_mc_sched_domain(struct s_data *d,
const struct cpumask *cpu_map, struct sched_domain_attr *attr,
struct sched_domain *parent, int i)
@@ -7044,6 +7100,15 @@ static void build_sched_groups(struct s_
d->send_covered, d->tmpmask);
break;
#endif
+#ifdef CONFIG_SCHED_BOOK
+ case SD_LV_BOOK: /* set up book groups */
+ cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu));
+ if (cpu == cpumask_first(d->this_book_map))
+ init_sched_build_groups(d->this_book_map, cpu_map,
+ &cpu_to_book_group,
+ d->send_covered, d->tmpmask);
+ break;
+#endif
case SD_LV_CPU: /* set up physical groups */
cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map);
if (!cpumask_empty(d->nodemask))
@@ -7091,12 +7156,14 @@ static int __build_sched_domains(const s
sd = __build_numa_sched_domains(&d, cpu_map, attr, i);
sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i);
+ sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i);
sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i);
sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i);
}
for_each_cpu(i, cpu_map) {
build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i);
+ build_sched_groups(&d, SD_LV_BOOK, cpu_map, i);
build_sched_groups(&d, SD_LV_MC, cpu_map, i);
}
@@ -7127,6 +7194,12 @@ static int __build_sched_domains(const s
init_sched_groups_power(i, sd);
}
#endif
+#ifdef CONFIG_SCHED_BOOK
+ for_each_cpu(i, cpu_map) {
+ sd = &per_cpu(book_domains, i).sd;
+ init_sched_groups_power(i, sd);
+ }
+#endif
for_each_cpu(i, cpu_map) {
sd = &per_cpu(phys_domains, i).sd;
@@ -7152,6 +7225,8 @@ static int __build_sched_domains(const s
sd = &per_cpu(cpu_domains, i).sd;
#elif defined(CONFIG_SCHED_MC)
sd = &per_cpu(core_domains, i).sd;
+#elif defined(CONFIG_SCHED_BOOK)
+ sd = &per_cpu(book_domains, i).sd;
#else
sd = &per_cpu(phys_domains, i).sd;
#endif
@@ -7368,7 +7443,8 @@ match2:
mutex_unlock(&sched_domains_mutex);
}
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+#if defined(CONFIG_SCHED_BOOK) || defined(CONFIG_SCHED_MC) || \
+ defined(CONFIG_SCHED_SMT)
static void arch_reinit_sched_domains(void)
{
get_online_cpus();
@@ -7405,6 +7481,9 @@ static ssize_t sched_power_savings_store
case SD_LV_MC:
sched_mc_power_savings = level;
break;
+ case SD_LV_BOOK:
+ sched_book_power_savings = level;
+ break;
default:
break;
}
@@ -7414,6 +7493,24 @@ static ssize_t sched_power_savings_store
return count;
}
+#ifdef CONFIG_SCHED_BOOK
+static ssize_t sched_book_power_savings_show(struct sysdev_class *class,
+ struct sysdev_class_attribute *attr,
+ char *page)
+{
+ return sprintf(page, "%u\n", sched_book_power_savings);
+}
+static ssize_t sched_book_power_savings_store(struct sysdev_class *class,
+ struct sysdev_class_attribute *attr,
+ const char *buf, size_t count)
+{
+ return sched_power_savings_store(buf, count, SD_LV_BOOK);
+}
+static SYSDEV_CLASS_ATTR(sched_book_power_savings, 0644,
+ sched_book_power_savings_show,
+ sched_book_power_savings_store);
+#endif
+
#ifdef CONFIG_SCHED_MC
static ssize_t sched_mc_power_savings_show(struct sysdev_class *class,
struct sysdev_class_attribute *attr,
@@ -7464,9 +7561,14 @@ int __init sched_create_sysfs_power_savi
err = sysfs_create_file(&cls->kset.kobj,
&attr_sched_mc_power_savings.attr);
#endif
+#ifdef CONFIG_SCHED_BOOK
+ if (!err && book_capable())
+ err = sysfs_create_file(&cls->kset.kobj,
+ &attr_sched_book_power_savings.attr);
+#endif
return err;
}
-#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+#endif /* CONFIG_SCHED_BOOK || CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
/*
* Update cpusets according to cpu_active mask. If cpusets are
diff -urpN linux-2.6/kernel/sched_fair.c linux-2.6-patched/kernel/sched_fair.c
--- linux-2.6/kernel/sched_fair.c 2010-08-11 13:47:16.000000000 +0200
+++ linux-2.6-patched/kernel/sched_fair.c 2010-08-11 13:47:23.000000000 +0200
@@ -2039,7 +2039,8 @@ struct sd_lb_stats {
unsigned long busiest_group_capacity;
int group_imb; /* Is there imbalance in this sd */
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+#if defined(CONFIG_SCHED_BOOK) || defined(CONFIG_SCHED_MC) || \
+ defined(CONFIG_SCHED_SMT)
int power_savings_balance; /* Is powersave balance needed for this sd */
struct sched_group *group_min; /* Least loaded group in sd */
struct sched_group *group_leader; /* Group which relieves group_min */
@@ -2096,8 +2097,8 @@ static inline int get_sd_load_idx(struct
return load_idx;
}
-
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+#if defined(CONFIG_SCHED_BOOK) || defined(CONFIG_SCHED_MC) || \
+ defined(CONFIG_SCHED_SMT)
/**
* init_sd_power_savings_stats - Initialize power savings statistics for
* the given sched_domain, during load balancing.
@@ -2217,7 +2218,7 @@ static inline int check_power_save_busie
return 1;
}
-#else /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+#else /* CONFIG_SCHED_BOOK || CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
static inline void init_sd_power_savings_stats(struct sched_domain *sd,
struct sd_lb_stats *sds, enum cpu_idle_type idle)
{
@@ -2235,7 +2236,7 @@ static inline int check_power_save_busie
{
return 0;
}
-#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+#endif /* CONFIG_SCHED_BOOK || CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
next prev parent reply other threads:[~2010-08-12 17:24 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-08-12 17:25 [PATCH/RFC 0/5] sched: add new 'book' scheduling domain Heiko Carstens
2010-08-12 17:25 ` [PATCH/RFC 1/5] [PATCH] sched: merge cpu_to_core_group functions Heiko Carstens
2010-08-13 21:11 ` Suresh Siddha
2010-08-31 8:26 ` Heiko Carstens
2010-08-12 17:25 ` [PATCH/RFC 2/5] [PATCH] sched: pass sched_domain_level to sched_power_savings_store Heiko Carstens
2010-08-13 21:13 ` Suresh Siddha
2010-08-19 11:36 ` Andreas Herrmann
2010-08-16 8:29 ` Peter Zijlstra
2010-08-19 11:41 ` Andreas Herrmann
2010-08-19 12:35 ` Peter Zijlstra
2010-08-19 12:32 ` Andreas Herrmann
2010-08-12 17:25 ` Heiko Carstens [this message]
2010-08-13 21:22 ` [PATCH/RFC 3/5] [PATCH] sched: add book scheduling domain Suresh Siddha
2010-08-16 8:48 ` Peter Zijlstra
2010-08-12 17:25 ` [PATCH/RFC 4/5] [PATCH] topology/sysfs: provide book id and siblings attributes Heiko Carstens
2010-08-12 17:25 ` [PATCH/RFC 5/5] [PATCH] topology: add z196 cpu topology support Heiko Carstens
2010-08-19 12:22 ` [PATCH/RFC 0/5] sched: add new 'book' scheduling domain Andreas Herrmann
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100812172623.140578409@de.ibm.com \
--to=heiko.carstens@de.ibm.com \
--cc=a.p.zijlstra@chello.nl \
--cc=andreas.herrmann3@amd.com \
--cc=efault@gmx.de \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=schwidefsky@de.ibm.com \
--cc=suresh.b.siddha@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.