public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Heiko Carstens <heiko.carstens@de.ibm.com>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Mike Galbraith <efault@gmx.de>, Ingo Molnar <mingo@elte.hu>,
	Suresh Siddha <suresh.b.siddha@intel.com>,
	Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: linux-kernel@vger.kernel.org,
	Martin Schwidefsky <schwidefsky@de.ibm.com>,
	Heiko Carstens <heiko.carstens@de.ibm.com>
Subject: [PATCH/RFC 3/5] [PATCH] sched: add book scheduling domain
Date: Thu, 12 Aug 2010 19:25:47 +0200	[thread overview]
Message-ID: <20100812172623.140578409@de.ibm.com> (raw)
In-Reply-To: 20100812172544.655648128@de.ibm.com

[-- Attachment #1: 03-sched-book.diff --]
[-- Type: text/plain, Size: 12431 bytes --]

From: Heiko Carstens <heiko.carstens@de.ibm.com>

On top of the SMT and MC scheduling domains this adds the BOOK scheduling
domain. This is useful for machines that have a four-level cache hierarchy
but do not fall into the NUMA category.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---

 arch/s390/defconfig      |    1 
 include/linux/sched.h    |   19 +++++++
 include/linux/topology.h |    6 ++
 kernel/sched.c           |  112 ++++++++++++++++++++++++++++++++++++++++++++---
 kernel/sched_fair.c      |   11 ++--
 5 files changed, 137 insertions(+), 12 deletions(-)

diff -urpN linux-2.6/arch/s390/defconfig linux-2.6-patched/arch/s390/defconfig
--- linux-2.6/arch/s390/defconfig	2010-08-02 00:11:14.000000000 +0200
+++ linux-2.6-patched/arch/s390/defconfig	2010-08-11 13:47:23.000000000 +0200
@@ -248,6 +248,7 @@ CONFIG_64BIT=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=32
 CONFIG_HOTPLUG_CPU=y
+# CONFIG_SCHED_BOOK is not set
 CONFIG_COMPAT=y
 CONFIG_SYSVIPC_COMPAT=y
 CONFIG_AUDIT_ARCH=y
diff -urpN linux-2.6/include/linux/sched.h linux-2.6-patched/include/linux/sched.h
--- linux-2.6/include/linux/sched.h	2010-08-11 13:47:16.000000000 +0200
+++ linux-2.6-patched/include/linux/sched.h	2010-08-11 13:47:23.000000000 +0200
@@ -807,7 +807,9 @@ enum powersavings_balance_level {
 	MAX_POWERSAVINGS_BALANCE_LEVELS
 };
 
-extern int sched_mc_power_savings, sched_smt_power_savings;
+extern int sched_smt_power_savings;
+extern int sched_mc_power_savings;
+extern int sched_book_power_savings;
 
 static inline int sd_balance_for_mc_power(void)
 {
@@ -820,11 +822,23 @@ static inline int sd_balance_for_mc_powe
 	return 0;
 }
 
-static inline int sd_balance_for_package_power(void)
+static inline int sd_balance_for_book_power(void)
 {
 	if (sched_mc_power_savings | sched_smt_power_savings)
 		return SD_POWERSAVINGS_BALANCE;
 
+	if (!sched_book_power_savings)
+		return SD_PREFER_SIBLING;
+
+	return 0;
+}
+
+static inline int sd_balance_for_package_power(void)
+{
+	if (sched_book_power_savings | sched_mc_power_savings |
+	    sched_smt_power_savings)
+		return SD_POWERSAVINGS_BALANCE;
+
 	return SD_PREFER_SIBLING;
 }
 
@@ -875,6 +889,7 @@ enum sched_domain_level {
 	SD_LV_NONE = 0,
 	SD_LV_SIBLING,
 	SD_LV_MC,
+	SD_LV_BOOK,
 	SD_LV_CPU,
 	SD_LV_NODE,
 	SD_LV_ALLNODES,
diff -urpN linux-2.6/include/linux/topology.h linux-2.6-patched/include/linux/topology.h
--- linux-2.6/include/linux/topology.h	2010-08-11 13:47:16.000000000 +0200
+++ linux-2.6-patched/include/linux/topology.h	2010-08-11 13:47:23.000000000 +0200
@@ -201,6 +201,12 @@ int arch_update_cpu_topology(void);
 	.balance_interval	= 64,					\
 }
 
+#ifdef CONFIG_SCHED_BOOK
+#ifndef SD_BOOK_INIT
+#error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!!
+#endif
+#endif /* CONFIG_SCHED_BOOK */
+
 #ifdef CONFIG_NUMA
 #ifndef SD_NODE_INIT
 #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!!
diff -urpN linux-2.6/kernel/sched.c linux-2.6-patched/kernel/sched.c
--- linux-2.6/kernel/sched.c	2010-08-11 13:47:23.000000000 +0200
+++ linux-2.6-patched/kernel/sched.c	2010-08-11 13:47:23.000000000 +0200
@@ -6472,7 +6472,9 @@ static void sched_domain_node_span(int n
 }
 #endif /* CONFIG_NUMA */
 
-int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
+int sched_smt_power_savings;
+int sched_mc_power_savings;
+int sched_book_power_savings;
 
 /*
  * The cpus mask in sched_group and sched_domain hangs off the end.
@@ -6500,6 +6502,7 @@ struct s_data {
 	cpumask_var_t		nodemask;
 	cpumask_var_t		this_sibling_map;
 	cpumask_var_t		this_core_map;
+	cpumask_var_t		this_book_map;
 	cpumask_var_t		send_covered;
 	cpumask_var_t		tmpmask;
 	struct sched_group	**sched_group_nodes;
@@ -6511,6 +6514,7 @@ enum s_alloc {
 	sa_rootdomain,
 	sa_tmpmask,
 	sa_send_covered,
+	sa_this_book_map,
 	sa_this_core_map,
 	sa_this_sibling_map,
 	sa_nodemask,
@@ -6564,6 +6568,31 @@ cpu_to_core_group(int cpu, const struct 
 }
 #endif /* CONFIG_SCHED_MC */
 
+/*
+ * book sched-domains:
+ */
+#ifdef CONFIG_SCHED_BOOK
+static DEFINE_PER_CPU(struct static_sched_domain, book_domains);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_book);
+
+static int
+cpu_to_book_group(int cpu, const struct cpumask *cpu_map,
+		  struct sched_group **sg, struct cpumask *mask)
+{
+	int group = cpu;
+#ifdef CONFIG_SCHED_MC
+	cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
+	group = cpumask_first(mask);
+#elif defined(CONFIG_SCHED_SMT)
+	cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
+	group = cpumask_first(mask);
+#endif
+	if (sg)
+		*sg = &per_cpu(sched_group_book, group).sg;
+	return group;
+}
+#endif /* CONFIG_SCHED_BOOK */
+
 static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
 static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
 
@@ -6572,7 +6601,10 @@ cpu_to_phys_group(int cpu, const struct 
 		  struct sched_group **sg, struct cpumask *mask)
 {
 	int group;
-#ifdef CONFIG_SCHED_MC
+#ifdef CONFIG_SCHED_BOOK
+	cpumask_and(mask, cpu_book_mask(cpu), cpu_map);
+	group = cpumask_first(mask);
+#elif defined(CONFIG_SCHED_MC)
 	cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
 	group = cpumask_first(mask);
 #elif defined(CONFIG_SCHED_SMT)
@@ -6833,6 +6865,9 @@ SD_INIT_FUNC(CPU)
 #ifdef CONFIG_SCHED_MC
  SD_INIT_FUNC(MC)
 #endif
+#ifdef CONFIG_SCHED_BOOK
+ SD_INIT_FUNC(BOOK)
+#endif
 
 static int default_relax_domain_level = -1;
 
@@ -6882,6 +6917,8 @@ static void __free_domain_allocs(struct 
 		free_cpumask_var(d->tmpmask); /* fall through */
 	case sa_send_covered:
 		free_cpumask_var(d->send_covered); /* fall through */
+	case sa_this_book_map:
+		free_cpumask_var(d->this_book_map); /* fall through */
 	case sa_this_core_map:
 		free_cpumask_var(d->this_core_map); /* fall through */
 	case sa_this_sibling_map:
@@ -6928,8 +6965,10 @@ static enum s_alloc __visit_domain_alloc
 		return sa_nodemask;
 	if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL))
 		return sa_this_sibling_map;
-	if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
+	if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL))
 		return sa_this_core_map;
+	if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
+		return sa_this_book_map;
 	if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
 		return sa_send_covered;
 	d->rd = alloc_rootdomain();
@@ -6987,6 +7026,23 @@ static struct sched_domain *__build_cpu_
 	return sd;
 }
 
+static struct sched_domain *__build_book_sched_domain(struct s_data *d,
+	const struct cpumask *cpu_map, struct sched_domain_attr *attr,
+	struct sched_domain *parent, int i)
+{
+	struct sched_domain *sd = parent;
+#ifdef CONFIG_SCHED_BOOK
+	sd = &per_cpu(book_domains, i).sd;
+	SD_INIT(sd, BOOK);
+	set_domain_attribute(sd, attr);
+	cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i));
+	sd->parent = parent;
+	parent->child = sd;
+	cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask);
+#endif
+	return sd;
+}
+
 static struct sched_domain *__build_mc_sched_domain(struct s_data *d,
 	const struct cpumask *cpu_map, struct sched_domain_attr *attr,
 	struct sched_domain *parent, int i)
@@ -7044,6 +7100,15 @@ static void build_sched_groups(struct s_
 						d->send_covered, d->tmpmask);
 		break;
 #endif
+#ifdef CONFIG_SCHED_BOOK
+	case SD_LV_BOOK: /* set up book groups */
+		cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu));
+		if (cpu == cpumask_first(d->this_book_map))
+			init_sched_build_groups(d->this_book_map, cpu_map,
+						&cpu_to_book_group,
+						d->send_covered, d->tmpmask);
+		break;
+#endif
 	case SD_LV_CPU: /* set up physical groups */
 		cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map);
 		if (!cpumask_empty(d->nodemask))
@@ -7091,12 +7156,14 @@ static int __build_sched_domains(const s
 
 		sd = __build_numa_sched_domains(&d, cpu_map, attr, i);
 		sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i);
+		sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i);
 		sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i);
 		sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i);
 	}
 
 	for_each_cpu(i, cpu_map) {
 		build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i);
+		build_sched_groups(&d, SD_LV_BOOK, cpu_map, i);
 		build_sched_groups(&d, SD_LV_MC, cpu_map, i);
 	}
 
@@ -7127,6 +7194,12 @@ static int __build_sched_domains(const s
 		init_sched_groups_power(i, sd);
 	}
 #endif
+#ifdef CONFIG_SCHED_BOOK
+	for_each_cpu(i, cpu_map) {
+		sd = &per_cpu(book_domains, i).sd;
+		init_sched_groups_power(i, sd);
+	}
+#endif
 
 	for_each_cpu(i, cpu_map) {
 		sd = &per_cpu(phys_domains, i).sd;
@@ -7152,6 +7225,8 @@ static int __build_sched_domains(const s
 		sd = &per_cpu(cpu_domains, i).sd;
 #elif defined(CONFIG_SCHED_MC)
 		sd = &per_cpu(core_domains, i).sd;
+#elif defined(CONFIG_SCHED_BOOK)
+		sd = &per_cpu(book_domains, i).sd;
 #else
 		sd = &per_cpu(phys_domains, i).sd;
 #endif
@@ -7368,7 +7443,8 @@ match2:
 	mutex_unlock(&sched_domains_mutex);
 }
 
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+#if defined(CONFIG_SCHED_BOOK) || defined(CONFIG_SCHED_MC) || \
+    defined(CONFIG_SCHED_SMT)
 static void arch_reinit_sched_domains(void)
 {
 	get_online_cpus();
@@ -7405,6 +7481,9 @@ static ssize_t sched_power_savings_store
 	case SD_LV_MC:
 		sched_mc_power_savings = level;
 		break;
+	case SD_LV_BOOK:
+		sched_book_power_savings = level;
+		break;
 	default:
 		break;
 	}
@@ -7414,6 +7493,24 @@ static ssize_t sched_power_savings_store
 	return count;
 }
 
+#ifdef CONFIG_SCHED_BOOK
+static ssize_t sched_book_power_savings_show(struct sysdev_class *class,
+					     struct sysdev_class_attribute *attr,
+					     char *page)
+{
+	return sprintf(page, "%u\n", sched_book_power_savings);
+}
+static ssize_t sched_book_power_savings_store(struct sysdev_class *class,
+					      struct sysdev_class_attribute *attr,
+					      const char *buf, size_t count)
+{
+	return sched_power_savings_store(buf, count, SD_LV_BOOK);
+}
+static SYSDEV_CLASS_ATTR(sched_book_power_savings, 0644,
+			 sched_book_power_savings_show,
+			 sched_book_power_savings_store);
+#endif
+
 #ifdef CONFIG_SCHED_MC
 static ssize_t sched_mc_power_savings_show(struct sysdev_class *class,
 					   struct sysdev_class_attribute *attr,
@@ -7464,9 +7561,14 @@ int __init sched_create_sysfs_power_savi
 		err = sysfs_create_file(&cls->kset.kobj,
 					&attr_sched_mc_power_savings.attr);
 #endif
+#ifdef CONFIG_SCHED_BOOK
+	if (!err && book_capable())
+		err = sysfs_create_file(&cls->kset.kobj,
+					&attr_sched_book_power_savings.attr);
+#endif
 	return err;
 }
-#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+#endif /* CONFIG_SCHED_BOOK || CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 
 /*
  * Update cpusets according to cpu_active mask.  If cpusets are
diff -urpN linux-2.6/kernel/sched_fair.c linux-2.6-patched/kernel/sched_fair.c
--- linux-2.6/kernel/sched_fair.c	2010-08-11 13:47:16.000000000 +0200
+++ linux-2.6-patched/kernel/sched_fair.c	2010-08-11 13:47:23.000000000 +0200
@@ -2039,7 +2039,8 @@ struct sd_lb_stats {
 	unsigned long busiest_group_capacity;
 
 	int group_imb; /* Is there imbalance in this sd */
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+#if defined(CONFIG_SCHED_BOOK) || defined(CONFIG_SCHED_MC) || \
+    defined(CONFIG_SCHED_SMT)
 	int power_savings_balance; /* Is powersave balance needed for this sd */
 	struct sched_group *group_min; /* Least loaded group in sd */
 	struct sched_group *group_leader; /* Group which relieves group_min */
@@ -2096,8 +2097,8 @@ static inline int get_sd_load_idx(struct
 	return load_idx;
 }
 
-
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+#if defined(CONFIG_SCHED_BOOK) || defined(CONFIG_SCHED_MC) || \
+    defined(CONFIG_SCHED_SMT)
 /**
  * init_sd_power_savings_stats - Initialize power savings statistics for
  * the given sched_domain, during load balancing.
@@ -2217,7 +2218,7 @@ static inline int check_power_save_busie
 	return 1;
 
 }
-#else /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+#else /* CONFIG_SCHED_BOOK || CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 static inline void init_sd_power_savings_stats(struct sched_domain *sd,
 	struct sd_lb_stats *sds, enum cpu_idle_type idle)
 {
@@ -2235,7 +2236,7 @@ static inline int check_power_save_busie
 {
 	return 0;
 }
-#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+#endif /* CONFIG_SCHED_BOOK || CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 
 
 unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)


  parent reply	other threads:[~2010-08-12 17:24 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-08-12 17:25 [PATCH/RFC 0/5] sched: add new 'book' scheduling domain Heiko Carstens
2010-08-12 17:25 ` [PATCH/RFC 1/5] [PATCH] sched: merge cpu_to_core_group functions Heiko Carstens
2010-08-13 21:11   ` Suresh Siddha
2010-08-31  8:26     ` Heiko Carstens
2010-08-12 17:25 ` [PATCH/RFC 2/5] [PATCH] sched: pass sched_domain_level to sched_power_savings_store Heiko Carstens
2010-08-13 21:13   ` Suresh Siddha
2010-08-19 11:36     ` Andreas Herrmann
2010-08-16  8:29   ` Peter Zijlstra
2010-08-19 11:41     ` Andreas Herrmann
2010-08-19 12:35       ` Peter Zijlstra
2010-08-19 12:32         ` Andreas Herrmann
2010-08-12 17:25 ` Heiko Carstens [this message]
2010-08-13 21:22   ` [PATCH/RFC 3/5] [PATCH] sched: add book scheduling domain Suresh Siddha
2010-08-16  8:48     ` Peter Zijlstra
2010-08-12 17:25 ` [PATCH/RFC 4/5] [PATCH] topology/sysfs: provide book id and siblings attributes Heiko Carstens
2010-08-12 17:25 ` [PATCH/RFC 5/5] [PATCH] topology: add z196 cpu topology support Heiko Carstens
2010-08-19 12:22 ` [PATCH/RFC 0/5] sched: add new 'book' scheduling domain Andreas Herrmann

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100812172623.140578409@de.ibm.com \
    --to=heiko.carstens@de.ibm.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=andreas.herrmann3@amd.com \
    --cc=efault@gmx.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=schwidefsky@de.ibm.com \
    --cc=suresh.b.siddha@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox