From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Ingo Molnar <mingo@elte.hu>, Thomas Gleixner <tglx@linutronix.de>,
Oleg Nesterov <oleg@tv-sign.ru>,
Steven Rostedt <rostedt@goodmis.org>, Paul Jackson <pj@sgi.com>,
Max Krasnyanskiy <maxk@qualcomm.com>
Cc: linux-kernel@vger.kernel.org, Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [RFC/PATCH 2/4] cpuset: system sets
Date: Wed, 27 Feb 2008 23:21:05 +0100 [thread overview]
Message-ID: <20080227222542.311184000@chello.nl> (raw)
In-Reply-To: 20080227222103.673194000@chello.nl
[-- Attachment #1: cpuset-system.patch --]
[-- Type: text/plain, Size: 9243 bytes --]
Introduce the notion of a System set. A system set will be one that caters to the
general-purpose OS. This patch provides the infrastructure, but doesn't
actually provide any new functionality.
Typical functionality would be setting the IRQ affinity of unbound IRQs to
within the system set. And setting the affinity of unbound kernel threads to
within the system set.
Future patches will provide this.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/cpumask.h | 5 ++
include/linux/cpuset.h | 2
kernel/cpuset.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++-
kernel/sched.c | 3 +
4 files changed, 124 insertions(+), 1 deletion(-)
Index: linux-2.6/kernel/cpuset.c
===================================================================
--- linux-2.6.orig/kernel/cpuset.c
+++ linux-2.6/kernel/cpuset.c
@@ -64,6 +64,7 @@
* short circuit some hooks.
*/
int number_of_cpusets __read_mostly;
+int number_of_system_sets __read_mostly;
/* Forward declare cgroup structures */
struct cgroup_subsys cpuset_subsys;
@@ -128,6 +129,7 @@ typedef enum {
CS_SCHED_LOAD_BALANCE,
CS_SPREAD_PAGE,
CS_SPREAD_SLAB,
+ CS_SYSTEM,
} cpuset_flagbits_t;
/* convenient tests for these bits */
@@ -161,6 +163,11 @@ static inline int is_spread_slab(const s
return test_bit(CS_SPREAD_SLAB, &cs->flags);
}
+static inline int is_system(const struct cpuset *cs)
+{
+ return test_bit(CS_SYSTEM, &cs->flags);
+}
+
/*
* Increment this integer everytime any cpuset changes its
* mems_allowed value. Users of cpusets can track this generation
@@ -183,7 +190,9 @@ static inline int is_spread_slab(const s
static int cpuset_mems_generation;
static struct cpuset top_cpuset = {
- .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
+ .flags = ((1 << CS_CPU_EXCLUSIVE) |
+ (1 << CS_MEM_EXCLUSIVE) |
+ (1 << CS_SYSTEM)),
.cpus_allowed = CPU_MASK_ALL,
.mems_allowed = NODE_MASK_ALL,
};
@@ -465,6 +474,9 @@ static int validate_change(const struct
}
}
+ if (number_of_system_sets == 1 && is_system(cur) && !is_system(trial))
+ return -EINVAL;
+
return 0;
}
@@ -1011,6 +1023,74 @@ static int update_memory_pressure_enable
return 0;
}
+BLOCKING_NOTIFIER_HEAD(system_map_notifier);
+EXPORT_SYMBOL_GPL(system_map_notifier);
+
+int cpus_match_system(cpumask_t mask)
+{
+ cpumask_t online_system, online_mask;
+
+ cpus_and(online_system, cpu_system_map, cpu_online_map);
+ cpus_and(online_mask, mask, cpu_online_map);
+
+ return cpus_equal(online_system, online_mask);
+}
+
+static void rebuild_system_map(void)
+{
+ cpumask_t *new_system_map;
+ struct kfifo *q = NULL;
+ struct cpuset *cp;
+
+ new_system_map = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+ if (!new_system_map)
+ return;
+
+ if (is_system(&top_cpuset)) {
+ cpus_setall(*new_system_map);
+ goto notify;
+ }
+
+ cpus_clear(*new_system_map);
+
+ q = kfifo_alloc(number_of_cpusets * sizeof(cp), GFP_KERNEL, NULL);
+ if (IS_ERR(q))
+ goto done;
+
+ cp = &top_cpuset;
+ __kfifo_put(q, (void *)&cp, sizeof(cp));
+ while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
+ struct cgroup *cont;
+ struct cpuset *child;
+
+ if (is_system(cp)) {
+ cpus_or(*new_system_map,
+ *new_system_map, cp->cpus_allowed);
+ continue;
+ }
+
+ list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
+ child = cgroup_cs(cont);
+ __kfifo_put(q, (void *)&child, sizeof(cp));
+ }
+ }
+
+ if (cpus_empty(*new_system_map))
+ BUG();
+
+notify:
+ if (!cpus_match_system(*new_system_map)) {
+ blocking_notifier_call_chain(&system_map_notifier, 0,
+ new_system_map);
+ }
+ cpu_system_map = *new_system_map;
+
+done:
+ kfree(new_system_map);
+ if (q && !IS_ERR(q))
+ kfifo_free(q);
+}
+
/*
* update_flag - read a 0 or a 1 in a file and update associated flag
* bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
@@ -1029,6 +1109,7 @@ static int update_flag(cpuset_flagbits_t
struct cpuset trialcs;
int err;
int cpus_nonempty, balance_flag_changed;
+ int system_flag_changed;
turning_on = (simple_strtoul(buf, NULL, 10) != 0);
@@ -1045,6 +1126,7 @@ static int update_flag(cpuset_flagbits_t
cpus_nonempty = !cpus_empty(trialcs.cpus_allowed);
balance_flag_changed = (is_sched_load_balance(cs) !=
is_sched_load_balance(&trialcs));
+ system_flag_changed = (is_system(cs) != is_system(&trialcs));
mutex_lock(&callback_mutex);
cs->flags = trialcs.flags;
@@ -1053,6 +1135,15 @@ static int update_flag(cpuset_flagbits_t
if (cpus_nonempty && balance_flag_changed)
rebuild_sched_domains();
+ if (system_flag_changed) {
+ rebuild_system_map();
+
+ if (is_system(cs))
+ number_of_system_sets++;
+ else
+ number_of_system_sets--;
+ }
+
return 0;
}
@@ -1206,6 +1297,7 @@ typedef enum {
FILE_MEMORY_PRESSURE,
FILE_SPREAD_PAGE,
FILE_SPREAD_SLAB,
+ FILE_SYSTEM,
} cpuset_filetype_t;
static ssize_t cpuset_common_file_write(struct cgroup *cont,
@@ -1273,6 +1365,9 @@ static ssize_t cpuset_common_file_write(
retval = update_flag(CS_SPREAD_SLAB, cs, buffer);
cs->mems_generation = cpuset_mems_generation++;
break;
+ case FILE_SYSTEM:
+ retval = update_flag(CS_SYSTEM, cs, buffer);
+ break;
default:
retval = -EINVAL;
goto out2;
@@ -1369,6 +1464,9 @@ static ssize_t cpuset_common_file_read(s
case FILE_SPREAD_SLAB:
*s++ = is_spread_slab(cs) ? '1' : '0';
break;
+ case FILE_SYSTEM:
+ *s++ = is_system(cs) ? '1' : '0';
+ break;
default:
retval = -EINVAL;
goto out;
@@ -1459,6 +1557,13 @@ static struct cftype cft_spread_slab = {
.private = FILE_SPREAD_SLAB,
};
+static struct cftype cft_system = {
+ .name = "system",
+ .read = cpuset_common_file_read,
+ .write = cpuset_common_file_write,
+ .private = FILE_SYSTEM,
+};
+
static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
{
int err;
@@ -1481,6 +1586,8 @@ static int cpuset_populate(struct cgroup
return err;
if ((err = cgroup_add_file(cont, ss, &cft_spread_slab)) < 0)
return err;
+ if ((err = cgroup_add_file(cont, ss, &cft_system)) < 0)
+ return err;
/* memory_pressure_enabled is in root cpuset only */
if (err == 0 && !cont->parent)
err = cgroup_add_file(cont, ss,
@@ -1555,6 +1662,7 @@ static struct cgroup_subsys_state *cpuse
if (is_spread_slab(parent))
set_bit(CS_SPREAD_SLAB, &cs->flags);
set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
+ set_bit(CS_SYSTEM, &cs->flags);
cs->cpus_allowed = CPU_MASK_NONE;
cs->mems_allowed = NODE_MASK_NONE;
cs->mems_generation = cpuset_mems_generation++;
@@ -1562,6 +1670,7 @@ static struct cgroup_subsys_state *cpuse
cs->parent = parent;
number_of_cpusets++;
+ number_of_system_sets++;
return &cs->css ;
}
@@ -1585,8 +1694,11 @@ static void cpuset_destroy(struct cgroup
if (is_sched_load_balance(cs))
update_flag(CS_SCHED_LOAD_BALANCE, cs, "0");
+ if (!is_system(cs))
+ update_flag(CS_SYSTEM, cs, "1");
number_of_cpusets--;
+ number_of_system_sets--;
kfree(cs);
}
@@ -1637,6 +1749,7 @@ int __init cpuset_init(void)
return err;
number_of_cpusets = 1;
+ number_of_system_sets = 1;
return 0;
}
Index: linux-2.6/include/linux/cpumask.h
===================================================================
--- linux-2.6.orig/include/linux/cpumask.h
+++ linux-2.6/include/linux/cpumask.h
@@ -380,6 +380,7 @@ static inline void __cpus_remap(cpumask_
extern cpumask_t cpu_possible_map;
extern cpumask_t cpu_online_map;
extern cpumask_t cpu_present_map;
+extern cpumask_t cpu_system_map;
#if NR_CPUS > 1
#define num_online_cpus() cpus_weight(cpu_online_map)
@@ -388,6 +389,7 @@ extern cpumask_t cpu_present_map;
#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map)
#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map)
#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map)
+#define cpu_system(cpu) cpu_isset((cpu), cpu_system_map)
#else
#define num_online_cpus() 1
#define num_possible_cpus() 1
@@ -395,8 +397,11 @@ extern cpumask_t cpu_present_map;
#define cpu_online(cpu) ((cpu) == 0)
#define cpu_possible(cpu) ((cpu) == 0)
#define cpu_present(cpu) ((cpu) == 0)
+#define cpu_system(cpu) ((cpu) == 0)
#endif
+extern int cpus_match_system(cpumask_t mask);
+
#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
#ifdef CONFIG_SMP
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -4854,6 +4854,9 @@ asmlinkage long sys_sched_setaffinity(pi
cpumask_t cpu_present_map __read_mostly;
EXPORT_SYMBOL(cpu_present_map);
+cpumask_t cpu_system_map __read_mostly = CPU_MASK_ALL;
+EXPORT_SYMBOL(cpu_system_map);
+
#ifndef CONFIG_SMP
cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
EXPORT_SYMBOL(cpu_online_map);
Index: linux-2.6/include/linux/cpuset.h
===================================================================
--- linux-2.6.orig/include/linux/cpuset.h
+++ linux-2.6/include/linux/cpuset.h
@@ -78,6 +78,8 @@ extern void cpuset_track_online_nodes(vo
extern int current_cpuset_is_being_rebound(void);
+extern struct blocking_notifier_head system_map_notifier;
+
#else /* !CONFIG_CPUSETS */
static inline int cpuset_init_early(void) { return 0; }
--
next prev parent reply other threads:[~2008-02-27 22:26 UTC|newest]
Thread overview: 94+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-02-27 22:21 [RFC/PATCH 0/4] CPUSET driven CPU isolation Peter Zijlstra
2008-02-27 22:21 ` [RFC/PATCH 1/4] sched: remove isolcpus Peter Zijlstra
2008-02-27 23:57 ` Max Krasnyanskiy
2008-02-28 10:19 ` Peter Zijlstra
2008-02-28 19:36 ` Max Krasnyansky
2008-02-27 22:21 ` Peter Zijlstra [this message]
2008-02-27 23:39 ` [RFC/PATCH 2/4] cpuset: system sets Paul Jackson
2008-02-28 1:53 ` Max Krasnyanskiy
2008-02-27 23:52 ` Max Krasnyanskiy
2008-02-28 0:11 ` Paul Jackson
2008-02-28 0:29 ` Steven Rostedt
2008-02-28 1:45 ` Max Krasnyanskiy
2008-02-28 3:41 ` Steven Rostedt
2008-02-28 4:58 ` Max Krasnyansky
2008-02-27 22:21 ` [RFC/PATCH 3/4] genirq: system set irq affinities Peter Zijlstra
2008-02-28 0:10 ` Max Krasnyanskiy
2008-02-28 10:19 ` Peter Zijlstra
2008-02-27 22:21 ` [RFC/PATCH 4/4] kthread: system set kthread affinities Peter Zijlstra
2008-02-27 23:38 ` [RFC/PATCH 0/4] CPUSET driven CPU isolation Max Krasnyanskiy
2008-02-28 10:19 ` Peter Zijlstra
2008-02-28 17:33 ` Max Krasnyanskiy
2008-02-28 7:50 ` Ingo Molnar
2008-02-28 8:08 ` Paul Jackson
2008-02-28 9:08 ` Ingo Molnar
2008-02-28 9:17 ` Paul Jackson
2008-02-28 9:32 ` David Rientjes
2008-02-28 10:12 ` David Rientjes
2008-02-28 10:26 ` Peter Zijlstra
2008-02-28 17:37 ` Paul Jackson
2008-02-28 21:24 ` David Rientjes
2008-02-28 22:46 ` Paul Jackson
2008-02-28 23:00 ` David Rientjes
2008-02-29 0:16 ` Paul Jackson
2008-02-29 1:05 ` David Rientjes
2008-02-29 3:34 ` Paul Jackson
2008-02-29 4:00 ` David Rientjes
2008-02-29 6:53 ` Paul Jackson
2008-02-28 10:46 ` Ingo Molnar
2008-02-28 17:47 ` Paul Jackson
2008-02-28 20:11 ` Max Krasnyansky
2008-02-28 20:13 ` Paul Jackson
2008-02-28 20:26 ` Max Krasnyansky
2008-02-28 20:27 ` Paul Jackson
2008-02-28 20:45 ` Max Krasnyansky
2008-02-28 20:23 ` Max Krasnyansky
2008-02-28 17:48 ` Max Krasnyanskiy
2008-02-29 8:31 ` Andrew Morton
2008-02-29 8:36 ` Andrew Morton
2008-02-29 9:10 ` Ingo Molnar
2008-02-29 18:06 ` Max Krasnyanskiy
2008-02-28 12:12 ` Mark Hounschell
2008-02-28 19:57 ` Max Krasnyansky
2008-02-29 18:55 ` [RFC/PATCH] cpuset: cpuset irq affinities Peter Zijlstra
2008-02-29 19:02 ` Ingo Molnar
2008-02-29 20:52 ` Max Krasnyanskiy
2008-02-29 21:03 ` Peter Zijlstra
2008-02-29 21:20 ` Max Krasnyanskiy
2008-03-03 11:57 ` Peter Zijlstra
2008-03-03 17:36 ` Paul Jackson
2008-03-03 17:57 ` Peter Zijlstra
2008-03-03 18:10 ` Paul Jackson
2008-03-03 18:18 ` Peter Zijlstra
2008-03-04 7:35 ` Paul Jackson
2008-03-04 11:06 ` Peter Zijlstra
2008-03-04 19:52 ` Max Krasnyanskiy
2008-03-05 1:11 ` Paul Jackson
2008-03-05 8:37 ` Peter Zijlstra
2008-03-05 8:50 ` Ingo Molnar
2008-03-05 12:35 ` Paul Jackson
2008-03-05 12:43 ` Ingo Molnar
2008-03-05 17:44 ` Paul Jackson
2008-03-05 19:17 ` Max Krasnyansky
2008-03-06 13:47 ` Paul Jackson
2008-03-06 15:21 ` Peter Zijlstra
2008-03-07 3:40 ` Paul Jackson
2008-03-07 6:39 ` Paul Jackson
2008-03-07 8:47 ` Paul Menage
2008-03-07 14:57 ` Paul Jackson
2008-03-03 18:41 ` Paul Menage
2008-03-03 18:52 ` Paul Jackson
2008-03-04 5:26 ` Paul Menage
2008-03-04 6:15 ` Paul Jackson
2008-03-04 6:21 ` Paul Menage
2008-03-04 6:26 ` Paul Jackson
2008-03-04 6:34 ` Paul Menage
2008-03-04 6:51 ` Paul Jackson
2008-02-29 20:55 ` Paul Jackson
2008-02-29 21:14 ` Peter Zijlstra
2008-02-29 21:29 ` Ingo Molnar
2008-02-29 21:32 ` Ingo Molnar
2008-02-29 21:42 ` Max Krasnyanskiy
2008-02-29 22:00 ` Paul Jackson
2008-02-29 21:53 ` Paul Jackson
2008-03-02 5:18 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080227222542.311184000@chello.nl \
--to=a.p.zijlstra@chello.nl \
--cc=linux-kernel@vger.kernel.org \
--cc=maxk@qualcomm.com \
--cc=mingo@elte.hu \
--cc=oleg@tv-sign.ru \
--cc=pj@sgi.com \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox