From: Frederic Weisbecker <fweisbec@gmail.com>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>,
Peter Zijlstra <peterz@infradead.org>,
Chris Metcalf <cmetcalf@mellanox.com>,
Thomas Gleixner <tglx@linutronix.de>,
Luiz Capitulino <lcapitulino@redhat.com>,
Christoph Lameter <cl@linux.com>,
"Paul E . McKenney" <paulmck@linux.vnet.ibm.com>,
Ingo Molnar <mingo@kernel.org>, Mike Galbraith <efault@gmx.de>,
Rik van Riel <riel@redhat.com>, Wanpeng Li <kernellwp@gmail.com>
Subject: [RFC PATCH 12/12] housekeeping: Reimplement isolcpus on housekeeping
Date: Wed, 23 Aug 2017 03:51:11 +0200 [thread overview]
Message-ID: <1503453071-952-13-git-send-email-fweisbec@gmail.com> (raw)
In-Reply-To: <1503453071-952-1-git-send-email-fweisbec@gmail.com>
We want to centralize the isolation features on the housekeeping
subsystem and scheduler isolation is a significant part of it.
While at it, this is a proposal for a reimplementation of isolcpus=
that doesn't involve scheduler domain isolation. Therefore this
brings a behaviour change: all user tasks inherit the affinity of init/1,
which avoids the isolcpus= range. But if a task later overrides its
affinity such that it intersects an isolated CPU, load balancing may
occur on it.
On the other hand, such a reimplementation, which doesn't shortcut
scheduler internals, makes a better candidate for an interface extension
to cpuset.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Wanpeng Li <kernellwp@gmail.com>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
---
drivers/base/cpu.c | 10 ++++++++-
include/linux/sched.h | 2 --
kernel/cgroup/cpuset.c | 13 ++---------
kernel/housekeeping.c | 57 +++++++++++++++++++++++++++++++++++++++++--------
kernel/sched/core.c | 16 +-------------
kernel/sched/topology.c | 19 ++---------------
6 files changed, 62 insertions(+), 55 deletions(-)
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 2c3b359..35b2b10 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -18,6 +18,7 @@
#include <linux/cpufeature.h>
#include <linux/tick.h>
#include <linux/pm_qos.h>
+#include <linux/housekeeping.h>
#include "base.h"
@@ -271,8 +272,15 @@ static ssize_t print_cpus_isolated(struct device *dev,
struct device_attribute *attr, char *buf)
{
int n = 0, len = PAGE_SIZE-2;
+ cpumask_var_t isolated;
- n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(cpu_isolated_map));
+ if (!alloc_cpumask_var(&isolated, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_andnot(isolated, cpu_possible_mask, housekeeping_cpumask(HK_FLAG_SCHED));
+ n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(isolated));
+
+ free_cpumask_var(isolated);
return n;
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c28b182..816ff52 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -166,8 +166,6 @@ struct task_group;
/* Task command name length: */
#define TASK_COMM_LEN 16
-extern cpumask_var_t cpu_isolated_map;
-
extern void scheduler_tick(void);
#define MAX_SCHEDULE_TIMEOUT LONG_MAX
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 8d51516..5d71020 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -639,7 +639,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
int csn; /* how many cpuset ptrs in csa so far */
int i, j, k; /* indices for partition finding loops */
cpumask_var_t *doms; /* resulting partition; i.e. sched domains */
- cpumask_var_t non_isolated_cpus; /* load balanced CPUs */
struct sched_domain_attr *dattr; /* attributes for custom domains */
int ndoms = 0; /* number of sched domains in result */
int nslot; /* next empty doms[] struct cpumask slot */
@@ -649,10 +648,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
dattr = NULL;
csa = NULL;
- if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL))
- goto done;
- cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
-
/* Special case for the 99% of systems with one, full, sched domain */
if (is_sched_load_balance(&top_cpuset)) {
ndoms = 1;
@@ -665,8 +660,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
*dattr = SD_ATTR_INIT;
update_domain_attr_tree(dattr, &top_cpuset);
}
- cpumask_and(doms[0], top_cpuset.effective_cpus,
- non_isolated_cpus);
+ cpumask_copy(doms[0], top_cpuset.effective_cpus);
goto done;
}
@@ -689,8 +683,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
* the corresponding sched domain.
*/
if (!cpumask_empty(cp->cpus_allowed) &&
- !(is_sched_load_balance(cp) &&
- cpumask_intersects(cp->cpus_allowed, non_isolated_cpus)))
+ !(is_sched_load_balance(cp)))
continue;
if (is_sched_load_balance(cp))
@@ -772,7 +765,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
if (apn == b->pn) {
cpumask_or(dp, dp, b->effective_cpus);
- cpumask_and(dp, dp, non_isolated_cpus);
if (dattr)
update_domain_attr_tree(dattr + nslot, b);
@@ -785,7 +777,6 @@ static int generate_sched_domains(cpumask_var_t **domains,
BUG_ON(nslot != ndoms);
done:
- free_cpumask_var(non_isolated_cpus);
kfree(csa);
/*
diff --git a/kernel/housekeeping.c b/kernel/housekeeping.c
index 633a0d9..1fd9316 100644
--- a/kernel/housekeeping.c
+++ b/kernel/housekeeping.c
@@ -58,30 +58,69 @@ void __init housekeeping_init(void)
WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
}
-static int __init housekeeping_nohz_full_setup(char *str)
+static int __init housekeeping_setup(char *str, enum hk_flags flags)
{
cpumask_var_t non_housekeeping_mask;
alloc_bootmem_cpumask_var(&non_housekeeping_mask);
if (cpulist_parse(str, non_housekeeping_mask) < 0) {
- pr_warn("Housekeeping: Incorrect nohz_full cpumask\n");
free_bootmem_cpumask_var(non_housekeeping_mask);
return 0;
}
- alloc_bootmem_cpumask_var(&housekeeping_mask);
- cpumask_andnot(housekeeping_mask, cpu_possible_mask, non_housekeeping_mask);
+ if (!housekeeping_flags) {
+ alloc_bootmem_cpumask_var(&housekeeping_mask);
+ cpumask_andnot(housekeeping_mask,
+ cpu_possible_mask, non_housekeeping_mask);
+ if (cpumask_empty(housekeeping_mask))
+ cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
+ } else {
+ cpumask_var_t tmp;
- if (cpumask_empty(housekeeping_mask))
- cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
+ alloc_bootmem_cpumask_var(&tmp);
+ cpumask_andnot(tmp, cpu_possible_mask, non_housekeeping_mask);
+ if (!cpumask_equal(tmp, housekeeping_mask)) {
+ pr_warn("Housekeeping: nohz_full= must match isolcpus=\n");
+ free_bootmem_cpumask_var(tmp);
+ free_bootmem_cpumask_var(non_housekeeping_mask);
+ return 0;
+ }
+ free_bootmem_cpumask_var(tmp);
+ }
- housekeeping_flags = HK_FLAG_TICK | HK_FLAG_TIMER |
- HK_FLAG_RCU | HK_FLAG_MISC;
+ if ((flags & HK_FLAG_TICK) && !(housekeeping_flags & HK_FLAG_TICK))
+ tick_nohz_full_setup(non_housekeeping_mask);
- tick_nohz_full_setup(non_housekeeping_mask);
+ housekeeping_flags |= flags;
free_bootmem_cpumask_var(non_housekeeping_mask);
return 1;
}
+
+static int __init housekeeping_nohz_full_setup(char *str)
+{
+ unsigned int flags;
+ int ret;
+
+ flags = HK_FLAG_TICK | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC;
+ ret = housekeeping_setup(str, flags);
+ if (!ret)
+ pr_warn("Housekeeping: Incorrect nohz_full cpumask\n");
+ return ret;
+}
__setup("nohz_full=", housekeeping_nohz_full_setup);
+
+static int __init housekeeping_isolcpus_setup(char *str)
+{
+ unsigned int flags;
+ int ret;
+
+ flags = HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC |
+ HK_FLAG_SCHED | HK_FLAG_WORKQUEUE | HK_FLAG_KTHREAD;
+ ret = housekeeping_setup(str, flags);
+ if (!ret)
+ pr_warn("Housekeeping: Error, all isolcpus= values must be between 0 and %d\n", nr_cpu_ids);
+ return ret;
+}
+__setup("isolcpus=", housekeeping_isolcpus_setup);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 877c85d..269f3ac 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -84,9 +84,6 @@ __read_mostly int scheduler_running;
*/
int sysctl_sched_rt_runtime = 950000;
-/* CPUs with isolated domains */
-cpumask_var_t cpu_isolated_map;
-
/*
* __task_rq_lock - lock the rq @p resides on.
*/
@@ -5672,10 +5669,6 @@ static inline void sched_init_smt(void) { }
void __init sched_init_smp(void)
{
- cpumask_var_t non_isolated_cpus;
-
- alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
-
sched_init_numa();
/*
@@ -5685,16 +5678,12 @@ void __init sched_init_smp(void)
*/
mutex_lock(&sched_domains_mutex);
sched_init_domains(cpu_active_mask);
- cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
- if (cpumask_empty(non_isolated_cpus))
- cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
mutex_unlock(&sched_domains_mutex);
/* Move init over to a non-isolated CPU */
- if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_SCHED)) < 0)
BUG();
sched_init_granularity();
- free_cpumask_var(non_isolated_cpus);
init_sched_rt_class();
init_sched_dl_class();
@@ -5898,9 +5887,6 @@ void __init sched_init(void)
calc_load_update = jiffies + LOAD_FREQ;
#ifdef CONFIG_SMP
- /* May be allocated at isolcpus cmdline parse time */
- if (cpu_isolated_map == NULL)
- zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
idle_thread_set_boot_cpu();
set_cpu_rq_start_time(smp_processor_id());
#endif
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index bd8b6d6..e060e28 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -466,21 +466,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
update_top_cache_domain(cpu);
}
-/* Setup the mask of CPUs configured for isolated domains */
-static int __init isolated_cpu_setup(char *str)
-{
- int ret;
-
- alloc_bootmem_cpumask_var(&cpu_isolated_map);
- ret = cpulist_parse(str, cpu_isolated_map);
- if (ret) {
- pr_err("sched: Error, all isolcpus= values must be between 0 and %d\n", nr_cpu_ids);
- return 0;
- }
- return 1;
-}
-__setup("isolcpus=", isolated_cpu_setup);
-
struct s_data {
struct sched_domain ** __percpu sd;
struct root_domain *rd;
@@ -1775,7 +1760,7 @@ int sched_init_domains(const struct cpumask *cpu_map)
doms_cur = alloc_sched_domains(ndoms_cur);
if (!doms_cur)
doms_cur = &fallback_doms;
- cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
+ cpumask_copy(doms_cur[0], cpu_map);
err = build_sched_domains(doms_cur[0], NULL);
register_sched_domain_sysctl();
@@ -1871,7 +1856,7 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
if (doms_new == NULL) {
n = 0;
doms_new = &fallback_doms;
- cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
+ cpumask_copy(doms_new[0], cpu_active_mask);
WARN_ON_ONCE(dattr_new);
}
--
2.7.4
next prev parent reply other threads:[~2017-08-23 1:52 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-08-23 1:50 [RFC 00/12] Introduce housekeeping subsystem v2 Frederic Weisbecker
2017-08-23 1:51 ` [RFC PATCH 01/12] housekeeping: Move housekeeping related code to its own file Frederic Weisbecker
2017-08-31 20:16 ` Rik van Riel
2017-08-31 22:58 ` Frederic Weisbecker
2017-08-23 1:51 ` [RFC PATCH 02/12] watchdog: Use housekeeping_cpumask() instead of ad-hoc version Frederic Weisbecker
2017-08-23 1:51 ` [RFC PATCH 03/12] housekeeping: Provide a dynamic off-case to housekeeping_any_cpu() Frederic Weisbecker
2017-08-23 1:51 ` [RFC PATCH 04/12] housekeeping: Make housekeeping cpumask private Frederic Weisbecker
2017-08-23 1:51 ` [RFC PATCH 05/12] housekeeping: Use its own static key Frederic Weisbecker
2017-08-23 1:51 ` [RFC PATCH 06/12] housekeeping: Rename is_housekeeping_cpu to housekeeping_cpu Frederic Weisbecker
2017-08-23 1:51 ` [RFC PATCH 07/12] housekeeping: Move it under own config, independant from NO_HZ Frederic Weisbecker
2017-08-23 1:51 ` [RFC PATCH 08/12] housekeeping: Introduce housekeeping flags Frederic Weisbecker
2017-08-23 1:51 ` [RFC PATCH 09/12] workqueue: Affine unbound workqueues to housekeeping cpumask Frederic Weisbecker
2017-08-23 1:51 ` [RFC PATCH 10/12] housekeeping: Affine unbound kthreads Frederic Weisbecker
2017-08-23 1:51 ` [RFC PATCH 11/12] housekeeping: Handle nohz_full= parameter Frederic Weisbecker
2017-08-23 1:51 ` Frederic Weisbecker [this message]
2017-08-23 14:55 ` [RFC PATCH 12/12] housekeeping: Reimplement isolcpus on housekeeping Christopher Lameter
2017-08-24 13:19 ` Frederic Weisbecker
2017-08-28 10:10 ` Peter Zijlstra
2017-08-28 15:38 ` Christopher Lameter
2017-08-28 10:09 ` Peter Zijlstra
2017-08-28 13:23 ` Frederic Weisbecker
2017-08-28 13:31 ` Peter Zijlstra
2017-08-28 15:27 ` Frederic Weisbecker
2017-08-28 16:24 ` Peter Zijlstra
2017-08-28 16:53 ` Christopher Lameter
2017-08-28 17:33 ` Frederic Weisbecker
2017-08-31 18:53 ` Thomas Gleixner
2017-08-31 23:00 ` Frederic Weisbecker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1503453071-952-13-git-send-email-fweisbec@gmail.com \
--to=fweisbec@gmail.com \
--cc=cl@linux.com \
--cc=cmetcalf@mellanox.com \
--cc=efault@gmx.de \
--cc=kernellwp@gmail.com \
--cc=lcapitulino@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=paulmck@linux.vnet.ibm.com \
--cc=peterz@infradead.org \
--cc=riel@redhat.com \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.