From: Frederic Weisbecker <frederic@kernel.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Chris Metcalf <cmetcalf@mellanox.com>,
Thomas Gleixner <tglx@linutronix.de>,
Luiz Capitulino <lcapitulino@redhat.com>,
Christoph Lameter <cl@linux.com>,
"Paul E . McKenney" <paulmck@linux.vnet.ibm.com>,
Ingo Molnar <mingo@kernel.org>, Wanpeng Li <kernellwp@gmail.com>,
Mike Galbraith <efault@gmx.de>, Rik van Riel <riel@redhat.com>
Subject: [PATCH 4/5] sched/isolation: Residual 1Hz scheduler tick offload
Date: Tue, 19 Dec 2017 04:23:57 +0100 [thread overview]
Message-ID: <1513653838-31314-5-git-send-email-frederic@kernel.org> (raw)
In-Reply-To: <1513653838-31314-1-git-send-email-frederic@kernel.org>
When a CPU runs in full dynticks mode, a 1Hz tick remains in order to
keep the scheduler stats alive. However, this residual tick is a burden
for Real-Time tasks that can't stand any interruption at all.
Adding the boot parameter "isolcpus=nohz_offload" will now outsource
these scheduler ticks to the global workqueue so that a housekeeping CPU
handles that tick remotely.
Note that it's still up to the user to affine the global workqueues to the
housekeeping CPUs, either through /sys/devices/virtual/workqueue/cpumask or
through domain isolation.
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <kernellwp@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
---
kernel/sched/core.c | 2 ++
kernel/sched/isolation.c | 4 +++
kernel/sched/sched.h | 6 ++++
kernel/sched/tick.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++--
4 files changed, 88 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b6f74c8..f50ba18 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5531,6 +5531,7 @@ int sched_cpu_starting(unsigned int cpu)
{
set_cpu_rq_start_time(cpu);
sched_rq_cpu_starting(cpu);
+ sched_tick_start(cpu);
return 0;
}
@@ -5542,6 +5543,7 @@ int sched_cpu_dying(unsigned int cpu)
/* Handle pending wakeups and then migrate everything off */
sched_ttwu_pending();
+ sched_tick_stop(cpu);
rq_lock_irqsave(rq, &rf);
if (rq->rd) {
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 264ddcd..c5e7e90a 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/static_key.h>
#include <linux/ctype.h>
+#include "sched.h"
DEFINE_STATIC_KEY_FALSE(housekeeping_overriden);
EXPORT_SYMBOL_GPL(housekeeping_overriden);
@@ -60,6 +61,9 @@ void __init housekeeping_init(void)
static_branch_enable(&housekeeping_overriden);
+ if (housekeeping_flags & HK_FLAG_TICK_SCHED)
+ sched_tick_offload_init();
+
/* We need at least one CPU to handle housekeeping work */
WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 16eef0c..57821c9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1587,6 +1587,9 @@ extern void post_init_entity_util_avg(struct sched_entity *se);
#ifdef CONFIG_NO_HZ_FULL
extern bool sched_can_stop_tick(struct rq *rq);
+extern void sched_tick_start(int cpu);
+extern void sched_tick_stop(int cpu);
+extern int __init sched_tick_offload_init(void);
/*
* Tick may be needed by tasks in the runqueue depending on their policy and
@@ -1611,6 +1614,9 @@ static inline void sched_update_tick_dependency(struct rq *rq)
tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
}
#else
+static inline void sched_tick_start(int cpu) { }
+static inline void sched_tick_stop(int cpu) { }
+static inline int sched_tick_offload_init(void) { return 0; }
static inline void sched_update_tick_dependency(struct rq *rq) { }
#endif
diff --git a/kernel/sched/tick.c b/kernel/sched/tick.c
index 5eabfe3..fc31f9e 100644
--- a/kernel/sched/tick.c
+++ b/kernel/sched/tick.c
@@ -1,5 +1,6 @@
#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <linux/sched/isolation.h>
#include <linux/perf_event.h>
#include "sched.h"
@@ -50,9 +51,14 @@ void scheduler_tick(void)
*/
u64 scheduler_tick_max_deferment(void)
{
- struct rq *rq = this_rq();
- unsigned long next, now = READ_ONCE(jiffies);
+ struct rq *rq;
+ unsigned long next, now;
+ if (!housekeeping_cpu(smp_processor_id(), HK_FLAG_TICK_SCHED))
+ return ktime_to_ns(KTIME_MAX);
+
+ rq = this_rq();
+ now = READ_ONCE(jiffies);
next = rq->last_sched_tick + HZ;
if (time_before_eq(next, now))
@@ -60,7 +66,74 @@ u64 scheduler_tick_max_deferment(void)
return jiffies_to_nsecs(next - now);
}
-#endif
+
+struct tick_work {
+ int cpu;
+ struct delayed_work work;
+};
+
+static struct tick_work __percpu *tick_work_cpu;
+
+static void sched_tick_remote(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct tick_work *twork = container_of(dwork, struct tick_work, work);
+ struct rq *rq = cpu_rq(twork->cpu);
+ struct rq_flags rf;
+
+ rq_lock_irq(rq, &rf);
+ update_rq_clock(rq);
+ rq->curr->sched_class->task_tick(rq, rq->curr, 0);
+ rq_unlock_irq(rq, &rf);
+
+ queue_delayed_work(system_unbound_wq, dwork, HZ);
+}
+
+void sched_tick_start(int cpu)
+{
+ struct tick_work *twork;
+
+ if (housekeeping_cpu(cpu, HK_FLAG_TICK_SCHED))
+ return;
+
+ WARN_ON_ONCE(!tick_work_cpu);
+
+ twork = per_cpu_ptr(tick_work_cpu, cpu);
+ twork->cpu = cpu;
+ INIT_DELAYED_WORK(&twork->work, sched_tick_remote);
+ queue_delayed_work(system_unbound_wq, &twork->work, HZ);
+
+ return;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void sched_tick_stop(int cpu)
+{
+ struct tick_work *twork;
+
+ if (housekeeping_cpu(cpu, HK_FLAG_TICK_SCHED))
+ return;
+
+ WARN_ON_ONCE(!tick_work_cpu);
+
+ twork = per_cpu_ptr(tick_work_cpu, cpu);
+ cancel_delayed_work_sync(&twork->work);
+
+ return;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+int __init sched_tick_offload_init(void)
+{
+ tick_work_cpu = alloc_percpu(struct tick_work);
+ if (!tick_work_cpu) {
+ pr_err("Can't allocate remote tick struct\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+#endif /* CONFIG_NO_HZ_FULL */
#ifdef CONFIG_SCHED_HRTICK
/*
--
2.7.4
next prev parent reply other threads:[~2017-12-19 3:24 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-12-19 3:23 [RFC PATCH 0/5] isolation: 1Hz residual tick offloading Frederic Weisbecker
2017-12-19 3:23 ` [PATCH 1/5] sched: Move tick code to a separate file Frederic Weisbecker
2017-12-19 9:08 ` Peter Zijlstra
2017-12-19 16:33 ` Frederic Weisbecker
2017-12-19 3:23 ` [PATCH 2/5] sched: Rename init_rq_hrtick to hrtick_rq_init Frederic Weisbecker
2017-12-19 3:23 ` [PATCH 3/5] sched/isolation: Add scheduler tick offloading interface Frederic Weisbecker
2017-12-19 3:23 ` Frederic Weisbecker [this message]
2017-12-19 9:19 ` [PATCH 4/5] sched/isolation: Residual 1Hz scheduler tick offload Peter Zijlstra
2017-12-19 14:34 ` Luiz Capitulino
2017-12-19 16:01 ` Christopher Lameter
2017-12-19 16:04 ` Peter Zijlstra
2017-12-19 16:38 ` Christopher Lameter
2017-12-19 16:49 ` Peter Zijlstra
2017-12-19 17:26 ` Christopher Lameter
2017-12-19 16:26 ` Frederic Weisbecker
2017-12-19 16:03 ` Christopher Lameter
2017-12-19 16:32 ` Frederic Weisbecker
2017-12-19 17:23 ` Christopher Lameter
2017-12-19 3:23 ` [PATCH 5/5] sched/isolation: Document "nohz_offload" flag Frederic Weisbecker
-- strict thread matches above, loose matches on Subject: below --
2017-12-21 17:14 [PATCH 0/5] isolation: 1Hz residual tick offloading v2 Frederic Weisbecker
2017-12-21 17:14 ` [PATCH 4/5] sched/isolation: Residual 1Hz scheduler tick offload Frederic Weisbecker
2017-12-30 3:55 [PATCH 0/5] isolation: 1Hz residual tick offloading v3 Frederic Weisbecker
2017-12-30 3:55 ` [PATCH 4/5] sched/isolation: Residual 1Hz scheduler tick offload Frederic Weisbecker
2018-01-04 4:25 [GIT PULL] isolation: 1Hz residual tick offloading v3 Frederic Weisbecker
2018-01-04 4:25 ` [PATCH 4/5] sched/isolation: Residual 1Hz scheduler tick offload Frederic Weisbecker
2018-01-12 19:22 ` Luiz Capitulino
2018-01-16 15:57 ` Frederic Weisbecker
2018-01-16 16:53 ` Luiz Capitulino
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1513653838-31314-5-git-send-email-frederic@kernel.org \
--to=frederic@kernel.org \
--cc=cl@linux.com \
--cc=cmetcalf@mellanox.com \
--cc=efault@gmx.de \
--cc=kernellwp@gmail.com \
--cc=lcapitulino@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=paulmck@linux.vnet.ibm.com \
--cc=peterz@infradead.org \
--cc=riel@redhat.com \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.