* [PATCH 1/2] [tip: sched/core] sched: Disable PLACE_LAG and RUN_TO_PARITY
[not found] <20241017052000.99200-1-cpru@amazon.com>
@ 2024-10-17 5:19 ` Cristian Prundeanu
2024-10-17 5:20 ` [PATCH 2/2] [tip: sched/core] sched: Move PLACE_LAG and RUN_TO_PARITY to sysctl Cristian Prundeanu
1 sibling, 0 replies; 2+ messages in thread
From: Cristian Prundeanu @ 2024-10-17 5:19 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, x86, linux-arm-kernel,
Bjoern Doebel, Hazem Mohamed Abuelfotoh, Geoff Blake, Ali Saidi,
Csaba Csoma, Cristian Prundeanu, stable
With these features enabled, the EEVDF scheduler introduces a large
performance degradation, observed in multiple database tests on kernel
versions using EEVDF, across multiple architectures (x86, aarch64, amd64)
and CPU generations.
Disable the features to minimize default performance impact.
Cc: <stable@vger.kernel.org> # 6.6.x
Fixes: 86bfbb7ce4f6 ("sched/fair: Add lag based placement")
Fixes: 63304558ba5d ("sched/eevdf: Curb wakeup-preemption")
Signed-off-by: Cristian Prundeanu <cpru@amazon.com>
---
kernel/sched/features.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index a3d331dd2d8f..8a5ca80665b3 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -4,7 +4,7 @@
* Using the avg_vruntime, do the right thing and preserve lag across
* sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
*/
-SCHED_FEAT(PLACE_LAG, true)
+SCHED_FEAT(PLACE_LAG, false)
/*
* Give new tasks half a slice to ease into the competition.
*/
@@ -17,7 +17,7 @@ SCHED_FEAT(PLACE_REL_DEADLINE, true)
* Inhibit (wakeup) preemption until the current task has either matched the
* 0-lag point or until is has exhausted it's slice.
*/
-SCHED_FEAT(RUN_TO_PARITY, true)
+SCHED_FEAT(RUN_TO_PARITY, false)
/*
* Allow wakeup of tasks with a shorter slice to cancel RUN_TO_PARITY for
* current.
--
2.40.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH 2/2] [tip: sched/core] sched: Move PLACE_LAG and RUN_TO_PARITY to sysctl
[not found] <20241017052000.99200-1-cpru@amazon.com>
2024-10-17 5:19 ` [PATCH 1/2] [tip: sched/core] sched: Disable PLACE_LAG and RUN_TO_PARITY Cristian Prundeanu
@ 2024-10-17 5:20 ` Cristian Prundeanu
1 sibling, 0 replies; 2+ messages in thread
From: Cristian Prundeanu @ 2024-10-17 5:20 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, Peter Zijlstra, Ingo Molnar, x86, linux-arm-kernel,
Bjoern Doebel, Hazem Mohamed Abuelfotoh, Geoff Blake, Ali Saidi,
Csaba Csoma, Cristian Prundeanu, stable
These two scheduler features have a high impact on performance for some
database workloads. Move them to sysctl as they are likely to be modified
and persisted across reboots.
Cc: <stable@vger.kernel.org> # 6.6.x
Fixes: 86bfbb7ce4f6 ("sched/fair: Add lag based placement")
Fixes: 63304558ba5d ("sched/eevdf: Curb wakeup-preemption")
Signed-off-by: Cristian Prundeanu <cpru@amazon.com>
---
include/linux/sched/sysctl.h | 8 ++++++++
kernel/sched/core.c | 13 +++++++++++++
kernel/sched/fair.c | 5 +++--
kernel/sched/features.h | 10 ----------
kernel/sysctl.c | 20 ++++++++++++++++++++
5 files changed, 44 insertions(+), 12 deletions(-)
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 5a64582b086b..0258fba3896a 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -29,4 +29,12 @@ extern int sysctl_numa_balancing_mode;
#define sysctl_numa_balancing_mode 0
#endif
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
+extern unsigned int sysctl_sched_place_lag_enabled;
+extern unsigned int sysctl_sched_run_to_parity_enabled;
+#else
+#define sysctl_sched_place_lag_enabled 0
+#define sysctl_sched_run_to_parity_enabled 0
+#endif
+
#endif /* _LINUX_SCHED_SYSCTL_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 43e453ab7e20..c6bd1bda8c7e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -134,6 +134,19 @@ const_debug unsigned int sysctl_sched_features =
0;
#undef SCHED_FEAT
+#ifdef CONFIG_SYSCTL
+/*
+ * Using the avg_vruntime, do the right thing and preserve lag across
+ * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
+ */
+__read_mostly unsigned int sysctl_sched_place_lag_enabled = 0;
+/*
+ * Inhibit (wakeup) preemption until the current task has either matched the
+ * 0-lag point or until it has exhausted its slice.
+ */
+__read_mostly unsigned int sysctl_sched_run_to_parity_enabled = 0;
+#endif
+
/*
* Print a warning if need_resched is set for the given duration (if
* LATENCY_WARN is enabled).
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5a621210c9c1..c58b76233f59 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -925,7 +925,8 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
* Once selected, run a task until it either becomes non-eligible or
* until it gets a new slice. See the HACK in set_next_entity().
*/
- if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline)
+ if (sysctl_sched_run_to_parity_enabled &&
+ curr && curr->vlag == curr->deadline)
return curr;
/* Pick the leftmost entity if it's eligible */
@@ -5280,7 +5281,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*
* EEVDF: placement strategy #1 / #2
*/
- if (sched_feat(PLACE_LAG) && cfs_rq->nr_running && se->vlag) {
+ if (sysctl_sched_place_lag_enabled && cfs_rq->nr_running && se->vlag) {
struct sched_entity *curr = cfs_rq->curr;
unsigned long load;
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 8a5ca80665b3..b39a9dde0b54 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -1,10 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Using the avg_vruntime, do the right thing and preserve lag across
- * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
- */
-SCHED_FEAT(PLACE_LAG, false)
/*
* Give new tasks half a slice to ease into the competition.
*/
@@ -13,11 +8,6 @@ SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
* Preserve relative virtual deadline on 'migration'.
*/
SCHED_FEAT(PLACE_REL_DEADLINE, true)
-/*
- * Inhibit (wakeup) preemption until the current task has either matched the
- * 0-lag point or until is has exhausted it's slice.
- */
-SCHED_FEAT(RUN_TO_PARITY, false)
/*
* Allow wakeup of tasks with a shorter slice to cancel RUN_TO_PARITY for
* current.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 79e6cb1d5c48..f435b741654a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2029,6 +2029,26 @@ static struct ctl_table kern_table[] = {
.extra2 = SYSCTL_INT_MAX,
},
#endif
+#ifdef CONFIG_SCHED_DEBUG
+ {
+ .procname = "sched_place_lag_enabled",
+ .data = &sysctl_sched_place_lag_enabled,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "sched_run_to_parity_enabled",
+ .data = &sysctl_sched_run_to_parity_enabled,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif
};
static struct ctl_table vm_table[] = {
--
2.40.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-10-17 5:20 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20241017052000.99200-1-cpru@amazon.com>
2024-10-17 5:19 ` [PATCH 1/2] [tip: sched/core] sched: Disable PLACE_LAG and RUN_TO_PARITY Cristian Prundeanu
2024-10-17 5:20 ` [PATCH 2/2] [tip: sched/core] sched: Move PLACE_LAG and RUN_TO_PARITY to sysctl Cristian Prundeanu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox