From: Tejun Heo <tj@kernel.org>
To: torvalds@linux-foundation.org, mingo@redhat.com,
peterz@infradead.org, juri.lelli@redhat.com,
vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
bristot@redhat.com, vschneid@redhat.com, ast@kernel.org,
daniel@iogearbox.net, andrii@kernel.org, martin.lau@kernel.org,
joshdon@google.com, brho@google.com, pjt@google.com,
derkling@google.com, haoluo@google.com, dvernet@meta.com,
dschatzberg@meta.com, dskarlat@cs.cmu.edu, riel@surriel.com,
changwoo@igalia.com, himadrics@inria.fr, memxor@gmail.com,
andrea.righi@canonical.com, joel@joelfernandes.org
Cc: linux-kernel@vger.kernel.org, bpf@vger.kernel.org,
kernel-team@meta.com, Tejun Heo <tj@kernel.org>
Subject: [PATCH 13/39] sched_ext: Add boilerplate for extensible scheduler class
Date: Wed, 1 May 2024 05:09:48 -1000 [thread overview]
Message-ID: <20240501151312.635565-14-tj@kernel.org> (raw)
In-Reply-To: <20240501151312.635565-1-tj@kernel.org>
This adds dummy implementations of sched_ext interfaces which interact with
the scheduler core and hook them in the correct places. As they're all
dummies, this doesn't cause any behavior changes. This is split out to help
reviewing.
v2: balance_scx_on_up() dropped. This will be handled in sched_ext proper.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: David Vernet <dvernet@meta.com>
Acked-by: Josh Don <joshdon@google.com>
Acked-by: Hao Luo <haoluo@google.com>
Acked-by: Barret Rhoden <brho@google.com>
---
include/linux/sched/ext.h | 12 ++++++++++++
kernel/fork.c | 2 ++
kernel/sched/core.c | 32 ++++++++++++++++++++++++--------
kernel/sched/cpufreq_schedutil.c | 4 +++-
kernel/sched/ext.h | 25 +++++++++++++++++++++++++
kernel/sched/idle.c | 2 ++
kernel/sched/sched.h | 2 ++
7 files changed, 70 insertions(+), 9 deletions(-)
create mode 100644 include/linux/sched/ext.h
create mode 100644 kernel/sched/ext.h
diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
new file mode 100644
index 000000000000..a05dfcf533b0
--- /dev/null
+++ b/include/linux/sched/ext.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SCHED_EXT_H
+#define _LINUX_SCHED_EXT_H
+
+#ifdef CONFIG_SCHED_CLASS_EXT
+#error "NOT IMPLEMENTED YET"
+#else /* !CONFIG_SCHED_CLASS_EXT */
+
+static inline void sched_ext_free(struct task_struct *p) {}
+
+#endif /* CONFIG_SCHED_CLASS_EXT */
+#endif /* _LINUX_SCHED_EXT_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 02f12033db9c..6238b1ae3306 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -23,6 +23,7 @@
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
+#include <linux/sched/ext.h>
#include <linux/seq_file.h>
#include <linux/rtmutex.h>
#include <linux/init.h>
@@ -970,6 +971,7 @@ void __put_task_struct(struct task_struct *tsk)
WARN_ON(refcount_read(&tsk->usage));
WARN_ON(tsk == current);
+ sched_ext_free(tsk);
io_uring_free(tsk);
cgroup_free(tsk);
task_numa_free(tsk, true);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7542a39f1fde..0231905ec827 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4767,6 +4767,8 @@ late_initcall(sched_core_sysctl_init);
*/
int sched_fork(unsigned long clone_flags, struct task_struct *p)
{
+ int ret;
+
__sched_fork(clone_flags, p);
/*
* We mark the process as NEW here. This guarantees that
@@ -4803,12 +4805,16 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->sched_reset_on_fork = 0;
}
- if (dl_prio(p->prio))
- return -EAGAIN;
- else if (rt_prio(p->prio))
+ scx_pre_fork(p);
+
+ if (dl_prio(p->prio)) {
+ ret = -EAGAIN;
+ goto out_cancel;
+ } else if (rt_prio(p->prio)) {
p->sched_class = &rt_sched_class;
- else
+ } else {
p->sched_class = &fair_sched_class;
+ }
init_entity_runnable_average(&p->se);
@@ -4826,6 +4832,10 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
RB_CLEAR_NODE(&p->pushable_dl_tasks);
#endif
return 0;
+
+out_cancel:
+ scx_cancel_fork(p);
+ return ret;
}
int sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
@@ -4856,16 +4866,18 @@ int sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
p->sched_class->task_fork(p);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
- return 0;
+ return scx_fork(p);
}
void sched_cancel_fork(struct task_struct *p)
{
+ scx_cancel_fork(p);
}
void sched_post_fork(struct task_struct *p)
{
uclamp_post_fork(p);
+ scx_post_fork(p);
}
unsigned long to_ratio(u64 period, u64 runtime)
@@ -6017,7 +6029,7 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
* We can terminate the balance pass as soon as we know there is
* a runnable task of @class priority or higher.
*/
- for_class_range(class, prev->sched_class, &idle_sched_class) {
+ for_balance_class_range(class, prev->sched_class, &idle_sched_class) {
if (class->balance(rq, prev, rf))
break;
}
@@ -6035,6 +6047,9 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
const struct sched_class *class;
struct task_struct *p;
+ if (scx_enabled())
+ goto restart;
+
/*
* Optimization: we know that if all tasks are in the fair class we can
* call that function directly, but only if the @prev task wasn't of a
@@ -6075,7 +6090,7 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
if (prev->dl_server)
prev->dl_server = NULL;
- for_each_class(class) {
+ for_each_active_class(class) {
p = class->pick_next_task(rq);
if (p)
return p;
@@ -6108,7 +6123,7 @@ static inline struct task_struct *pick_task(struct rq *rq)
const struct sched_class *class;
struct task_struct *p;
- for_each_class(class) {
+ for_each_active_class(class) {
p = class->pick_task(rq);
if (p)
return p;
@@ -10135,6 +10150,7 @@ void __init sched_init(void)
balance_push_set(smp_processor_id(), false);
#endif
init_sched_fair_class();
+ init_sched_ext_class();
psi_init();
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 972b7dd65af2..0827864c35ff 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -197,7 +197,9 @@ unsigned long sugov_effective_cpu_perf(int cpu, unsigned long actual,
static void sugov_get_util(struct sugov_cpu *sg_cpu, unsigned long boost)
{
- unsigned long min, max, util = cpu_util_cfs_boost(sg_cpu->cpu);
+ unsigned long min, max;
+ unsigned long util = cpu_util_cfs_boost(sg_cpu->cpu) +
+ scx_cpuperf_target(sg_cpu->cpu);
util = effective_cpu_util(sg_cpu->cpu, util, &min, &max);
util = max(util, boost);
diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h
new file mode 100644
index 000000000000..42bd1e47b304
--- /dev/null
+++ b/kernel/sched/ext.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifdef CONFIG_SCHED_CLASS_EXT
+#error "NOT IMPLEMENTED YET"
+#else /* CONFIG_SCHED_CLASS_EXT */
+
+#define scx_enabled() false
+
+static inline void scx_pre_fork(struct task_struct *p) {}
+static inline int scx_fork(struct task_struct *p) { return 0; }
+static inline void scx_post_fork(struct task_struct *p) {}
+static inline void scx_cancel_fork(struct task_struct *p) {}
+static inline void init_sched_ext_class(void) {}
+static inline u32 scx_cpuperf_target(s32 cpu) { return 0; }
+
+#define for_each_active_class for_each_class
+#define for_balance_class_range for_class_range
+
+#endif /* CONFIG_SCHED_CLASS_EXT */
+
+#if defined(CONFIG_SCHED_CLASS_EXT) && defined(CONFIG_SMP)
+#error "NOT IMPLEMENTED YET"
+#else
+static inline void scx_update_idle(struct rq *rq, bool idle) {}
+#endif
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 6135fbe83d68..3b6540cc436a 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -458,11 +458,13 @@ static void wakeup_preempt_idle(struct rq *rq, struct task_struct *p, int flags)
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
+ scx_update_idle(rq, false);
}
static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first)
{
update_idle_core(rq);
+ scx_update_idle(rq, true);
schedstat_inc(rq->sched_goidle);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 11b4345d2638..fbd1e9ea8b18 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3540,4 +3540,6 @@ enum cpu_cftype_id {
extern struct cftype cpu_cftypes[CPU_CFTYPE_CNT + 1];
#endif /* CONFIG_CGROUP_SCHED */
+#include "ext.h"
+
#endif /* _KERNEL_SCHED_SCHED_H */
--
2.44.0
next prev parent reply other threads:[~2024-05-01 15:13 UTC|newest]
Thread overview: 142+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-01 15:09 [PATCHSET v6] sched: Implement BPF extensible scheduler class Tejun Heo
2024-05-01 15:09 ` [PATCH 01/39] cgroup: Implement cgroup_show_cftypes() Tejun Heo
2024-05-01 15:09 ` [PATCH 02/39] sched: Restructure sched_class order sanity checks in sched_init() Tejun Heo
2024-05-01 15:09 ` [PATCH 03/39] sched: Allow sched_cgroup_fork() to fail and introduce sched_cancel_fork() Tejun Heo
2024-05-01 15:09 ` [PATCH 04/39] sched: Add sched_class->reweight_task() Tejun Heo
2024-06-24 10:23 ` Peter Zijlstra
2024-06-24 10:31 ` Peter Zijlstra
2024-06-24 23:59 ` Tejun Heo
2024-06-25 7:29 ` Peter Zijlstra
2024-06-25 23:57 ` Tejun Heo
2024-06-26 1:29 ` [PATCH sched/urgent] sched/fair: set_load_weight() must also call reweight_task() for SCHED_IDLE tasks Tejun Heo
2024-06-26 2:19 ` [PATCH sched_ext/for-6.11] sched_ext: Account for idle policy when setting p->scx.weight in scx_ops_enable_task() Tejun Heo
2024-07-08 19:29 ` [PATCH v2 " Tejun Heo
2024-07-08 15:07 ` [tip: sched/core] sched/fair: set_load_weight() must also call reweight_task() for SCHED_IDLE tasks tip-bot2 for Tejun Heo
2024-05-01 15:09 ` [PATCH 05/39] sched: Add sched_class->switching_to() and expose check_class_changing/changed() Tejun Heo
2024-06-24 11:06 ` Peter Zijlstra
2024-06-24 22:18 ` Tejun Heo
2024-06-25 8:16 ` Peter Zijlstra
2024-05-01 15:09 ` [PATCH 06/39] sched: Factor out cgroup weight conversion functions Tejun Heo
2024-05-01 15:09 ` [PATCH 07/39] sched: Expose css_tg() and __setscheduler_prio() Tejun Heo
2024-06-24 11:19 ` Peter Zijlstra
2024-06-24 18:56 ` Tejun Heo
2024-05-01 15:09 ` [PATCH 08/39] sched: Enumerate CPU cgroup file types Tejun Heo
2024-05-01 15:09 ` [PATCH 09/39] sched: Add @reason to sched_class->rq_{on|off}line() Tejun Heo
2024-06-24 11:32 ` Peter Zijlstra
2024-06-24 21:18 ` Tejun Heo
2024-06-25 8:29 ` Peter Zijlstra
2024-06-25 23:41 ` Tejun Heo
2024-06-26 8:23 ` Peter Zijlstra
2024-06-26 18:01 ` Tejun Heo
2024-06-27 1:27 ` [PATCH sched_ext/for-6.11] sched_ext: Disallow loading BPF scheduler if isolcpus= domain isolation is in effect Tejun Heo
2024-07-08 19:30 ` Tejun Heo
2024-05-01 15:09 ` [PATCH 10/39] sched: Factor out update_other_load_avgs() from __update_blocked_others() Tejun Heo
2024-06-24 12:35 ` Peter Zijlstra
2024-06-24 16:15 ` Vincent Guittot
2024-06-24 19:24 ` Tejun Heo
2024-06-25 9:13 ` Vincent Guittot
2024-06-26 20:49 ` Tejun Heo
2024-05-01 15:09 ` [PATCH 11/39] cpufreq_schedutil: Refactor sugov_cpu_is_busy() Tejun Heo
2024-05-01 15:09 ` [PATCH 12/39] sched: Add normal_policy() Tejun Heo
2024-05-01 15:09 ` Tejun Heo [this message]
2024-05-01 15:09 ` [PATCH 14/39] sched_ext: Implement BPF extensible scheduler class Tejun Heo
2024-05-01 15:09 ` [PATCH 15/39] sched_ext: Add scx_simple and scx_example_qmap example schedulers Tejun Heo
2024-05-01 15:09 ` [PATCH 16/39] sched_ext: Add sysrq-S which disables the BPF scheduler Tejun Heo
2024-05-01 15:09 ` [PATCH 17/39] sched_ext: Implement runnable task stall watchdog Tejun Heo
2024-05-01 15:09 ` [PATCH 18/39] sched_ext: Allow BPF schedulers to disallow specific tasks from joining SCHED_EXT Tejun Heo
2024-06-24 12:40 ` Peter Zijlstra
2024-06-24 19:06 ` Tejun Heo
2024-05-01 15:09 ` [PATCH 19/39] sched_ext: Print sched_ext info when dumping stack Tejun Heo
2024-06-24 12:46 ` Peter Zijlstra
2024-06-24 14:25 ` Linus Torvalds
2024-06-24 18:34 ` Tejun Heo
2024-05-01 15:09 ` [PATCH 20/39] sched_ext: Print debug dump after an error exit Tejun Heo
2024-05-01 15:09 ` [PATCH 21/39] tools/sched_ext: Add scx_show_state.py Tejun Heo
2024-05-01 15:09 ` [PATCH 22/39] sched_ext: Implement scx_bpf_kick_cpu() and task preemption support Tejun Heo
2024-05-01 15:09 ` [PATCH 23/39] sched_ext: Add a central scheduler which makes all scheduling decisions on one CPU Tejun Heo
2024-05-01 15:09 ` [PATCH 24/39] sched_ext: Make watchdog handle ops.dispatch() looping stall Tejun Heo
2024-05-01 15:10 ` [PATCH 25/39] sched_ext: Add task state tracking operations Tejun Heo
2024-05-01 15:10 ` [PATCH 26/39] sched_ext: Implement tickless support Tejun Heo
2024-05-01 15:10 ` [PATCH 27/39] sched_ext: Track tasks that are subjects of the in-flight SCX operation Tejun Heo
2024-05-01 15:10 ` [PATCH 28/39] sched_ext: Add cgroup support Tejun Heo
2024-05-01 15:10 ` [PATCH 29/39] sched_ext: Add a cgroup scheduler which uses flattened hierarchy Tejun Heo
2024-05-01 15:10 ` [PATCH 30/39] sched_ext: Implement SCX_KICK_WAIT Tejun Heo
2024-05-01 15:10 ` [PATCH 31/39] sched_ext: Implement sched_ext_ops.cpu_acquire/release() Tejun Heo
2024-05-01 15:10 ` [PATCH 32/39] sched_ext: Implement sched_ext_ops.cpu_online/offline() Tejun Heo
2024-05-01 15:10 ` [PATCH 33/39] sched_ext: Bypass BPF scheduler while PM events are in progress Tejun Heo
2024-05-01 15:10 ` [PATCH 34/39] sched_ext: Implement core-sched support Tejun Heo
2024-05-01 15:10 ` [PATCH 35/39] sched_ext: Add vtime-ordered priority queue to dispatch_q's Tejun Heo
2024-05-01 15:10 ` [PATCH 36/39] sched_ext: Implement DSQ iterator Tejun Heo
2024-05-01 15:10 ` [PATCH 37/39] sched_ext: Add cpuperf support Tejun Heo
2024-05-01 15:10 ` [PATCH 38/39] sched_ext: Documentation: scheduler: Document extensible scheduler class Tejun Heo
2024-05-02 2:24 ` Bagas Sanjaya
2024-05-01 15:10 ` [PATCH 39/39] sched_ext: Add selftests Tejun Heo
2024-05-02 8:48 ` [PATCHSET v6] sched: Implement BPF extensible scheduler class Peter Zijlstra
2024-05-02 19:20 ` Tejun Heo
2024-05-03 8:52 ` Peter Zijlstra
2024-05-05 23:31 ` Tejun Heo
2024-05-13 8:03 ` Peter Zijlstra
2024-05-13 18:26 ` Steven Rostedt
2024-05-14 0:07 ` Qais Yousef
2024-05-14 21:34 ` David Vernet
2024-05-27 21:25 ` Qais Yousef
2024-05-28 23:46 ` Tejun Heo
2024-05-29 22:09 ` Qais Yousef
2024-05-17 9:58 ` Peter Zijlstra
2024-05-27 20:29 ` Qais Yousef
2024-05-14 20:22 ` Chris Mason
2024-05-14 22:06 ` Josh Don
2024-05-15 20:41 ` Tejun Heo
2024-05-21 0:19 ` Tejun Heo
2024-05-30 16:49 ` Tejun Heo
2024-05-06 18:47 ` Rik van Riel
2024-05-07 19:33 ` Tejun Heo
2024-05-07 19:47 ` Rik van Riel
2024-05-09 7:38 ` Changwoo Min
2024-05-10 18:24 ` Peter Jung
2024-05-13 20:36 ` Andrea Righi
2024-06-11 21:34 ` Linus Torvalds
2024-06-13 23:38 ` Tejun Heo
2024-06-19 20:56 ` Thomas Gleixner
2024-06-19 22:10 ` Linus Torvalds
2024-06-19 22:27 ` Thomas Gleixner
2024-06-19 22:55 ` Linus Torvalds
2024-06-20 2:35 ` Thomas Gleixner
2024-06-20 5:07 ` Linus Torvalds
2024-06-20 17:11 ` Linus Torvalds
2024-06-20 17:41 ` Tejun Heo
2024-06-20 22:15 ` [PATCH sched_ext/for-6.11] sched, sched_ext: Replace scx_next_task_picked() with sched_class->switch_class() Tejun Heo
2024-06-20 22:42 ` Linus Torvalds
2024-06-21 19:46 ` Tejun Heo
2024-06-24 9:04 ` Peter Zijlstra
2024-06-24 18:41 ` Tejun Heo
2024-06-24 9:02 ` Peter Zijlstra
2024-06-21 19:52 ` Tejun Heo
2024-06-24 8:59 ` Peter Zijlstra
2024-06-24 21:01 ` Tejun Heo
2024-06-25 7:49 ` Peter Zijlstra
2024-06-25 23:30 ` Tejun Heo
2024-06-26 8:28 ` Peter Zijlstra
2024-06-26 17:56 ` Tejun Heo
2024-06-20 18:47 ` [PATCHSET v6] sched: Implement BPF extensible scheduler class Thomas Gleixner
2024-06-20 19:20 ` Linus Torvalds
2024-06-21 9:35 ` Thomas Gleixner
2024-06-21 16:34 ` Linus Torvalds
2024-06-23 2:00 ` Tejun Heo
2024-06-23 10:31 ` Thomas Gleixner
2024-06-23 10:33 ` Thomas Gleixner
2024-06-24 14:23 ` Jason Gunthorpe
2024-06-20 19:58 ` Tejun Heo
2024-06-24 9:34 ` Peter Zijlstra
2024-06-24 20:17 ` Tejun Heo
2024-06-24 20:51 ` [PATCH sched_ext/for-6.11] sched, sched_ext: Simplify dl_prio() case handling in sched_fork() Tejun Heo
2024-07-08 18:56 ` Tejun Heo
2024-06-20 19:35 ` [PATCHSET v6] sched: Implement BPF extensible scheduler class Tejun Heo
2024-06-21 10:46 ` Thomas Gleixner
2024-06-21 21:14 ` Chris Mason
2024-06-23 8:14 ` Thomas Gleixner
2024-06-24 16:42 ` Chris Mason
2024-06-24 18:11 ` Tejun Heo
2024-06-24 22:01 ` Peter Oskolkov
2024-06-24 22:17 ` David Vernet
2024-06-24 21:54 ` Peter Oskolkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240501151312.635565-14-tj@kernel.org \
--to=tj@kernel.org \
--cc=andrea.righi@canonical.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=brho@google.com \
--cc=bristot@redhat.com \
--cc=bsegall@google.com \
--cc=changwoo@igalia.com \
--cc=daniel@iogearbox.net \
--cc=derkling@google.com \
--cc=dietmar.eggemann@arm.com \
--cc=dschatzberg@meta.com \
--cc=dskarlat@cs.cmu.edu \
--cc=dvernet@meta.com \
--cc=haoluo@google.com \
--cc=himadrics@inria.fr \
--cc=joel@joelfernandes.org \
--cc=joshdon@google.com \
--cc=juri.lelli@redhat.com \
--cc=kernel-team@meta.com \
--cc=linux-kernel@vger.kernel.org \
--cc=martin.lau@kernel.org \
--cc=memxor@gmail.com \
--cc=mgorman@suse.de \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=pjt@google.com \
--cc=riel@surriel.com \
--cc=rostedt@goodmis.org \
--cc=torvalds@linux-foundation.org \
--cc=vincent.guittot@linaro.org \
--cc=vschneid@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox