* [PATCH 1/2] sched/uclamp: Delegate uclamp to each sched_class
2024-07-03 10:07 [PATCH 0/2] Move uclamp to each sched_class Hongyan Xia
@ 2024-07-03 10:07 ` Hongyan Xia
2024-07-03 18:13 ` Tejun Heo
2024-07-03 10:07 ` [PATCH 2/2] sched/ext: Add BPF functions for uclamp inc and dec Hongyan Xia
2024-07-03 18:13 ` [PATCH 0/2] Move uclamp to each sched_class Tejun Heo
2 siblings, 1 reply; 7+ messages in thread
From: Hongyan Xia @ 2024-07-03 10:07 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-kernel
uclamp-related operations are written in core.c in a way that may
suggest it is a global thing regardless of sched_class, but we end up
checking sched_class->uclamp_enabled, so in reality it is sched_class
specific anyway.
Remove sched_class->uclamp_enabled and simply delegate uclamp to
sched_class->{enqueue,dequeue}_task(). This also removes extra
uclamp_enabled checks in uclamp_rq_{inc,dec}().
No functional changes for fair and RT.
Signed-off-by: Hongyan Xia <hongyan.xia2@arm.com>
---
kernel/sched/core.c | 14 ++------------
kernel/sched/ext.c | 4 ----
kernel/sched/fair.c | 6 ++----
kernel/sched/rt.c | 7 +++----
kernel/sched/sched.h | 15 +++++++++++----
5 files changed, 18 insertions(+), 28 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1092955a7d6e..5c5e5aefca81 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1671,7 +1671,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
}
}
-static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
+void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
{
enum uclamp_id clamp_id;
@@ -1684,9 +1684,6 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
if (!static_branch_unlikely(&sched_uclamp_used))
return;
- if (unlikely(!p->sched_class->uclamp_enabled))
- return;
-
for_each_clamp_id(clamp_id)
uclamp_rq_inc_id(rq, p, clamp_id);
@@ -1695,7 +1692,7 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE;
}
-static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
+void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
{
enum uclamp_id clamp_id;
@@ -1708,9 +1705,6 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
if (!static_branch_unlikely(&sched_uclamp_used))
return;
- if (unlikely(!p->sched_class->uclamp_enabled))
- return;
-
for_each_clamp_id(clamp_id)
uclamp_rq_dec_id(rq, p, clamp_id);
}
@@ -1949,8 +1943,6 @@ static void __init init_uclamp(void)
}
#else /* !CONFIG_UCLAMP_TASK */
-static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
-static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
static inline void uclamp_fork(struct task_struct *p) { }
static inline void uclamp_post_fork(struct task_struct *p) { }
static inline void init_uclamp(void) { }
@@ -1990,7 +1982,6 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
psi_enqueue(p, (flags & ENQUEUE_WAKEUP) && !(flags & ENQUEUE_MIGRATED));
}
- uclamp_rq_inc(rq, p);
p->sched_class->enqueue_task(rq, p, flags);
if (sched_core_enabled(rq))
@@ -2010,7 +2001,6 @@ void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
psi_dequeue(p, flags & DEQUEUE_SLEEP);
}
- uclamp_rq_dec(rq, p);
p->sched_class->dequeue_task(rq, p, flags);
}
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index ae9ec8f542f2..0b120104a7ce 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3520,10 +3520,6 @@ DEFINE_SCHED_CLASS(ext) = {
.prio_changed = prio_changed_scx,
.update_curr = update_curr_scx,
-
-#ifdef CONFIG_UCLAMP_TASK
- .uclamp_enabled = 1,
-#endif
};
static void init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d59537416865..a861fafd53f9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6752,6 +6752,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
int idle_h_nr_running = task_has_idle_policy(p);
int task_new = !(flags & ENQUEUE_WAKEUP);
+ uclamp_rq_inc(rq, p);
/*
* The code below (indirectly) updates schedutil which looks at
* the cfs_rq utilization to select a frequency.
@@ -6846,6 +6847,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
int idle_h_nr_running = task_has_idle_policy(p);
bool was_sched_idle = sched_idle_rq(rq);
+ uclamp_rq_dec(rq, p);
util_est_dequeue(&rq->cfs, p);
for_each_sched_entity(se) {
@@ -13227,10 +13229,6 @@ DEFINE_SCHED_CLASS(fair) = {
#ifdef CONFIG_SCHED_CORE
.task_is_throttled = task_is_throttled_fair,
#endif
-
-#ifdef CONFIG_UCLAMP_TASK
- .uclamp_enabled = 1,
-#endif
};
#ifdef CONFIG_SCHED_DEBUG
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 63e49c8ffc4d..f0b3ba5e8867 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1480,6 +1480,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
struct sched_rt_entity *rt_se = &p->rt;
+ uclamp_rq_inc(rq, p);
+
if (flags & ENQUEUE_WAKEUP)
rt_se->timeout = 0;
@@ -1496,6 +1498,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
struct sched_rt_entity *rt_se = &p->rt;
+ uclamp_rq_dec(rq, p);
update_curr_rt(rq);
dequeue_rt_entity(rt_se, flags);
@@ -2680,10 +2683,6 @@ DEFINE_SCHED_CLASS(rt) = {
#ifdef CONFIG_SCHED_CORE
.task_is_throttled = task_is_throttled_rt,
#endif
-
-#ifdef CONFIG_UCLAMP_TASK
- .uclamp_enabled = 1,
-#endif
};
#ifdef CONFIG_RT_GROUP_SCHED
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 963a2fa180ad..619b1cc972bd 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2340,10 +2340,6 @@ extern s64 update_curr_common(struct rq *rq);
struct sched_class {
-#ifdef CONFIG_UCLAMP_TASK
- int uclamp_enabled;
-#endif
-
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*yield_task) (struct rq *rq);
@@ -3246,6 +3242,9 @@ uclamp_se_set(struct uclamp_se *uc_se, unsigned int value, bool user_defined)
uc_se->user_defined = user_defined;
}
+void uclamp_rq_inc(struct rq *rq, struct task_struct *p);
+void uclamp_rq_dec(struct rq *rq, struct task_struct *p);
+
#else /* !CONFIG_UCLAMP_TASK: */
static inline unsigned long
@@ -3283,6 +3282,14 @@ static inline bool uclamp_rq_is_idle(struct rq *rq)
return false;
}
+static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
+{
+}
+
+static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
+{
+}
+
#endif /* !CONFIG_UCLAMP_TASK */
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH 2/2] sched/ext: Add BPF functions for uclamp inc and dec
2024-07-03 10:07 [PATCH 0/2] Move uclamp to each sched_class Hongyan Xia
2024-07-03 10:07 ` [PATCH 1/2] sched/uclamp: Delegate " Hongyan Xia
@ 2024-07-03 10:07 ` Hongyan Xia
2024-07-03 18:15 ` Tejun Heo
2024-07-03 18:13 ` [PATCH 0/2] Move uclamp to each sched_class Tejun Heo
2 siblings, 1 reply; 7+ messages in thread
From: Hongyan Xia @ 2024-07-03 10:07 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-kernel
A sched_ext scheduler may have different choices for uclamp:
1. Re-use the current uclamp implementation
2. Ignore uclamp completely
3. Have its own custom uclamp implementation
We expose uclamp BPF functions and let the scheduler itself decide what
to do.
Signed-off-by: Hongyan Xia <hongyan.xia2@arm.com>
---
kernel/sched/ext.c | 12 ++++++++++++
tools/sched_ext/include/scx/common.bpf.h | 2 ++
2 files changed, 14 insertions(+)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 0b120104a7ce..48c553b6f0c3 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -6108,6 +6108,16 @@ __bpf_kfunc s32 scx_bpf_task_cpu(const struct task_struct *p)
return task_cpu(p);
}
+__bpf_kfunc void scx_bpf_uclamp_rq_inc(s32 cpu, struct task_struct *p)
+{
+ uclamp_rq_inc(cpu_rq(cpu), p);
+}
+
+__bpf_kfunc void scx_bpf_uclamp_rq_dec(s32 cpu, struct task_struct *p)
+{
+ uclamp_rq_dec(cpu_rq(cpu), p);
+}
+
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(scx_kfunc_ids_any)
@@ -6132,6 +6142,8 @@ BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_task_running, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_task_cpu, KF_RCU)
+BTF_ID_FLAGS(func, scx_bpf_uclamp_rq_inc)
+BTF_ID_FLAGS(func, scx_bpf_uclamp_rq_dec)
BTF_KFUNCS_END(scx_kfunc_ids_any)
static const struct btf_kfunc_id_set scx_kfunc_set_any = {
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index dbbda0e35c5d..85ddc94fb4c1 100644
--- a/tools/sched_ext/include/scx/common.bpf.h
+++ b/tools/sched_ext/include/scx/common.bpf.h
@@ -57,6 +57,8 @@ s32 scx_bpf_pick_idle_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
bool scx_bpf_task_running(const struct task_struct *p) __ksym;
s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
+void scx_bpf_uclamp_rq_inc(s32 cpu, struct task_struct *p) __ksym;
+void scx_bpf_uclamp_rq_dec(s32 cpu, struct task_struct *p) __ksym;
static inline __attribute__((format(printf, 1, 2)))
void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread