From: Fernand Sieber <sieberf@amazon.com>
To: Ingo Molnar <mingo@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Juri Lelli <juri.lelli@redhat.com>,
Vincent Guittot <vincent.guittot@linaro.org>
Cc: Tejun Heo <tj@kernel.org>, David Vernet <void@manifault.com>,
Andrea Righi <arighi@nvidia.com>,
Changwoo Min <changwoo@igalia.com>,
Dietmar Eggemann <dietmar.eggemann@arm.com>,
Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
<linux-kernel@vger.kernel.org>, <nh-open-source@amazon.com>,
Fahad Mubeen <fmubeen@amazon.de>,
"Hendrik Borghorst" <hborghor@amazon.de>,
David Woodhouse <dwmw@amazon.co.uk>,
Fernand Sieber <sieberf@amazon.com>
Subject: [PATCH 2/2] sched/ext: add cgroup_set_runtime ops callback
Date: Mon, 25 May 2026 21:36:22 +0200 [thread overview]
Message-ID: <20260525193622.70282-3-sieberf@amazon.com> (raw)
In-Reply-To: <20260525193622.70282-1-sieberf@amazon.com>
Add a sched_ext_ops callback that is invoked when userspace writes to
cpu.max.runtime. This allows BPF schedulers to be notified when runtime
credits are injected into a cgroup, enabling SCX-side credit tracking.
The kernel-side wrapper, scx_group_set_runtime(), performs change detection
(the callback only fires when the written value differs from the cached one)
and caches the value in tg->scx.bw_runtime_us.
Signed-off-by: Fernand Sieber <sieberf@amazon.com>
---
include/linux/sched/ext.h | 1 +
kernel/sched/core.c | 2 ++
kernel/sched/ext.c | 17 +++++++++++++++++
kernel/sched/ext.h | 2 ++
kernel/sched/ext_internal.h | 12 ++++++++++++
5 files changed, 34 insertions(+)
diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 2129e18ad..591801a50 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -273,6 +273,7 @@ struct scx_task_group {
u64 bw_period_us;
u64 bw_quota_us;
u64 bw_burst_us;
+ u64 bw_runtime_us;
bool idle;
#endif
};
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d92e5840b..369dd03d3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10165,6 +10165,8 @@ static int cpu_runtime_write_u64(struct cgroup_subsys_state *css,
cfs_b->runtime = (u64)runtime_us * NSEC_PER_USEC;
raw_spin_unlock_irq(&cfs_b->lock);
+
+ scx_group_set_runtime(tg, runtime_us);
return 0;
}
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 827a96e39..2ce505ad8 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4488,6 +4488,23 @@ void scx_group_set_bandwidth(struct task_group *tg,
percpu_up_read(&scx_cgroup_ops_rwsem);
}
+
+void scx_group_set_runtime(struct task_group *tg, u64 runtime_us)
+{
+ struct scx_sched *sch;
+ /* Relay a cpu.max.runtime write to the BPF scheduler and cache the value. */
+ percpu_down_read(&scx_cgroup_ops_rwsem);
+ sch = scx_root;
+ /* Call the op only if scx_cgroup_enabled, the op exists and the value changed. */
+ if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_runtime) &&
+ tg->scx.bw_runtime_us != runtime_us)
+ SCX_CALL_OP(sch, cgroup_set_runtime, NULL,
+ tg_cgrp(tg), runtime_us);
+ /* Always cached. NOTE(review): same-value rewrites skip the op -- intended? */
+ tg->scx.bw_runtime_us = runtime_us;
+
+ percpu_up_read(&scx_cgroup_ops_rwsem);
+}
#endif /* CONFIG_EXT_GROUP_SCHED */
#if defined(CONFIG_EXT_GROUP_SCHED) || defined(CONFIG_EXT_SUB_SCHED)
diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h
index 0b7fc46ae..00103ec3d 100644
--- a/kernel/sched/ext.h
+++ b/kernel/sched/ext.h
@@ -81,6 +81,7 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset);
void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight);
void scx_group_set_idle(struct task_group *tg, bool idle);
void scx_group_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us);
+void scx_group_set_runtime(struct task_group *tg, u64 runtime_us);
#else /* CONFIG_EXT_GROUP_SCHED */
static inline void scx_tg_init(struct task_group *tg) {}
static inline int scx_tg_online(struct task_group *tg) { return 0; }
@@ -91,5 +92,6 @@ static inline void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) {}
static inline void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight) {}
static inline void scx_group_set_idle(struct task_group *tg, bool idle) {}
static inline void scx_group_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us) {}
+static inline void scx_group_set_runtime(struct task_group *tg, u64 runtime_us) {}
#endif /* CONFIG_EXT_GROUP_SCHED */
#endif /* CONFIG_CGROUP_SCHED */
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index a075732d4..21e6ab7af 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -739,6 +739,18 @@ struct sched_ext_ops {
*/
void (*cgroup_set_idle)(struct cgroup *cgrp, bool idle);
+ /**
+ * @cgroup_set_runtime: A cgroup's runtime is being set directly
+ * @cgrp: cgroup whose runtime is being set
+ * @runtime_us: runtime in microseconds
+ *
+ * Update @cgrp's available runtime. This is from the cpu.max.runtime
+ * cgroup interface. @runtime_us is the total runtime budget that the
+ * cgroup may consume. The BPF scheduler should track this value and
+ * throttle tasks in @cgrp once the budget is exhausted.
+ */
+ void (*cgroup_set_runtime)(struct cgroup *cgrp, u64 runtime_us);
+
#endif /* CONFIG_EXT_GROUP_SCHED */
/**
--
2.47.3
Amazon Development Centre (South Africa) (Proprietary) Limited
29 Gogosoa Street, Observatory, Cape Town, Western Cape, 7925, South Africa
Registration Number: 2004 / 034463 / 07
prev parent reply other threads:[~2026-05-25 19:38 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-25 19:36 [PATCH 0/2] sched/fair: expose cpu.max.runtime for credit injection Fernand Sieber
2026-05-25 19:36 ` [PATCH 1/2] sched/fair: expose cpu.max.runtime to set bandwidth runtime directly Fernand Sieber
2026-05-26 20:52 ` Benjamin Segall
2026-05-28 7:25 ` Fernand Sieber
2026-05-27 19:04 ` Tejun Heo
2026-05-28 6:54 ` Fernand Sieber
2026-05-28 14:37 ` Tejun Heo
2026-05-25 19:36 ` Fernand Sieber [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260525193622.70282-3-sieberf@amazon.com \
--to=sieberf@amazon.com \
--cc=arighi@nvidia.com \
--cc=bsegall@google.com \
--cc=changwoo@igalia.com \
--cc=dietmar.eggemann@arm.com \
--cc=dwmw@amazon.co.uk \
--cc=fmubeen@amazon.de \
--cc=hborghor@amazon.de \
--cc=juri.lelli@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mgorman@suse.de \
--cc=mingo@redhat.com \
--cc=nh-open-source@amazon.com \
--cc=peterz@infradead.org \
--cc=tj@kernel.org \
--cc=vincent.guittot@linaro.org \
--cc=void@manifault.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox