From: Fernand Sieber <sieberf@amazon.com>
To: Ingo Molnar <mingo@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Juri Lelli <juri.lelli@redhat.com>,
Vincent Guittot <vincent.guittot@linaro.org>
Cc: Tejun Heo <tj@kernel.org>, David Vernet <void@manifault.com>,
Andrea Righi <arighi@nvidia.com>,
Changwoo Min <changwoo@igalia.com>,
Dietmar Eggemann <dietmar.eggemann@arm.com>,
Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
<linux-kernel@vger.kernel.org>, <nh-open-source@amazon.com>,
Fahad Mubeen <fmubeen@amazon.de>,
"Hendrik Borghorst" <hborghor@amazon.de>,
David Woodhouse <dwmw@amazon.co.uk>,
Fernand Sieber <sieberf@amazon.com>
Subject: [PATCH 2/2] sched/ext: add cgroup_set_runtime ops callback
Date: Mon, 25 May 2026 21:36:22 +0200 [thread overview]
Message-ID: <20260525193622.70282-3-sieberf@amazon.com> (raw)
In-Reply-To: <20260525193622.70282-1-sieberf@amazon.com>
Add a sched_ext_ops callback that is invoked when userspace writes to
cpu.max.runtime. This allows BPF schedulers to be notified when runtime
credits are injected into a cgroup, enabling SCX-side credit tracking.
The callback includes change detection (only fires when the value
changes) and caches the value in tg->scx.bw_runtime_us.
Signed-off-by: Fernand Sieber <sieberf@amazon.com>
---
include/linux/sched/ext.h | 1 +
kernel/sched/core.c | 2 ++
kernel/sched/ext.c | 17 +++++++++++++++++
kernel/sched/ext.h | 2 ++
kernel/sched/ext_internal.h | 12 ++++++++++++
5 files changed, 34 insertions(+)
diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 2129e18ad..591801a50 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -273,6 +273,7 @@ struct scx_task_group {
u64 bw_period_us;
u64 bw_quota_us;
u64 bw_burst_us;
+ u64 bw_runtime_us;
bool idle;
#endif
};
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d92e5840b..369dd03d3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10165,6 +10165,8 @@ static int cpu_runtime_write_u64(struct cgroup_subsys_state *css,
cfs_b->runtime = (u64)runtime_us * NSEC_PER_USEC;
raw_spin_unlock_irq(&cfs_b->lock);
+
+ scx_group_set_runtime(tg, runtime_us);
return 0;
}
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 827a96e39..2ce505ad8 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4488,6 +4488,23 @@ void scx_group_set_bandwidth(struct task_group *tg,
percpu_up_read(&scx_cgroup_ops_rwsem);
}
+/* Report a new runtime value for @tg to the BPF scheduler and cache it in tg->scx. */
+void scx_group_set_runtime(struct task_group *tg, u64 runtime_us)
+{
+ struct scx_sched *sch;
+
+ percpu_down_read(&scx_cgroup_ops_rwsem); /* read side held until percpu_up_read() below */
+ sch = scx_root;
+ /* Fire the op only on change. NOTE(review): a repeated identical write is not forwarded - confirm intended. */
+ if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_runtime) &&
+ tg->scx.bw_runtime_us != runtime_us)
+ SCX_CALL_OP(sch, cgroup_set_runtime, NULL,
+ tg_cgrp(tg), runtime_us);
+ /* The cache is refreshed unconditionally, whether or not the op was invoked. */
+ tg->scx.bw_runtime_us = runtime_us;
+
+ percpu_up_read(&scx_cgroup_ops_rwsem);
+}
#endif /* CONFIG_EXT_GROUP_SCHED */
#if defined(CONFIG_EXT_GROUP_SCHED) || defined(CONFIG_EXT_SUB_SCHED)
diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h
index 0b7fc46ae..00103ec3d 100644
--- a/kernel/sched/ext.h
+++ b/kernel/sched/ext.h
@@ -81,6 +81,7 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset);
void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight);
void scx_group_set_idle(struct task_group *tg, bool idle);
void scx_group_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us);
+void scx_group_set_runtime(struct task_group *tg, u64 runtime_us);
#else /* CONFIG_EXT_GROUP_SCHED */
static inline void scx_tg_init(struct task_group *tg) {}
static inline int scx_tg_online(struct task_group *tg) { return 0; }
@@ -91,5 +92,6 @@ static inline void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) {}
static inline void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight) {}
static inline void scx_group_set_idle(struct task_group *tg, bool idle) {}
static inline void scx_group_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us) {}
+static inline void scx_group_set_runtime(struct task_group *tg, u64 runtime_us) {}
#endif /* CONFIG_EXT_GROUP_SCHED */
#endif /* CONFIG_CGROUP_SCHED */
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index a075732d4..21e6ab7af 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -739,6 +739,18 @@ struct sched_ext_ops {
*/
void (*cgroup_set_idle)(struct cgroup *cgrp, bool idle);
+ /**
+ * @cgroup_set_runtime: A cgroup's runtime is being set directly
+ * @cgrp: cgroup whose runtime is being set
+ * @runtime_us: runtime in microseconds
+ *
+ * Update @cgrp's available runtime. This is from the cpu.max.runtime
+ * cgroup interface. @runtime_us is the total runtime budget that the
+ * cgroup may consume. The BPF scheduler should track this value and
+ * throttle tasks in @cgrp once the budget is exhausted.
+ */
+ void (*cgroup_set_runtime)(struct cgroup *cgrp, u64 runtime_us);
+
#endif /* CONFIG_EXT_GROUP_SCHED */
/**
--
2.47.3
Amazon Development Centre (South Africa) (Proprietary) Limited
29 Gogosoa Street, Observatory, Cape Town, Western Cape, 7925, South Africa
Registration Number: 2004 / 034463 / 07
prev parent reply other threads:[~2026-05-25 19:38 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-25 19:36 [PATCH 0/2] sched/fair: expose cpu.max.runtime for credit injection Fernand Sieber
2026-05-25 19:36 ` [PATCH 1/2] sched/fair: expose cpu.max.runtime to set bandwidth runtime directly Fernand Sieber
2026-05-26 20:52 ` Benjamin Segall
2026-05-28 7:25 ` Fernand Sieber
2026-05-27 19:04 ` Tejun Heo
2026-05-28 6:54 ` Fernand Sieber
2026-05-28 14:37 ` Tejun Heo
2026-05-25 19:36 ` Fernand Sieber [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260525193622.70282-3-sieberf@amazon.com \
--to=sieberf@amazon.com \
--cc=arighi@nvidia.com \
--cc=bsegall@google.com \
--cc=changwoo@igalia.com \
--cc=dietmar.eggemann@arm.com \
--cc=dwmw@amazon.co.uk \
--cc=fmubeen@amazon.de \
--cc=hborghor@amazon.de \
--cc=juri.lelli@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mgorman@suse.de \
--cc=mingo@redhat.com \
--cc=nh-open-source@amazon.com \
--cc=peterz@infradead.org \
--cc=tj@kernel.org \
--cc=vincent.guittot@linaro.org \
--cc=void@manifault.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.