The Linux Kernel Mailing List
 help / color / mirror / Atom feed
From: Fernand Sieber <sieberf@amazon.com>
To: Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>
Cc: Tejun Heo <tj@kernel.org>, David Vernet <void@manifault.com>,
	Andrea Righi <arighi@nvidia.com>,
	Changwoo Min <changwoo@igalia.com>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	<linux-kernel@vger.kernel.org>, <nh-open-source@amazon.com>,
	Fahad Mubeen <fmubeen@amazon.de>,
	"Hendrik Borghorst" <hborghor@amazon.de>,
	David Woodhouse <dwmw@amazon.co.uk>,
	Fernand Sieber <sieberf@amazon.com>
Subject: [PATCH 2/2] sched/ext: add cgroup_set_runtime ops callback
Date: Mon, 25 May 2026 21:36:22 +0200	[thread overview]
Message-ID: <20260525193622.70282-3-sieberf@amazon.com> (raw)
In-Reply-To: <20260525193622.70282-1-sieberf@amazon.com>

Add a sched_ext_ops callback that is invoked when userspace writes to
cpu.max.runtime. This allows BPF schedulers to be notified when runtime
credits are injected into a cgroup, enabling SCX-side credit tracking.

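The callback is invoked only when the written value differs from the
one cached in tg->scx.bw_runtime_us (and only while SCX cgroup support
is enabled and the scheduler implements the op). The cached value is
updated on every write, whether or not the callback fires.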

Signed-off-by: Fernand Sieber <sieberf@amazon.com>
---
 include/linux/sched/ext.h   |  1 +
 kernel/sched/core.c         |  2 ++
 kernel/sched/ext.c          | 17 +++++++++++++++++
 kernel/sched/ext.h          |  2 ++
 kernel/sched/ext_internal.h | 12 ++++++++++++
 5 files changed, 34 insertions(+)

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 2129e18ad..591801a50 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -273,6 +273,7 @@ struct scx_task_group {
 	u64			bw_period_us;
 	u64			bw_quota_us;
 	u64			bw_burst_us;
+	u64			bw_runtime_us;
 	bool			idle;
 #endif
 };
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d92e5840b..369dd03d3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10165,6 +10165,8 @@ static int cpu_runtime_write_u64(struct cgroup_subsys_state *css,
 	cfs_b->runtime = (u64)runtime_us * NSEC_PER_USEC;
 	raw_spin_unlock_irq(&cfs_b->lock);
 
+
+	scx_group_set_runtime(tg, runtime_us);
 	return 0;
 }
 
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 827a96e39..2ce505ad8 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4488,6 +4488,23 @@ void scx_group_set_bandwidth(struct task_group *tg,
 
 	percpu_up_read(&scx_cgroup_ops_rwsem);
 }
+
+void scx_group_set_runtime(struct task_group *tg, u64 runtime_us)
+{
+	struct scx_sched *sch;
+
+	percpu_down_read(&scx_cgroup_ops_rwsem);
+	sch = scx_root;
+
+	if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_runtime) &&
+	    tg->scx.bw_runtime_us != runtime_us)
+		SCX_CALL_OP(sch, cgroup_set_runtime, NULL,
+			    tg_cgrp(tg), runtime_us);
+
+	tg->scx.bw_runtime_us = runtime_us;
+
+	percpu_up_read(&scx_cgroup_ops_rwsem);
+}
 #endif	/* CONFIG_EXT_GROUP_SCHED */
 
 #if defined(CONFIG_EXT_GROUP_SCHED) || defined(CONFIG_EXT_SUB_SCHED)
diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h
index 0b7fc46ae..00103ec3d 100644
--- a/kernel/sched/ext.h
+++ b/kernel/sched/ext.h
@@ -81,6 +81,7 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset);
 void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight);
 void scx_group_set_idle(struct task_group *tg, bool idle);
 void scx_group_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us);
+void scx_group_set_runtime(struct task_group *tg, u64 runtime_us);
 #else	/* CONFIG_EXT_GROUP_SCHED */
 static inline void scx_tg_init(struct task_group *tg) {}
 static inline int scx_tg_online(struct task_group *tg) { return 0; }
@@ -91,5 +92,6 @@ static inline void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) {}
 static inline void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight) {}
 static inline void scx_group_set_idle(struct task_group *tg, bool idle) {}
 static inline void scx_group_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us) {}
+static inline void scx_group_set_runtime(struct task_group *tg, u64 runtime_us) {}
 #endif	/* CONFIG_EXT_GROUP_SCHED */
 #endif	/* CONFIG_CGROUP_SCHED */
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index a075732d4..21e6ab7af 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -739,6 +739,18 @@ struct sched_ext_ops {
 	 */
 	void (*cgroup_set_idle)(struct cgroup *cgrp, bool idle);
 
+	/**
+	 * @cgroup_set_runtime: A cgroup's runtime is being set directly
+	 * @cgrp: cgroup whose runtime is being set
+	 * @runtime_us: runtime in microseconds
+	 *
+	 * Update @cgrp's available runtime. This is from the cpu.max.runtime
+	 * cgroup interface. @runtime_us is the total runtime budget that the
+	 * cgroup may consume. The BPF scheduler should track this value and
+	 * throttle tasks in @cgrp once the budget is exhausted.
+	 */
+	void (*cgroup_set_runtime)(struct cgroup *cgrp, u64 runtime_us);
+
 #endif	/* CONFIG_EXT_GROUP_SCHED */
 
 	/**
-- 
2.47.3




Amazon Development Centre (South Africa) (Proprietary) Limited
29 Gogosoa Street, Observatory, Cape Town, Western Cape, 7925, South Africa
Registration Number: 2004 / 034463 / 07


      parent reply	other threads:[~2026-05-25 19:38 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-25 19:36 [PATCH 0/2] sched/fair: expose cpu.max.runtime for credit injection Fernand Sieber
2026-05-25 19:36 ` [PATCH 1/2] sched/fair: expose cpu.max.runtime to set bandwidth runtime directly Fernand Sieber
2026-05-26 20:52   ` Benjamin Segall
2026-05-28  7:25     ` Fernand Sieber
2026-05-27 19:04   ` Tejun Heo
2026-05-28  6:54     ` Fernand Sieber
2026-05-28 14:37       ` Tejun Heo
2026-05-25 19:36 ` Fernand Sieber [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260525193622.70282-3-sieberf@amazon.com \
    --to=sieberf@amazon.com \
    --cc=arighi@nvidia.com \
    --cc=bsegall@google.com \
    --cc=changwoo@igalia.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=dwmw@amazon.co.uk \
    --cc=fmubeen@amazon.de \
    --cc=hborghor@amazon.de \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=nh-open-source@amazon.com \
    --cc=peterz@infradead.org \
    --cc=tj@kernel.org \
    --cc=vincent.guittot@linaro.org \
    --cc=void@manifault.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox