From: Peter Zijlstra <peterz@infradead.org>
To: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: tj@kernel.org, linux-kernel@vger.kernel.org, mingo@redhat.com,
juri.lelli@redhat.com, vincent.guittot@linaro.org,
dietmar.eggemann@arm.com, rostedt@goodmis.org,
bsegall@google.com, mgorman@suse.de, vschneid@redhat.com,
longman@redhat.com, hannes@cmpxchg.org, mkoutny@suse.com,
void@manifault.com, arighi@nvidia.com, changwoo@igalia.com,
cgroups@vger.kernel.org, sched-ext@lists.linux.dev,
liuwenfang@honor.com, tglx@linutronix.de
Subject: Re: [PATCH 01/14] sched: Employ sched_change guards
Date: Thu, 11 Sep 2025 12:10:08 +0200 [thread overview]
Message-ID: <20250911101008.GD1386988@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <20250911095523.GX3289052@noisy.programming.kicks-ass.net>
On Thu, Sep 11, 2025 at 11:55:23AM +0200, Peter Zijlstra wrote:
> On Thu, Sep 11, 2025 at 02:36:21PM +0530, K Prateek Nayak wrote:
> > Hello Peter,
> >
> > On 9/10/2025 9:14 PM, Peter Zijlstra wrote:
> > > @@ -9240,8 +9213,9 @@ static void sched_change_group(struct ta
> > > */
> > > void sched_move_task(struct task_struct *tsk, bool for_autogroup)
> > > {
> > > - int queued, running, queue_flags =
> > > + unsigned int queue_flags =
> > > DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
> >
> > nit.
> >
> > Since we don't do a complete dequeue for delayed task in
> > sched_move_task(), can we get rid of that DEQUEUE_NOCLOCK and ...
> >
> > > + bool resched = false;
> > > struct rq *rq;
> > >
> > > CLASS(task_rq_lock, rq_guard)(tsk);
> > > @@ -9249,28 +9223,12 @@ void sched_move_task(struct task_struct
> > >
> > > update_rq_clock(rq);
> >
> > ... this clock update and instead rely on sched_change_begin() to
> > handle it within the guard?
>
> Yeah, I suppose we could. But let me try and do that in a later patch,
> on-top of all this.
Something like so?
---
core.c | 33 +++++++++++----------------------
ext.c | 12 ++++--------
syscalls.c | 4 +---
3 files changed, 16 insertions(+), 33 deletions(-)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2359,10 +2359,8 @@ static void migrate_disable_switch(struc
if (p->cpus_ptr != &p->cpus_mask)
return;
- scoped_guard (task_rq_lock, p) {
- update_rq_clock(scope.rq);
+ scoped_guard (task_rq_lock, p)
do_set_cpus_allowed(p, &ac);
- }
}
void migrate_disable(void)
@@ -2716,9 +2714,7 @@ void set_cpus_allowed_common(struct task
static void
do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx)
{
- u32 flags = DEQUEUE_SAVE | DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
-
- scoped_guard (sched_change, p, flags) {
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_LOCKED) {
p->sched_class->set_cpus_allowed(p, ctx);
mm_set_cpus_allowed(p->mm, ctx->new_mask);
}
@@ -2740,10 +2736,8 @@ void set_cpus_allowed_force(struct task_
struct rcu_head rcu;
};
- scoped_guard (__task_rq_lock, p) {
- update_rq_clock(scope.rq);
+ scoped_guard (__task_rq_lock, p)
do_set_cpus_allowed(p, &ac);
- }
/*
* Because this is called with p->pi_lock held, it is not possible
@@ -9159,16 +9153,13 @@ static void sched_change_group(struct ta
*/
void sched_move_task(struct task_struct *tsk, bool for_autogroup)
{
- unsigned int queue_flags =
- DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
+ unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_LOCKED;
bool resched = false;
struct rq *rq;
CLASS(task_rq_lock, rq_guard)(tsk);
rq = rq_guard.rq;
- update_rq_clock(rq);
-
scoped_guard (sched_change, tsk, queue_flags) {
sched_change_group(tsk);
if (!for_autogroup)
@@ -10852,19 +10843,17 @@ struct sched_change_ctx *sched_change_be
}
#endif
+ if (!(flags & DEQUEUE_NOCLOCK)) {
+ update_rq_clock(rq);
+ flags |= DEQUEUE_NOCLOCK;
+ }
+
if (flags & DEQUEUE_CLASS) {
if (WARN_ON_ONCE(flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)))
flags &= ~(DEQUEUE_SAVE | DEQUEUE_MOVE);
- if (p->sched_class->switching_from) {
- /*
- * switching_from_fair() assumes CLASS implies NOCLOCK;
- * fixing this assumption would mean switching_from()
- * would need to be able to change flags.
- */
- WARN_ON(!(flags & DEQUEUE_NOCLOCK));
+ if (p->sched_class->switching_from)
p->sched_class->switching_from(rq, p);
- }
}
*ctx = (struct sched_change_ctx){
@@ -10915,7 +10904,7 @@ void sched_change_end(struct sched_chang
p->sched_class->switching_to(rq, p);
if (ctx->queued)
- enqueue_task(rq, p, ctx->flags | ENQUEUE_NOCLOCK);
+ enqueue_task(rq, p, ctx->flags);
if (ctx->running)
set_next_task(rq, p, ctx->flags);
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -5018,14 +5018,12 @@ static void scx_disable_workfn(struct kt
scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
- unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE |
- DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
+ unsigned int queue_flags =
+ DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_LOCKED;
const struct sched_class *old_class = p->sched_class;
const struct sched_class *new_class =
__setscheduler_class(p->policy, p->prio);
- update_rq_clock(task_rq(p));
-
if (old_class != new_class) {
queue_flags |= DEQUEUE_CLASS;
queue_flags &= ~(DEQUEUE_SAVE | DEQUEUE_MOVE);
@@ -5763,8 +5761,8 @@ static int scx_enable(struct sched_ext_o
percpu_down_write(&scx_fork_rwsem);
scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
- unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE |
- DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
+ unsigned int queue_flags =
+ DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_LOCKED;
const struct sched_class *old_class = p->sched_class;
const struct sched_class *new_class =
__setscheduler_class(p->policy, p->prio);
@@ -5772,8 +5770,6 @@ static int scx_enable(struct sched_ext_o
if (!tryget_task_struct(p))
continue;
- update_rq_clock(task_rq(p));
-
if (old_class != new_class) {
queue_flags |= DEQUEUE_CLASS;
queue_flags &= ~(DEQUEUE_SAVE | DEQUEUE_MOVE);
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -76,8 +76,6 @@ void set_user_nice(struct task_struct *p
CLASS(task_rq_lock, rq_guard)(p);
rq = rq_guard.rq;
- update_rq_clock(rq);
-
/*
* The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
@@ -89,7 +87,7 @@ void set_user_nice(struct task_struct *p
return;
}
- scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK | DEQUEUE_LOCKED) {
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_LOCKED) {
p->static_prio = NICE_TO_PRIO(nice);
set_load_weight(p, true);
old_prio = p->prio;
next prev parent reply other threads:[~2025-09-11 10:10 UTC|newest]
Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-10 15:44 [PATCH 00/14] sched: Support shared runqueue locking Peter Zijlstra
2025-09-10 15:44 ` [PATCH 01/14] sched: Employ sched_change guards Peter Zijlstra
2025-09-11 9:06 ` K Prateek Nayak
2025-09-11 9:55 ` Peter Zijlstra
2025-09-11 10:10 ` Peter Zijlstra [this message]
2025-09-11 10:37 ` K Prateek Nayak
2025-10-06 15:21 ` Shrikanth Hegde
2025-10-06 18:14 ` Peter Zijlstra
2025-10-07 5:12 ` Shrikanth Hegde
2025-10-07 9:34 ` Peter Zijlstra
2025-10-16 9:33 ` [tip: sched/core] sched: Mandate shared flags for sched_change tip-bot2 for Peter Zijlstra
2025-09-10 15:44 ` [PATCH 02/14] sched: Re-arrange the {EN,DE}QUEUE flags Peter Zijlstra
2025-09-10 15:44 ` [PATCH 03/14] sched: Fold sched_class::switch{ing,ed}_{to,from}() into the change pattern Peter Zijlstra
2025-09-10 15:44 ` [PATCH 04/14] sched: Cleanup sched_delayed handling for class switches Peter Zijlstra
2025-09-10 15:44 ` [PATCH 05/14] sched: Move sched_class::prio_changed() into the change pattern Peter Zijlstra
2025-09-11 1:44 ` Tejun Heo
2025-09-10 15:44 ` [PATCH 06/14] sched: Fix migrate_disable_switch() locking Peter Zijlstra
2025-09-10 15:44 ` [PATCH 07/14] sched: Fix do_set_cpus_allowed() locking Peter Zijlstra
2025-10-30 0:12 ` Mark Brown
2025-10-30 9:07 ` Peter Zijlstra
2025-10-30 12:47 ` Mark Brown
2025-09-10 15:44 ` [PATCH 08/14] sched: Rename do_set_cpus_allowed() Peter Zijlstra
2025-09-10 15:44 ` [PATCH 09/14] sched: Make __do_set_cpus_allowed() use the sched_change pattern Peter Zijlstra
2025-09-10 15:44 ` [PATCH 10/14] sched: Add locking comments to sched_class methods Peter Zijlstra
2025-09-10 15:44 ` [PATCH 11/14] sched: Add flags to {put_prev,set_next}_task() methods Peter Zijlstra
2025-09-10 15:44 ` [PATCH 12/14] sched: Add shared runqueue locking to __task_rq_lock() Peter Zijlstra
2025-09-12 0:19 ` Tejun Heo
2025-09-12 11:54 ` Peter Zijlstra
2025-09-12 14:11 ` Peter Zijlstra
2025-09-12 17:56 ` Tejun Heo
2025-09-15 8:38 ` Peter Zijlstra
2025-09-16 22:29 ` Tejun Heo
2025-09-16 22:41 ` Tejun Heo
2025-09-25 8:35 ` Peter Zijlstra
2025-09-25 21:43 ` Tejun Heo
2025-09-26 9:59 ` Peter Zijlstra
2025-09-26 16:48 ` Tejun Heo
2025-09-26 10:36 ` Peter Zijlstra
2025-09-26 21:39 ` Tejun Heo
2025-09-29 10:06 ` Peter Zijlstra
2025-09-30 23:49 ` Tejun Heo
2025-10-01 11:54 ` Peter Zijlstra
2025-10-02 23:32 ` Tejun Heo
2025-09-10 15:44 ` [PATCH 13/14] sched: Add {DE,EN}QUEUE_LOCKED Peter Zijlstra
2025-09-11 2:01 ` Tejun Heo
2025-09-11 9:42 ` Peter Zijlstra
2025-09-11 20:40 ` Tejun Heo
2025-09-12 14:19 ` Peter Zijlstra
2025-09-12 16:32 ` Tejun Heo
2025-09-13 22:32 ` Tejun Heo
2025-09-15 8:48 ` Peter Zijlstra
2025-09-25 13:10 ` Peter Zijlstra
2025-09-25 15:40 ` Tejun Heo
2025-09-25 15:53 ` Peter Zijlstra
2025-09-25 18:44 ` Tejun Heo
2025-09-10 15:44 ` [PATCH 14/14] sched/ext: Implement p->srq_lock support Peter Zijlstra
2025-09-10 16:07 ` Peter Zijlstra
2025-09-10 17:32 ` [PATCH 00/14] sched: Support shared runqueue locking Andrea Righi
2025-09-10 18:19 ` Peter Zijlstra
2025-09-10 18:35 ` Peter Zijlstra
2025-09-10 19:00 ` Andrea Righi
2025-09-11 9:58 ` Peter Zijlstra
2025-09-11 14:51 ` Andrea Righi
2025-09-11 14:00 ` Peter Zijlstra
2025-09-11 14:30 ` Peter Zijlstra
2025-09-11 14:48 ` Andrea Righi
2025-09-18 15:15 ` Christian Loehle
2025-09-25 9:00 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250911101008.GD1386988@noisy.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=arighi@nvidia.com \
--cc=bsegall@google.com \
--cc=cgroups@vger.kernel.org \
--cc=changwoo@igalia.com \
--cc=dietmar.eggemann@arm.com \
--cc=hannes@cmpxchg.org \
--cc=juri.lelli@redhat.com \
--cc=kprateek.nayak@amd.com \
--cc=linux-kernel@vger.kernel.org \
--cc=liuwenfang@honor.com \
--cc=longman@redhat.com \
--cc=mgorman@suse.de \
--cc=mingo@redhat.com \
--cc=mkoutny@suse.com \
--cc=rostedt@goodmis.org \
--cc=sched-ext@lists.linux.dev \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
--cc=vincent.guittot@linaro.org \
--cc=void@manifault.com \
--cc=vschneid@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.