From: Peter Zijlstra <peterz@infradead.org>
To: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: tj@kernel.org, linux-kernel@vger.kernel.org, mingo@redhat.com,
juri.lelli@redhat.com, vincent.guittot@linaro.org,
dietmar.eggemann@arm.com, rostedt@goodmis.org,
bsegall@google.com, mgorman@suse.de, vschneid@redhat.com,
longman@redhat.com, hannes@cmpxchg.org, mkoutny@suse.com,
void@manifault.com, arighi@nvidia.com, changwoo@igalia.com,
cgroups@vger.kernel.org, sched-ext@lists.linux.dev,
liuwenfang@honor.com, tglx@linutronix.de
Subject: Re: [PATCH 01/14] sched: Employ sched_change guards
Date: Thu, 11 Sep 2025 12:10:08 +0200 [thread overview]
Message-ID: <20250911101008.GD1386988@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <20250911095523.GX3289052@noisy.programming.kicks-ass.net>
On Thu, Sep 11, 2025 at 11:55:23AM +0200, Peter Zijlstra wrote:
> On Thu, Sep 11, 2025 at 02:36:21PM +0530, K Prateek Nayak wrote:
> > Hello Peter,
> >
> > On 9/10/2025 9:14 PM, Peter Zijlstra wrote:
> > > @@ -9240,8 +9213,9 @@ static void sched_change_group(struct ta
> > > */
> > > void sched_move_task(struct task_struct *tsk, bool for_autogroup)
> > > {
> > > - int queued, running, queue_flags =
> > > + unsigned int queue_flags =
> > > DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
> >
> > nit.
> >
> > Since we don't do a complete dequeue for delayed task in
> > sched_move_task(), can we get rid of that DEQUEUE_NOCLOCK and ...
> >
> > > + bool resched = false;
> > > struct rq *rq;
> > >
> > > CLASS(task_rq_lock, rq_guard)(tsk);
> > > @@ -9249,28 +9223,12 @@ void sched_move_task(struct task_struct
> > >
> > > update_rq_clock(rq);
> >
> > ... this clock update and instead rely on sched_change_begin() to
> > handle it within the guard?
>
> Yeah, I suppose we could. But let me try and do that in a later patch,
> on-top of all this.
Something like so?
---
core.c | 33 +++++++++++----------------------
ext.c | 12 ++++--------
syscalls.c | 4 +---
3 files changed, 16 insertions(+), 33 deletions(-)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2359,10 +2359,8 @@ static void migrate_disable_switch(struc
if (p->cpus_ptr != &p->cpus_mask)
return;
- scoped_guard (task_rq_lock, p) {
- update_rq_clock(scope.rq);
+ scoped_guard (task_rq_lock, p)
do_set_cpus_allowed(p, &ac);
- }
}
void migrate_disable(void)
@@ -2716,9 +2714,7 @@ void set_cpus_allowed_common(struct task
static void
do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx)
{
- u32 flags = DEQUEUE_SAVE | DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
-
- scoped_guard (sched_change, p, flags) {
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_LOCKED) {
p->sched_class->set_cpus_allowed(p, ctx);
mm_set_cpus_allowed(p->mm, ctx->new_mask);
}
@@ -2740,10 +2736,8 @@ void set_cpus_allowed_force(struct task_
struct rcu_head rcu;
};
- scoped_guard (__task_rq_lock, p) {
- update_rq_clock(scope.rq);
+ scoped_guard (__task_rq_lock, p)
do_set_cpus_allowed(p, &ac);
- }
/*
* Because this is called with p->pi_lock held, it is not possible
@@ -9159,16 +9153,13 @@ static void sched_change_group(struct ta
*/
void sched_move_task(struct task_struct *tsk, bool for_autogroup)
{
- unsigned int queue_flags =
- DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
+ unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_LOCKED;
bool resched = false;
struct rq *rq;
CLASS(task_rq_lock, rq_guard)(tsk);
rq = rq_guard.rq;
- update_rq_clock(rq);
-
scoped_guard (sched_change, tsk, queue_flags) {
sched_change_group(tsk);
if (!for_autogroup)
@@ -10852,19 +10843,17 @@ struct sched_change_ctx *sched_change_be
}
#endif
+ if (!(flags & DEQUEUE_NOCLOCK)) {
+ update_rq_clock(rq);
+ flags |= DEQUEUE_NOCLOCK;
+ }
+
if (flags & DEQUEUE_CLASS) {
if (WARN_ON_ONCE(flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)))
flags &= ~(DEQUEUE_SAVE | DEQUEUE_MOVE);
- if (p->sched_class->switching_from) {
- /*
- * switching_from_fair() assumes CLASS implies NOCLOCK;
- * fixing this assumption would mean switching_from()
- * would need to be able to change flags.
- */
- WARN_ON(!(flags & DEQUEUE_NOCLOCK));
+ if (p->sched_class->switching_from)
p->sched_class->switching_from(rq, p);
- }
}
*ctx = (struct sched_change_ctx){
@@ -10915,7 +10904,7 @@ void sched_change_end(struct sched_chang
p->sched_class->switching_to(rq, p);
if (ctx->queued)
- enqueue_task(rq, p, ctx->flags | ENQUEUE_NOCLOCK);
+ enqueue_task(rq, p, ctx->flags);
if (ctx->running)
set_next_task(rq, p, ctx->flags);
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -5018,14 +5018,12 @@ static void scx_disable_workfn(struct kt
scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
- unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE |
- DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
+ unsigned int queue_flags =
+ DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_LOCKED;
const struct sched_class *old_class = p->sched_class;
const struct sched_class *new_class =
__setscheduler_class(p->policy, p->prio);
- update_rq_clock(task_rq(p));
-
if (old_class != new_class) {
queue_flags |= DEQUEUE_CLASS;
queue_flags &= ~(DEQUEUE_SAVE | DEQUEUE_MOVE);
@@ -5763,8 +5761,8 @@ static int scx_enable(struct sched_ext_o
percpu_down_write(&scx_fork_rwsem);
scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
- unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE |
- DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
+ unsigned int queue_flags =
+ DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_LOCKED;
const struct sched_class *old_class = p->sched_class;
const struct sched_class *new_class =
__setscheduler_class(p->policy, p->prio);
@@ -5772,8 +5770,6 @@ static int scx_enable(struct sched_ext_o
if (!tryget_task_struct(p))
continue;
- update_rq_clock(task_rq(p));
-
if (old_class != new_class) {
queue_flags |= DEQUEUE_CLASS;
queue_flags &= ~(DEQUEUE_SAVE | DEQUEUE_MOVE);
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -76,8 +76,6 @@ void set_user_nice(struct task_struct *p
CLASS(task_rq_lock, rq_guard)(p);
rq = rq_guard.rq;
- update_rq_clock(rq);
-
/*
* The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
@@ -89,7 +87,7 @@ void set_user_nice(struct task_struct *p
return;
}
- scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK | DEQUEUE_LOCKED) {
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_LOCKED) {
p->static_prio = NICE_TO_PRIO(nice);
set_load_weight(p, true);
old_prio = p->prio;
next prev parent reply other threads:[~2025-09-11 10:10 UTC|newest]
Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-10 15:44 [PATCH 00/14] sched: Support shared runqueue locking Peter Zijlstra
2025-09-10 15:44 ` [PATCH 01/14] sched: Employ sched_change guards Peter Zijlstra
2025-09-11 9:06 ` K Prateek Nayak
2025-09-11 9:55 ` Peter Zijlstra
2025-09-11 10:10 ` Peter Zijlstra [this message]
2025-09-11 10:37 ` K Prateek Nayak
2025-10-06 15:21 ` Shrikanth Hegde
2025-10-06 18:14 ` Peter Zijlstra
2025-10-07 5:12 ` Shrikanth Hegde
2025-10-07 9:34 ` Peter Zijlstra
2025-10-16 9:33 ` [tip: sched/core] sched: Mandate shared flags for sched_change tip-bot2 for Peter Zijlstra
2025-09-10 15:44 ` [PATCH 02/14] sched: Re-arrange the {EN,DE}QUEUE flags Peter Zijlstra
2025-09-10 15:44 ` [PATCH 03/14] sched: Fold sched_class::switch{ing,ed}_{to,from}() into the change pattern Peter Zijlstra
2025-09-10 15:44 ` [PATCH 04/14] sched: Cleanup sched_delayed handling for class switches Peter Zijlstra
2025-09-10 15:44 ` [PATCH 05/14] sched: Move sched_class::prio_changed() into the change pattern Peter Zijlstra
2025-09-11 1:44 ` Tejun Heo
2025-09-10 15:44 ` [PATCH 06/14] sched: Fix migrate_disable_switch() locking Peter Zijlstra
2025-09-10 15:44 ` [PATCH 07/14] sched: Fix do_set_cpus_allowed() locking Peter Zijlstra
2025-10-30 0:12 ` Mark Brown
2025-10-30 9:07 ` Peter Zijlstra
2025-10-30 12:47 ` Mark Brown
2025-09-10 15:44 ` [PATCH 08/14] sched: Rename do_set_cpus_allowed() Peter Zijlstra
2025-09-10 15:44 ` [PATCH 09/14] sched: Make __do_set_cpus_allowed() use the sched_change pattern Peter Zijlstra
2025-09-10 15:44 ` [PATCH 10/14] sched: Add locking comments to sched_class methods Peter Zijlstra
2025-09-10 15:44 ` [PATCH 11/14] sched: Add flags to {put_prev,set_next}_task() methods Peter Zijlstra
2025-09-10 15:44 ` [PATCH 12/14] sched: Add shared runqueue locking to __task_rq_lock() Peter Zijlstra
2025-09-12 0:19 ` Tejun Heo
2025-09-12 11:54 ` Peter Zijlstra
2025-09-12 14:11 ` Peter Zijlstra
2025-09-12 17:56 ` Tejun Heo
2025-09-15 8:38 ` Peter Zijlstra
2025-09-16 22:29 ` Tejun Heo
2025-09-16 22:41 ` Tejun Heo
2025-09-25 8:35 ` Peter Zijlstra
2025-09-25 21:43 ` Tejun Heo
2025-09-26 9:59 ` Peter Zijlstra
2025-09-26 16:48 ` Tejun Heo
2025-09-26 10:36 ` Peter Zijlstra
2025-09-26 21:39 ` Tejun Heo
2025-09-29 10:06 ` Peter Zijlstra
2025-09-30 23:49 ` Tejun Heo
2025-10-01 11:54 ` Peter Zijlstra
2025-10-02 23:32 ` Tejun Heo
2025-09-10 15:44 ` [PATCH 13/14] sched: Add {DE,EN}QUEUE_LOCKED Peter Zijlstra
2025-09-11 2:01 ` Tejun Heo
2025-09-11 9:42 ` Peter Zijlstra
2025-09-11 20:40 ` Tejun Heo
2025-09-12 14:19 ` Peter Zijlstra
2025-09-12 16:32 ` Tejun Heo
2025-09-13 22:32 ` Tejun Heo
2025-09-15 8:48 ` Peter Zijlstra
2025-09-25 13:10 ` Peter Zijlstra
2025-09-25 15:40 ` Tejun Heo
2025-09-25 15:53 ` Peter Zijlstra
2025-09-25 18:44 ` Tejun Heo
2025-09-10 15:44 ` [PATCH 14/14] sched/ext: Implement p->srq_lock support Peter Zijlstra
2025-09-10 16:07 ` Peter Zijlstra
2025-09-10 17:32 ` [PATCH 00/14] sched: Support shared runqueue locking Andrea Righi
2025-09-10 18:19 ` Peter Zijlstra
2025-09-10 18:35 ` Peter Zijlstra
2025-09-10 19:00 ` Andrea Righi
2025-09-11 9:58 ` Peter Zijlstra
2025-09-11 14:51 ` Andrea Righi
2025-09-11 14:00 ` Peter Zijlstra
2025-09-11 14:30 ` Peter Zijlstra
2025-09-11 14:48 ` Andrea Righi
2025-09-18 15:15 ` Christian Loehle
2025-09-25 9:00 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250911101008.GD1386988@noisy.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=arighi@nvidia.com \
--cc=bsegall@google.com \
--cc=cgroups@vger.kernel.org \
--cc=changwoo@igalia.com \
--cc=dietmar.eggemann@arm.com \
--cc=hannes@cmpxchg.org \
--cc=juri.lelli@redhat.com \
--cc=kprateek.nayak@amd.com \
--cc=linux-kernel@vger.kernel.org \
--cc=liuwenfang@honor.com \
--cc=longman@redhat.com \
--cc=mgorman@suse.de \
--cc=mingo@redhat.com \
--cc=mkoutny@suse.com \
--cc=rostedt@goodmis.org \
--cc=sched-ext@lists.linux.dev \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
--cc=vincent.guittot@linaro.org \
--cc=void@manifault.com \
--cc=vschneid@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.