From: Peter Zijlstra <peterz@infradead.org>
To: tj@kernel.org
Cc: linux-kernel@vger.kernel.org, peterz@infradead.org,
mingo@redhat.com, juri.lelli@redhat.com,
vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
vschneid@redhat.com, longman@redhat.com, hannes@cmpxchg.org,
mkoutny@suse.com, void@manifault.com, arighi@nvidia.com,
changwoo@igalia.com, cgroups@vger.kernel.org,
sched-ext@lists.linux.dev, liuwenfang@honor.com,
tglx@linutronix.de
Subject: [PATCH 13/14] sched: Add {DE,EN}QUEUE_LOCKED
Date: Wed, 10 Sep 2025 17:44:22 +0200 [thread overview]
Message-ID: <20250910155809.800554594@infradead.org> (raw)
In-Reply-To: 20250910154409.446470175@infradead.org
Provide a LOCKED queue flag, indicating that the {en,de}queue()
operation is in task_rq_lock() context.
Note: the sched_change in scx_bypass() is the only one that does not
use task_rq_lock(). If that were fixed, we could have sched_change
imply LOCKED.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
kernel/sched/core.c | 31 +++++++++++++++++++++++++------
kernel/sched/sched.h | 7 +++++++
kernel/sched/syscalls.c | 4 ++--
3 files changed, 34 insertions(+), 8 deletions(-)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2716,7 +2716,7 @@ void set_cpus_allowed_common(struct task
static void
do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx)
{
- u32 flags = DEQUEUE_SAVE | DEQUEUE_NOCLOCK;
+ u32 flags = DEQUEUE_SAVE | DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
scoped_guard (sched_change, p, flags) {
p->sched_class->set_cpus_allowed(p, ctx);
@@ -3749,7 +3749,7 @@ static int ttwu_runnable(struct task_str
if (task_on_rq_queued(p)) {
update_rq_clock(rq);
if (p->se.sched_delayed)
- enqueue_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_DELAYED);
+ enqueue_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_DELAYED | ENQUEUE_LOCKED);
if (!task_on_cpu(rq, p)) {
/*
* When on_rq && !on_cpu the task is preempted, see if
@@ -4816,7 +4816,7 @@ void wake_up_new_task(struct task_struct
update_rq_clock(rq);
post_init_entity_util_avg(p);
- activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL);
+ activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL | ENQUEUE_LOCKED);
trace_sched_wakeup_new(p);
wakeup_preempt(rq, p, wake_flags);
if (p->sched_class->task_woken) {
@@ -7310,7 +7310,7 @@ void rt_mutex_post_schedule(void)
void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
{
int prio, oldprio, queue_flag =
- DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+ DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
const struct sched_class *prev_class, *next_class;
struct rq_flags rf;
struct rq *rq;
@@ -8056,7 +8056,7 @@ int migrate_task_to(struct task_struct *
void sched_setnuma(struct task_struct *p, int nid)
{
guard(task_rq_lock)(p);
- scoped_guard (sched_change, p, DEQUEUE_SAVE)
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_LOCKED)
p->numa_preferred_nid = nid;
}
#endif /* CONFIG_NUMA_BALANCING */
@@ -9160,7 +9160,7 @@ static void sched_change_group(struct ta
void sched_move_task(struct task_struct *tsk, bool for_autogroup)
{
unsigned int queue_flags =
- DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+ DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
bool resched = false;
struct rq *rq;
@@ -10841,6 +10841,13 @@ struct sched_change_ctx *sched_change_be
struct rq *rq = task_rq(p);
lockdep_assert_rq_held(rq);
+#ifdef CONFIG_PROVE_LOCKING
+ if (flags & DEQUEUE_LOCKED) {
+ lockdep_assert_held(&p->pi_lock);
+ if (p->srq_lock)
+ lockdep_assert_held(p->srq_lock);
+ }
+#endif
if (flags & DEQUEUE_CLASS) {
if (WARN_ON_ONCE(flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)))
@@ -10862,6 +10869,9 @@ struct sched_change_ctx *sched_change_be
.flags = flags,
.queued = task_on_rq_queued(p),
.running = task_current(rq, p),
+#ifdef CONFIG_PROVE_LOCKING
+ .srq_lock = p->srq_lock,
+#endif
};
if (!(flags & DEQUEUE_CLASS)) {
@@ -10888,6 +10898,15 @@ void sched_change_end(struct sched_chang
struct rq *rq = task_rq(p);
lockdep_assert_rq_held(rq);
+#ifdef CONFIG_PROVE_LOCKING
+ if (ctx->flags & ENQUEUE_LOCKED) {
+ lockdep_assert_held(&p->pi_lock);
+ if (p->srq_lock)
+ lockdep_assert_held(p->srq_lock);
+ if (ctx->srq_lock && ctx->srq_lock != p->srq_lock)
+ lockdep_assert_not_held(ctx->srq_lock);
+ }
+#endif
if ((ctx->flags & ENQUEUE_CLASS) && p->sched_class->switching_to)
p->sched_class->switching_to(rq, p);
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2340,6 +2340,8 @@ extern const u32 sched_prio_to_wmult[40
* CLASS - going to update p->sched_class; makes sched_change call the
* various switch methods.
*
+ * LOCKED - task_rq_lock() context, implies p->srq_lock taken when set.
+ *
* ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
* ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
* ENQUEUE_MIGRATED - the task was migrated during wakeup
@@ -2355,6 +2357,7 @@ extern const u32 sched_prio_to_wmult[40
#define DEQUEUE_MIGRATING 0x0010 /* Matches ENQUEUE_MIGRATING */
#define DEQUEUE_DELAYED 0x0020 /* Matches ENQUEUE_DELAYED */
#define DEQUEUE_CLASS 0x0040 /* Matches ENQUEUE_CLASS */
+#define DEQUEUE_LOCKED 0x0080 /* Matches ENQUEUE_LOCKED */
#define DEQUEUE_SPECIAL 0x00010000
#define DEQUEUE_THROTTLE 0x00020000
@@ -2367,6 +2370,7 @@ extern const u32 sched_prio_to_wmult[40
#define ENQUEUE_MIGRATING 0x0010
#define ENQUEUE_DELAYED 0x0020
#define ENQUEUE_CLASS 0x0040
+#define ENQUEUE_LOCKED 0x0080
#define ENQUEUE_HEAD 0x00010000
#define ENQUEUE_REPLENISH 0x00020000
@@ -3963,6 +3967,9 @@ extern void balance_callbacks(struct rq
struct sched_change_ctx {
u64 prio;
struct task_struct *p;
+#ifdef CONFIG_PROVE_LOCKING
+ raw_spinlock_t *srq_lock;
+#endif
int flags;
bool queued;
bool running;
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -89,7 +89,7 @@ void set_user_nice(struct task_struct *p
return;
}
- scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK) {
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK | DEQUEUE_LOCKED) {
p->static_prio = NICE_TO_PRIO(nice);
set_load_weight(p, true);
old_prio = p->prio;
@@ -503,7 +503,7 @@ int __sched_setscheduler(struct task_str
struct balance_callback *head;
struct rq_flags rf;
int reset_on_fork;
- int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+ int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
struct rq *rq;
bool cpuset_locked = false;
next prev parent reply other threads:[~2025-09-10 16:00 UTC|newest]
Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-10 15:44 [PATCH 00/14] sched: Support shared runqueue locking Peter Zijlstra
2025-09-10 15:44 ` [PATCH 01/14] sched: Employ sched_change guards Peter Zijlstra
2025-09-11 9:06 ` K Prateek Nayak
2025-09-11 9:55 ` Peter Zijlstra
2025-09-11 10:10 ` Peter Zijlstra
2025-09-11 10:37 ` K Prateek Nayak
2025-10-06 15:21 ` Shrikanth Hegde
2025-10-06 18:14 ` Peter Zijlstra
2025-10-07 5:12 ` Shrikanth Hegde
2025-10-07 9:34 ` Peter Zijlstra
2025-10-16 9:33 ` [tip: sched/core] sched: Mandate shared flags for sched_change tip-bot2 for Peter Zijlstra
2025-09-10 15:44 ` [PATCH 02/14] sched: Re-arrange the {EN,DE}QUEUE flags Peter Zijlstra
2025-09-10 15:44 ` [PATCH 03/14] sched: Fold sched_class::switch{ing,ed}_{to,from}() into the change pattern Peter Zijlstra
2025-09-10 15:44 ` [PATCH 04/14] sched: Cleanup sched_delayed handling for class switches Peter Zijlstra
2025-09-10 15:44 ` [PATCH 05/14] sched: Move sched_class::prio_changed() into the change pattern Peter Zijlstra
2025-09-11 1:44 ` Tejun Heo
2025-09-10 15:44 ` [PATCH 06/14] sched: Fix migrate_disable_switch() locking Peter Zijlstra
2025-09-10 15:44 ` [PATCH 07/14] sched: Fix do_set_cpus_allowed() locking Peter Zijlstra
2025-10-30 0:12 ` Mark Brown
2025-10-30 9:07 ` Peter Zijlstra
2025-10-30 12:47 ` Mark Brown
2025-09-10 15:44 ` [PATCH 08/14] sched: Rename do_set_cpus_allowed() Peter Zijlstra
2025-09-10 15:44 ` [PATCH 09/14] sched: Make __do_set_cpus_allowed() use the sched_change pattern Peter Zijlstra
2025-09-10 15:44 ` [PATCH 10/14] sched: Add locking comments to sched_class methods Peter Zijlstra
2025-09-10 15:44 ` [PATCH 11/14] sched: Add flags to {put_prev,set_next}_task() methods Peter Zijlstra
2025-09-10 15:44 ` [PATCH 12/14] sched: Add shared runqueue locking to __task_rq_lock() Peter Zijlstra
2025-09-12 0:19 ` Tejun Heo
2025-09-12 11:54 ` Peter Zijlstra
2025-09-12 14:11 ` Peter Zijlstra
2025-09-12 17:56 ` Tejun Heo
2025-09-15 8:38 ` Peter Zijlstra
2025-09-16 22:29 ` Tejun Heo
2025-09-16 22:41 ` Tejun Heo
2025-09-25 8:35 ` Peter Zijlstra
2025-09-25 21:43 ` Tejun Heo
2025-09-26 9:59 ` Peter Zijlstra
2025-09-26 16:48 ` Tejun Heo
2025-09-26 10:36 ` Peter Zijlstra
2025-09-26 21:39 ` Tejun Heo
2025-09-29 10:06 ` Peter Zijlstra
2025-09-30 23:49 ` Tejun Heo
2025-10-01 11:54 ` Peter Zijlstra
2025-10-02 23:32 ` Tejun Heo
2025-09-10 15:44 ` Peter Zijlstra [this message]
2025-09-11 2:01 ` [PATCH 13/14] sched: Add {DE,EN}QUEUE_LOCKED Tejun Heo
2025-09-11 9:42 ` Peter Zijlstra
2025-09-11 20:40 ` Tejun Heo
2025-09-12 14:19 ` Peter Zijlstra
2025-09-12 16:32 ` Tejun Heo
2025-09-13 22:32 ` Tejun Heo
2025-09-15 8:48 ` Peter Zijlstra
2025-09-25 13:10 ` Peter Zijlstra
2025-09-25 15:40 ` Tejun Heo
2025-09-25 15:53 ` Peter Zijlstra
2025-09-25 18:44 ` Tejun Heo
2025-09-10 15:44 ` [PATCH 14/14] sched/ext: Implement p->srq_lock support Peter Zijlstra
2025-09-10 16:07 ` Peter Zijlstra
2025-09-10 17:32 ` [PATCH 00/14] sched: Support shared runqueue locking Andrea Righi
2025-09-10 18:19 ` Peter Zijlstra
2025-09-10 18:35 ` Peter Zijlstra
2025-09-10 19:00 ` Andrea Righi
2025-09-11 9:58 ` Peter Zijlstra
2025-09-11 14:51 ` Andrea Righi
2025-09-11 14:00 ` Peter Zijlstra
2025-09-11 14:30 ` Peter Zijlstra
2025-09-11 14:48 ` Andrea Righi
2025-09-18 15:15 ` Christian Loehle
2025-09-25 9:00 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250910155809.800554594@infradead.org \
--to=peterz@infradead.org \
--cc=arighi@nvidia.com \
--cc=bsegall@google.com \
--cc=cgroups@vger.kernel.org \
--cc=changwoo@igalia.com \
--cc=dietmar.eggemann@arm.com \
--cc=hannes@cmpxchg.org \
--cc=juri.lelli@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=liuwenfang@honor.com \
--cc=longman@redhat.com \
--cc=mgorman@suse.de \
--cc=mingo@redhat.com \
--cc=mkoutny@suse.com \
--cc=rostedt@goodmis.org \
--cc=sched-ext@lists.linux.dev \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
--cc=vincent.guittot@linaro.org \
--cc=void@manifault.com \
--cc=vschneid@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.