From: Peter Zijlstra <peterz@infradead.org>
To: tj@kernel.org
Cc: linux-kernel@vger.kernel.org, mingo@redhat.com,
juri.lelli@redhat.com, vincent.guittot@linaro.org,
dietmar.eggemann@arm.com, rostedt@goodmis.org,
bsegall@google.com, mgorman@suse.de, vschneid@redhat.com,
longman@redhat.com, hannes@cmpxchg.org, mkoutny@suse.com,
void@manifault.com, arighi@nvidia.com, changwoo@igalia.com,
cgroups@vger.kernel.org, sched-ext@lists.linux.dev,
liuwenfang@honor.com, tglx@linutronix.de
Subject: Re: [PATCH 14/14] sched/ext: Implement p->srq_lock support
Date: Wed, 10 Sep 2025 18:07:35 +0200 [thread overview]
Message-ID: <20250910160735.GL4067720@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <20250910155809.916720757@infradead.org>
On Wed, Sep 10, 2025 at 05:44:23PM +0200, Peter Zijlstra wrote:
> Have enqueue set p->srq_lock to &dsq->lock and have dequeue clear it,
> when dst is non-local.
>
> When enqueue sees ENQUEUE_LOCKED, it must lock dsq->lock (since
> p->srq_lock will be NULL on entry) but must not unlock on exit when it
> sets p->srq_lock.
>
> When dequeue sees DEQUEUE_LOCKED, it must not lock dsq->lock when
> p->srq_lock is set (instead it must verify they are the same), but it
> must unlock on exit, since it will have cleared p->srq_lock.
>
> For DEQUEUE_SAVE/ENQUEUE_RESTORE it can retain p->srq_lock, since
> the extra unlock+lock cycle is pointless.
>
> Note: set_next_task_scx() relies on LOCKED to avoid self-recursion on
> dsq->lock in the enqueue_task/set_next_task case.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
*groan* and obviously I lost a refresh on this patch...
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1952,13 +1952,16 @@ static void dispatch_enqueue(struct scx_
struct task_struct *p, u64 enq_flags)
{
bool is_local = dsq->id == SCX_DSQ_LOCAL;
+ bool locked = enq_flags & ENQUEUE_LOCKED;
+ bool restore = enq_flags & ENQUEUE_RESTORE;
WARN_ON_ONCE(p->scx.dsq || !list_empty(&p->scx.dsq_list.node));
WARN_ON_ONCE((p->scx.dsq_flags & SCX_TASK_DSQ_ON_PRIQ) ||
!RB_EMPTY_NODE(&p->scx.dsq_priq));
if (!is_local) {
- raw_spin_lock(&dsq->lock);
+ if (!locked || !p->srq_lock)
+ raw_spin_lock(&dsq->lock);
if (unlikely(dsq->id == SCX_DSQ_INVALID)) {
scx_error(sch, "attempting to dispatch to a destroyed dsq");
/* fall back to the global dsq */
@@ -2028,6 +2031,10 @@ static void dispatch_enqueue(struct scx_
dsq_mod_nr(dsq, 1);
p->scx.dsq = dsq;
+ if (!is_local) {
+ WARN_ON_ONCE(locked && restore && p->srq_lock && p->srq_lock != &dsq->lock);
+ p->srq_lock = &dsq->lock;
+ }
/*
* scx.ddsp_dsq_id and scx.ddsp_enq_flags are only relevant on the
@@ -2059,13 +2066,17 @@ static void dispatch_enqueue(struct scx_
rq->curr->sched_class))
resched_curr(rq);
} else {
- raw_spin_unlock(&dsq->lock);
+ if (!locked)
+ raw_spin_unlock(&dsq->lock);
}
}
static void task_unlink_from_dsq(struct task_struct *p,
- struct scx_dispatch_q *dsq)
+ struct scx_dispatch_q *dsq,
+ int deq_flags)
{
+ bool save = deq_flags & DEQUEUE_SAVE;
+
WARN_ON_ONCE(list_empty(&p->scx.dsq_list.node));
if (p->scx.dsq_flags & SCX_TASK_DSQ_ON_PRIQ) {
@@ -2076,12 +2087,15 @@ static void task_unlink_from_dsq(struct
list_del_init(&p->scx.dsq_list.node);
dsq_mod_nr(dsq, -1);
+ if (!save)
+ p->srq_lock = NULL;
}
-static void dispatch_dequeue(struct rq *rq, struct task_struct *p)
+static void dispatch_dequeue(struct rq *rq, struct task_struct *p, int deq_flags)
{
struct scx_dispatch_q *dsq = p->scx.dsq;
bool is_local = dsq == &rq->scx.local_dsq;
+ bool locked = deq_flags & DEQUEUE_LOCKED;
if (!dsq) {
/*
@@ -2103,8 +2117,10 @@ static void dispatch_dequeue(struct rq *
return;
}
- if (!is_local)
- raw_spin_lock(&dsq->lock);
+ if (!is_local) {
+ if (!locked)
+ raw_spin_lock(&dsq->lock);
+ }
/*
* Now that we hold @dsq->lock, @p->holding_cpu and @p->scx.dsq_* can't
@@ -2112,7 +2128,8 @@ static void dispatch_dequeue(struct rq *
*/
if (p->scx.holding_cpu < 0) {
/* @p must still be on @dsq, dequeue */
- task_unlink_from_dsq(p, dsq);
+ WARN_ON_ONCE(!is_local && !p->srq_lock);
+ task_unlink_from_dsq(p, dsq, deq_flags);
} else {
/*
* We're racing against dispatch_to_local_dsq() which already
@@ -2125,8 +2142,10 @@ static void dispatch_dequeue(struct rq *
}
p->scx.dsq = NULL;
- if (!is_local)
- raw_spin_unlock(&dsq->lock);
+ if (!is_local) {
+ if (!locked || !p->srq_lock)
+ raw_spin_unlock(&dsq->lock);
+ }
}
static struct scx_dispatch_q *find_dsq_for_dispatch(struct scx_sched *sch,
@@ -2508,7 +2527,7 @@ static bool dequeue_task_scx(struct rq *
rq->scx.nr_running--;
sub_nr_running(rq, 1);
- dispatch_dequeue(rq, p);
+ dispatch_dequeue(rq, p, deq_flags);
return true;
}
@@ -2697,7 +2716,7 @@ static bool unlink_dsq_and_lock_src_rq(s
lockdep_assert_held(&dsq->lock);
WARN_ON_ONCE(p->scx.holding_cpu >= 0);
- task_unlink_from_dsq(p, dsq);
+ task_unlink_from_dsq(p, dsq, 0);
p->scx.holding_cpu = cpu;
raw_spin_unlock(&dsq->lock);
@@ -2769,7 +2788,7 @@ static struct rq *move_task_between_dsqs
if (dst_dsq->id == SCX_DSQ_LOCAL) {
/* @p is going from a non-local DSQ to a local DSQ */
if (src_rq == dst_rq) {
- task_unlink_from_dsq(p, src_dsq);
+ task_unlink_from_dsq(p, src_dsq, 0);
move_local_task_to_local_dsq(p, enq_flags,
src_dsq, dst_rq);
raw_spin_unlock(&src_dsq->lock);
@@ -2783,7 +2802,7 @@ static struct rq *move_task_between_dsqs
* @p is going from a non-local DSQ to a non-local DSQ. As
* $src_dsq is already locked, do an abbreviated dequeue.
*/
- task_unlink_from_dsq(p, src_dsq);
+ task_unlink_from_dsq(p, src_dsq, 0);
p->scx.dsq = NULL;
raw_spin_unlock(&src_dsq->lock);
@@ -2849,7 +2868,7 @@ static bool consume_dispatch_q(struct sc
struct rq *task_rq = task_rq(p);
if (rq == task_rq) {
- task_unlink_from_dsq(p, dsq);
+ task_unlink_from_dsq(p, dsq, 0);
move_local_task_to_local_dsq(p, 0, dsq, rq);
raw_spin_unlock(&dsq->lock);
return true;
@@ -3253,7 +3272,7 @@ static void set_next_task_scx(struct rq
* dispatched. Call ops_dequeue() to notify the BPF scheduler.
*/
ops_dequeue(rq, p, SCX_DEQ_CORE_SCHED_EXEC);
- dispatch_dequeue(rq, p);
+ dispatch_dequeue(rq, p, flags);
}
p->se.exec_start = rq_clock_task(rq);
@@ -4999,7 +5018,8 @@ static void scx_disable_workfn(struct kt
scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
- unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+ unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE |
+ DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
const struct sched_class *old_class = p->sched_class;
const struct sched_class *new_class =
__setscheduler_class(p->policy, p->prio);
@@ -5743,7 +5763,8 @@ static int scx_enable(struct sched_ext_o
percpu_down_write(&scx_fork_rwsem);
scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
- unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+ unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE |
+ DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
const struct sched_class *old_class = p->sched_class;
const struct sched_class *new_class =
__setscheduler_class(p->policy, p->prio);
@@ -6795,7 +6816,7 @@ __bpf_kfunc u32 scx_bpf_reenqueue_local(
if (p->migration_pending || is_migration_disabled(p) || p->nr_cpus_allowed == 1)
continue;
- dispatch_dequeue(rq, p);
+ dispatch_dequeue(rq, p, 0);
list_add_tail(&p->scx.dsq_list.node, &tasks);
}
next prev parent reply other threads:[~2025-09-10 16:07 UTC|newest]
Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-10 15:44 [PATCH 00/14] sched: Support shared runqueue locking Peter Zijlstra
2025-09-10 15:44 ` [PATCH 01/14] sched: Employ sched_change guards Peter Zijlstra
2025-09-11 9:06 ` K Prateek Nayak
2025-09-11 9:55 ` Peter Zijlstra
2025-09-11 10:10 ` Peter Zijlstra
2025-09-11 10:37 ` K Prateek Nayak
2025-10-06 15:21 ` Shrikanth Hegde
2025-10-06 18:14 ` Peter Zijlstra
2025-10-07 5:12 ` Shrikanth Hegde
2025-10-07 9:34 ` Peter Zijlstra
2025-10-16 9:33 ` [tip: sched/core] sched: Mandate shared flags for sched_change tip-bot2 for Peter Zijlstra
2025-09-10 15:44 ` [PATCH 02/14] sched: Re-arrange the {EN,DE}QUEUE flags Peter Zijlstra
2025-09-10 15:44 ` [PATCH 03/14] sched: Fold sched_class::switch{ing,ed}_{to,from}() into the change pattern Peter Zijlstra
2025-09-10 15:44 ` [PATCH 04/14] sched: Cleanup sched_delayed handling for class switches Peter Zijlstra
2025-09-10 15:44 ` [PATCH 05/14] sched: Move sched_class::prio_changed() into the change pattern Peter Zijlstra
2025-09-11 1:44 ` Tejun Heo
2025-09-10 15:44 ` [PATCH 06/14] sched: Fix migrate_disable_switch() locking Peter Zijlstra
2025-09-10 15:44 ` [PATCH 07/14] sched: Fix do_set_cpus_allowed() locking Peter Zijlstra
2025-10-30 0:12 ` Mark Brown
2025-10-30 9:07 ` Peter Zijlstra
2025-10-30 12:47 ` Mark Brown
2025-09-10 15:44 ` [PATCH 08/14] sched: Rename do_set_cpus_allowed() Peter Zijlstra
2025-09-10 15:44 ` [PATCH 09/14] sched: Make __do_set_cpus_allowed() use the sched_change pattern Peter Zijlstra
2025-09-10 15:44 ` [PATCH 10/14] sched: Add locking comments to sched_class methods Peter Zijlstra
2025-09-10 15:44 ` [PATCH 11/14] sched: Add flags to {put_prev,set_next}_task() methods Peter Zijlstra
2025-09-10 15:44 ` [PATCH 12/14] sched: Add shared runqueue locking to __task_rq_lock() Peter Zijlstra
2025-09-12 0:19 ` Tejun Heo
2025-09-12 11:54 ` Peter Zijlstra
2025-09-12 14:11 ` Peter Zijlstra
2025-09-12 17:56 ` Tejun Heo
2025-09-15 8:38 ` Peter Zijlstra
2025-09-16 22:29 ` Tejun Heo
2025-09-16 22:41 ` Tejun Heo
2025-09-25 8:35 ` Peter Zijlstra
2025-09-25 21:43 ` Tejun Heo
2025-09-26 9:59 ` Peter Zijlstra
2025-09-26 16:48 ` Tejun Heo
2025-09-26 10:36 ` Peter Zijlstra
2025-09-26 21:39 ` Tejun Heo
2025-09-29 10:06 ` Peter Zijlstra
2025-09-30 23:49 ` Tejun Heo
2025-10-01 11:54 ` Peter Zijlstra
2025-10-02 23:32 ` Tejun Heo
2025-09-10 15:44 ` [PATCH 13/14] sched: Add {DE,EN}QUEUE_LOCKED Peter Zijlstra
2025-09-11 2:01 ` Tejun Heo
2025-09-11 9:42 ` Peter Zijlstra
2025-09-11 20:40 ` Tejun Heo
2025-09-12 14:19 ` Peter Zijlstra
2025-09-12 16:32 ` Tejun Heo
2025-09-13 22:32 ` Tejun Heo
2025-09-15 8:48 ` Peter Zijlstra
2025-09-25 13:10 ` Peter Zijlstra
2025-09-25 15:40 ` Tejun Heo
2025-09-25 15:53 ` Peter Zijlstra
2025-09-25 18:44 ` Tejun Heo
2025-09-10 15:44 ` [PATCH 14/14] sched/ext: Implement p->srq_lock support Peter Zijlstra
2025-09-10 16:07 ` Peter Zijlstra [this message]
2025-09-10 17:32 ` [PATCH 00/14] sched: Support shared runqueue locking Andrea Righi
2025-09-10 18:19 ` Peter Zijlstra
2025-09-10 18:35 ` Peter Zijlstra
2025-09-10 19:00 ` Andrea Righi
2025-09-11 9:58 ` Peter Zijlstra
2025-09-11 14:51 ` Andrea Righi
2025-09-11 14:00 ` Peter Zijlstra
2025-09-11 14:30 ` Peter Zijlstra
2025-09-11 14:48 ` Andrea Righi
2025-09-18 15:15 ` Christian Loehle
2025-09-25 9:00 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250910160735.GL4067720@noisy.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=arighi@nvidia.com \
--cc=bsegall@google.com \
--cc=cgroups@vger.kernel.org \
--cc=changwoo@igalia.com \
--cc=dietmar.eggemann@arm.com \
--cc=hannes@cmpxchg.org \
--cc=juri.lelli@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=liuwenfang@honor.com \
--cc=longman@redhat.com \
--cc=mgorman@suse.de \
--cc=mingo@redhat.com \
--cc=mkoutny@suse.com \
--cc=rostedt@goodmis.org \
--cc=sched-ext@lists.linux.dev \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
--cc=vincent.guittot@linaro.org \
--cc=void@manifault.com \
--cc=vschneid@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.