Re: [PATCH 14/14] sched/ext: Implement p->srq_lock support

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Peter Zijlstra <peterz@infradead.org>
To: tj@kernel.org
Cc: linux-kernel@vger.kernel.org, mingo@redhat.com,
	juri.lelli@redhat.com, vincent.guittot@linaro.org,
	dietmar.eggemann@arm.com, rostedt@goodmis.org,
	bsegall@google.com, mgorman@suse.de, vschneid@redhat.com,
	longman@redhat.com, hannes@cmpxchg.org, mkoutny@suse.com,
	void@manifault.com, arighi@nvidia.com, changwoo@igalia.com,
	cgroups@vger.kernel.org, sched-ext@lists.linux.dev,
	liuwenfang@honor.com, tglx@linutronix.de
Subject: Re: [PATCH 14/14] sched/ext: Implement p->srq_lock support
Date: Wed, 10 Sep 2025 18:07:35 +0200	[thread overview]
Message-ID: <20250910160735.GL4067720@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <20250910155809.916720757@infradead.org>

On Wed, Sep 10, 2025 at 05:44:23PM +0200, Peter Zijlstra wrote:
> Have enqueue set p->srq_lock to &dsq->lock and have dequeue clear it,
> when dst is non-local.
> 
> When enqueue sees ENQUEUE_LOCKED, it must lock dsq->lock (since
> p->srq_lock will be NULL on entry) but must not unlock on exit when it
> sets p->srq_lock.
> 
> When dequeue sees DEQUEUE_LOCKED, it must not lock dsq->lock when
> p->srq_lock is set (instead it must verify they are the same), but it
> must unlock on exit, since it will have cleared p->srq_lock.
> 
> For DEQUEUE_SAVE/ENQUEUE_RESTORE it can retain p->srq_lock, since
> the extra unlock+lock cycle is pointless.
> 
> Note: set_next_task_scx() relies on LOCKED to avoid self-recursion on
> dsq->lock in the enqueue_task/set_next_task case.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

*groan* and obviously I lost a refresh on this patch...

--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1952,13 +1952,16 @@ static void dispatch_enqueue(struct scx_
 			     struct task_struct *p, u64 enq_flags)
 {
 	bool is_local = dsq->id == SCX_DSQ_LOCAL;
+	bool locked = enq_flags & ENQUEUE_LOCKED;
+	bool restore = enq_flags & ENQUEUE_RESTORE;
 
 	WARN_ON_ONCE(p->scx.dsq || !list_empty(&p->scx.dsq_list.node));
 	WARN_ON_ONCE((p->scx.dsq_flags & SCX_TASK_DSQ_ON_PRIQ) ||
 		     !RB_EMPTY_NODE(&p->scx.dsq_priq));
 
 	if (!is_local) {
-		raw_spin_lock(&dsq->lock);
+		if (!locked || !p->srq_lock)
+			raw_spin_lock(&dsq->lock);
 		if (unlikely(dsq->id == SCX_DSQ_INVALID)) {
 			scx_error(sch, "attempting to dispatch to a destroyed dsq");
 			/* fall back to the global dsq */
@@ -2028,6 +2031,10 @@ static void dispatch_enqueue(struct scx_
 
 	dsq_mod_nr(dsq, 1);
 	p->scx.dsq = dsq;
+	if (!is_local) {
+		WARN_ON_ONCE(locked && restore && p->srq_lock && p->srq_lock != &dsq->lock);
+		p->srq_lock = &dsq->lock;
+	}
 
 	/*
 	 * scx.ddsp_dsq_id and scx.ddsp_enq_flags are only relevant on the
@@ -2059,13 +2066,17 @@ static void dispatch_enqueue(struct scx_
 						 rq->curr->sched_class))
 			resched_curr(rq);
 	} else {
-		raw_spin_unlock(&dsq->lock);
+		if (!locked)
+			raw_spin_unlock(&dsq->lock);
 	}
 }
 
 static void task_unlink_from_dsq(struct task_struct *p,
-				 struct scx_dispatch_q *dsq)
+				 struct scx_dispatch_q *dsq,
+				 int deq_flags)
 {
+	bool save = deq_flags & DEQUEUE_SAVE;
+
 	WARN_ON_ONCE(list_empty(&p->scx.dsq_list.node));
 
 	if (p->scx.dsq_flags & SCX_TASK_DSQ_ON_PRIQ) {
@@ -2076,12 +2087,15 @@ static void task_unlink_from_dsq(struct
 
 	list_del_init(&p->scx.dsq_list.node);
 	dsq_mod_nr(dsq, -1);
+	if (!save)
+		p->srq_lock = NULL;
 }
 
-static void dispatch_dequeue(struct rq *rq, struct task_struct *p)
+static void dispatch_dequeue(struct rq *rq, struct task_struct *p, int deq_flags)
 {
 	struct scx_dispatch_q *dsq = p->scx.dsq;
 	bool is_local = dsq == &rq->scx.local_dsq;
+	bool locked = deq_flags & DEQUEUE_LOCKED;
 
 	if (!dsq) {
 		/*
@@ -2103,8 +2117,10 @@ static void dispatch_dequeue(struct rq *
 		return;
 	}
 
-	if (!is_local)
-		raw_spin_lock(&dsq->lock);
+	if (!is_local) {
+		if (!locked)
+			raw_spin_lock(&dsq->lock);
+	}
 
 	/*
 	 * Now that we hold @dsq->lock, @p->holding_cpu and @p->scx.dsq_* can't
@@ -2112,7 +2128,8 @@ static void dispatch_dequeue(struct rq *
 	*/
 	if (p->scx.holding_cpu < 0) {
 		/* @p must still be on @dsq, dequeue */
-		task_unlink_from_dsq(p, dsq);
+		WARN_ON_ONCE(!is_local && !p->srq_lock);
+		task_unlink_from_dsq(p, dsq, deq_flags);
 	} else {
 		/*
 		 * We're racing against dispatch_to_local_dsq() which already
@@ -2125,8 +2142,10 @@ static void dispatch_dequeue(struct rq *
 	}
 	p->scx.dsq = NULL;
 
-	if (!is_local)
-		raw_spin_unlock(&dsq->lock);
+	if (!is_local) {
+		if (!locked || !p->srq_lock)
+			raw_spin_unlock(&dsq->lock);
+	}
 }
 
 static struct scx_dispatch_q *find_dsq_for_dispatch(struct scx_sched *sch,
@@ -2508,7 +2527,7 @@ static bool dequeue_task_scx(struct rq *
 	rq->scx.nr_running--;
 	sub_nr_running(rq, 1);
 
-	dispatch_dequeue(rq, p);
+	dispatch_dequeue(rq, p, deq_flags);
 	return true;
 }
 
@@ -2697,7 +2716,7 @@ static bool unlink_dsq_and_lock_src_rq(s
 	lockdep_assert_held(&dsq->lock);
 
 	WARN_ON_ONCE(p->scx.holding_cpu >= 0);
-	task_unlink_from_dsq(p, dsq);
+	task_unlink_from_dsq(p, dsq, 0);
 	p->scx.holding_cpu = cpu;
 
 	raw_spin_unlock(&dsq->lock);
@@ -2769,7 +2788,7 @@ static struct rq *move_task_between_dsqs
 	if (dst_dsq->id == SCX_DSQ_LOCAL) {
 		/* @p is going from a non-local DSQ to a local DSQ */
 		if (src_rq == dst_rq) {
-			task_unlink_from_dsq(p, src_dsq);
+			task_unlink_from_dsq(p, src_dsq, 0);
 			move_local_task_to_local_dsq(p, enq_flags,
 						     src_dsq, dst_rq);
 			raw_spin_unlock(&src_dsq->lock);
@@ -2783,7 +2802,7 @@ static struct rq *move_task_between_dsqs
 		 * @p is going from a non-local DSQ to a non-local DSQ. As
 		 * $src_dsq is already locked, do an abbreviated dequeue.
 		 */
-		task_unlink_from_dsq(p, src_dsq);
+		task_unlink_from_dsq(p, src_dsq, 0);
 		p->scx.dsq = NULL;
 		raw_spin_unlock(&src_dsq->lock);
 
@@ -2849,7 +2868,7 @@ static bool consume_dispatch_q(struct sc
 		struct rq *task_rq = task_rq(p);
 
 		if (rq == task_rq) {
-			task_unlink_from_dsq(p, dsq);
+			task_unlink_from_dsq(p, dsq, 0);
 			move_local_task_to_local_dsq(p, 0, dsq, rq);
 			raw_spin_unlock(&dsq->lock);
 			return true;
@@ -3253,7 +3272,7 @@ static void set_next_task_scx(struct rq
 		 * dispatched. Call ops_dequeue() to notify the BPF scheduler.
 		 */
 		ops_dequeue(rq, p, SCX_DEQ_CORE_SCHED_EXEC);
-		dispatch_dequeue(rq, p);
+		dispatch_dequeue(rq, p, flags);
 	}
 
 	p->se.exec_start = rq_clock_task(rq);
@@ -4999,7 +5018,8 @@ static void scx_disable_workfn(struct kt
 
 	scx_task_iter_start(&sti);
 	while ((p = scx_task_iter_next_locked(&sti))) {
-		unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+		unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE |
+					   DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
 		const struct sched_class *old_class = p->sched_class;
 		const struct sched_class *new_class =
 			__setscheduler_class(p->policy, p->prio);
@@ -5743,7 +5763,8 @@ static int scx_enable(struct sched_ext_o
 	percpu_down_write(&scx_fork_rwsem);
 	scx_task_iter_start(&sti);
 	while ((p = scx_task_iter_next_locked(&sti))) {
-		unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+		unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE |
+					   DEQUEUE_NOCLOCK | DEQUEUE_LOCKED;
 		const struct sched_class *old_class = p->sched_class;
 		const struct sched_class *new_class =
 			__setscheduler_class(p->policy, p->prio);
@@ -6795,7 +6816,7 @@ __bpf_kfunc u32 scx_bpf_reenqueue_local(
 		if (p->migration_pending || is_migration_disabled(p) || p->nr_cpus_allowed == 1)
 			continue;
 
-		dispatch_dequeue(rq, p);
+		dispatch_dequeue(rq, p, 0);
 		list_add_tail(&p->scx.dsq_list.node, &tasks);
 	}

next prev parent reply	other threads:[~2025-09-10 16:07 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-10 15:44 [PATCH 00/14] sched: Support shared runqueue locking Peter Zijlstra
2025-09-10 15:44 ` [PATCH 01/14] sched: Employ sched_change guards Peter Zijlstra
2025-09-11  9:06   ` K Prateek Nayak
2025-09-11  9:55     ` Peter Zijlstra
2025-09-11 10:10       ` Peter Zijlstra
2025-09-11 10:37         ` K Prateek Nayak
2025-10-06 15:21   ` Shrikanth Hegde
2025-10-06 18:14     ` Peter Zijlstra
2025-10-07  5:12       ` Shrikanth Hegde
2025-10-07  9:34         ` Peter Zijlstra
2025-10-16  9:33       ` [tip: sched/core] sched: Mandate shared flags for sched_change tip-bot2 for Peter Zijlstra
2025-09-10 15:44 ` [PATCH 02/14] sched: Re-arrange the {EN,DE}QUEUE flags Peter Zijlstra
2025-09-10 15:44 ` [PATCH 03/14] sched: Fold sched_class::switch{ing,ed}_{to,from}() into the change pattern Peter Zijlstra
2025-09-10 15:44 ` [PATCH 04/14] sched: Cleanup sched_delayed handling for class switches Peter Zijlstra
2025-09-10 15:44 ` [PATCH 05/14] sched: Move sched_class::prio_changed() into the change pattern Peter Zijlstra
2025-09-11  1:44   ` Tejun Heo
2025-09-10 15:44 ` [PATCH 06/14] sched: Fix migrate_disable_switch() locking Peter Zijlstra
2025-09-10 15:44 ` [PATCH 07/14] sched: Fix do_set_cpus_allowed() locking Peter Zijlstra
2025-10-30  0:12   ` Mark Brown
2025-10-30  9:07     ` Peter Zijlstra
2025-10-30 12:47       ` Mark Brown
2025-09-10 15:44 ` [PATCH 08/14] sched: Rename do_set_cpus_allowed() Peter Zijlstra
2025-09-10 15:44 ` [PATCH 09/14] sched: Make __do_set_cpus_allowed() use the sched_change pattern Peter Zijlstra
2025-09-10 15:44 ` [PATCH 10/14] sched: Add locking comments to sched_class methods Peter Zijlstra
2025-09-10 15:44 ` [PATCH 11/14] sched: Add flags to {put_prev,set_next}_task() methods Peter Zijlstra
2025-09-10 15:44 ` [PATCH 12/14] sched: Add shared runqueue locking to __task_rq_lock() Peter Zijlstra
2025-09-12  0:19   ` Tejun Heo
2025-09-12 11:54     ` Peter Zijlstra
2025-09-12 14:11       ` Peter Zijlstra
2025-09-12 17:56       ` Tejun Heo
2025-09-15  8:38         ` Peter Zijlstra
2025-09-16 22:29           ` Tejun Heo
2025-09-16 22:41             ` Tejun Heo
2025-09-25  8:35               ` Peter Zijlstra
2025-09-25 21:43                 ` Tejun Heo
2025-09-26  9:59                   ` Peter Zijlstra
2025-09-26 16:48                     ` Tejun Heo
2025-09-26 10:36                   ` Peter Zijlstra
2025-09-26 21:39                     ` Tejun Heo
2025-09-29 10:06                       ` Peter Zijlstra
2025-09-30 23:49                         ` Tejun Heo
2025-10-01 11:54                           ` Peter Zijlstra
2025-10-02 23:32                             ` Tejun Heo
2025-09-10 15:44 ` [PATCH 13/14] sched: Add {DE,EN}QUEUE_LOCKED Peter Zijlstra
2025-09-11  2:01   ` Tejun Heo
2025-09-11  9:42     ` Peter Zijlstra
2025-09-11 20:40       ` Tejun Heo
2025-09-12 14:19         ` Peter Zijlstra
2025-09-12 16:32           ` Tejun Heo
2025-09-13 22:32             ` Tejun Heo
2025-09-15  8:48               ` Peter Zijlstra
2025-09-25 13:10             ` Peter Zijlstra
2025-09-25 15:40               ` Tejun Heo
2025-09-25 15:53                 ` Peter Zijlstra
2025-09-25 18:44                   ` Tejun Heo
2025-09-10 15:44 ` [PATCH 14/14] sched/ext: Implement p->srq_lock support Peter Zijlstra
2025-09-10 16:07   ` Peter Zijlstra [this message]
2025-09-10 17:32 ` [PATCH 00/14] sched: Support shared runqueue locking Andrea Righi
2025-09-10 18:19   ` Peter Zijlstra
2025-09-10 18:35   ` Peter Zijlstra
2025-09-10 19:00     ` Andrea Righi
2025-09-11  9:58     ` Peter Zijlstra
2025-09-11 14:51       ` Andrea Righi
2025-09-11 14:00   ` Peter Zijlstra
2025-09-11 14:30     ` Peter Zijlstra
2025-09-11 14:48       ` Andrea Righi
2025-09-18 15:15 ` Christian Loehle
2025-09-25  9:00   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250910160735.GL4067720@noisy.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=arighi@nvidia.com \
    --cc=bsegall@google.com \
    --cc=cgroups@vger.kernel.org \
    --cc=changwoo@igalia.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=hannes@cmpxchg.org \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=liuwenfang@honor.com \
    --cc=longman@redhat.com \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=mkoutny@suse.com \
    --cc=rostedt@goodmis.org \
    --cc=sched-ext@lists.linux.dev \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=vincent.guittot@linaro.org \
    --cc=void@manifault.com \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox