All of lore.kernel.org
 help / color / mirror / Atom feed
From: liuwenfang <liuwenfang@honor.com>
To: 'Tejun Heo' <tj@kernel.org>
Cc: 'David Vernet' <void@manifault.com>,
	'Andrea Righi' <arighi@nvidia.com>,
	'Changwoo Min' <changwoo@igalia.com>,
	'Ingo Molnar' <mingo@redhat.com>,
	'Peter Zijlstra' <peterz@infradead.org>,
	'Juri Lelli' <juri.lelli@redhat.com>,
	'Vincent Guittot' <vincent.guittot@linaro.org>,
	'Dietmar Eggemann' <dietmar.eggemann@arm.com>,
	'Steven Rostedt' <rostedt@goodmis.org>,
	'Ben Segall' <bsegall@google.com>, 'Mel Gorman' <mgorman@suse.de>,
	'Valentin Schneider' <vschneid@redhat.com>,
	"'linux-kernel@vger.kernel.org'" <linux-kernel@vger.kernel.org>
Subject: [PATCH v4 1/3] sched_ext: Fix pnt_seq calculation when picking the next task
Date: Tue, 19 Aug 2025 06:52:03 +0000	[thread overview]
Message-ID: <228ebd9e6ed3437996dffe15735a9caa@honor.com> (raw)
In-Reply-To: <814bebd2ad844b08993836fd8e7274b8@honor.com>

Currently, rq->scx.pnt_seq is only incremented when the target CPU
switches from an SCX task to a non-SCX task, so in scx_pair the pair
CPU may not exit the busy-wait state when it should.

In scx_pair, rq->scx.pnt_seq is introduced to improve exclusion
guarantees. The invoking CPU calls scx_bpf_kick_cpu() with
SCX_KICK_WAIT and enters the busy-wait state. It should exit this
state once the target CPU has entered the rescheduling path with
rq->scx.pnt_seq incremented.

So, move the pnt_seq calculation to put_prev_set_next_task() so that
it is incremented for every task switch on the target CPU; the
invoking CPU can then exit the busy-wait state properly.

Signed-off-by: Wenfang Liu <liuwenfang@honor.com>
---
 kernel/sched/ext.c   | 10 +---------
 kernel/sched/fair.c  |  2 +-
 kernel/sched/sched.h | 30 +++++++++++++++++++++++++++++-
 3 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index f5133249f..ba99739d7 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3191,14 +3191,6 @@ static void switch_class(struct rq *rq, struct task_struct *next)
 {
 	const struct sched_class *next_class = next->sched_class;
 
-#ifdef CONFIG_SMP
-	/*
-	 * Pairs with the smp_load_acquire() issued by a CPU in
-	 * kick_cpus_irq_workfn() who is waiting for this CPU to perform a
-	 * resched.
-	 */
-	smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
-#endif
 	if (!static_branch_unlikely(&scx_ops_cpu_preempt))
 		return;
 
@@ -5966,7 +5958,7 @@ static void kick_cpus_irq_workfn(struct irq_work *irq_work)
 		if (cpu != cpu_of(this_rq)) {
 			/*
 			 * Pairs with smp_store_release() issued by this CPU in
-			 * switch_class() on the resched path.
+			 * __put_prev_set_next_scx() on the resched path.
 			 *
 			 * We busy-wait here to guarantee that no other task can
 			 * be scheduled on our core before the target CPU has
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0fb9bf995..21214b3fa 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8885,7 +8885,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
 	if (prev->sched_class != &fair_sched_class)
 		goto simple;
 
-	__put_prev_set_next_dl_server(rq, prev, p);
+	__put_prev_set_next(rq, prev, p);
 
 	/*
 	 * Because of the set_next_buddy() in dequeue_task_fair() it is rather
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 47972f34e..435de61c4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1738,12 +1738,32 @@ static inline void scx_rq_clock_invalidate(struct rq *rq)
 	WRITE_ONCE(rq->scx.flags, rq->scx.flags & ~SCX_RQ_CLK_VALID);
 }
 
+static inline void __put_prev_set_next_scx(struct rq *rq,
+					   struct task_struct *prev,
+					   struct task_struct *next)
+{
+	if (!scx_enabled())
+		return;
+
+#ifdef CONFIG_SMP
+	/*
+	 * Pairs with the smp_load_acquire() issued by a CPU in
+	 * kick_cpus_irq_workfn() who is waiting for this CPU to perform a
+	 * resched.
+	 */
+	smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
+#endif
+}
+
 #else /* !CONFIG_SCHED_CLASS_EXT */
 #define scx_enabled()		false
 #define scx_switched_all()	false
 
 static inline void scx_rq_clock_update(struct rq *rq, u64 clock) {}
 static inline void scx_rq_clock_invalidate(struct rq *rq) {}
+static inline void __put_prev_set_next_scx(struct rq *rq,
+					   struct task_struct *prev,
+					   struct task_struct *next) {}
 #endif /* !CONFIG_SCHED_CLASS_EXT */
 
 /*
@@ -2457,13 +2477,21 @@ __put_prev_set_next_dl_server(struct rq *rq,
 	rq->dl_server = NULL;
 }
 
+static inline void __put_prev_set_next(struct rq *rq,
+				       struct task_struct *prev,
+				       struct task_struct *next)
+{
+	__put_prev_set_next_dl_server(rq, prev, next);
+	__put_prev_set_next_scx(rq, prev, next);
+}
+
 static inline void put_prev_set_next_task(struct rq *rq,
 					  struct task_struct *prev,
 					  struct task_struct *next)
 {
 	WARN_ON_ONCE(rq->curr != prev);
 
-	__put_prev_set_next_dl_server(rq, prev, next);
+	__put_prev_set_next(rq, prev, next);
 
 	if (next == prev)
 		return;
-- 
2.17.1

  parent reply	other threads:[~2025-08-19  6:52 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-21  4:09 [PATCH] sched_ext: Fix cpu_released while RT task and SCX task are scheduled concurrently liuwenfang
2025-06-23 19:50 ` 'Tejun Heo'
2025-06-28  6:50   ` [PATCH v2 1/2] " liuwenfang
2025-07-17 21:38     ` 'Tejun Heo'
2025-07-20  9:20       ` liuwenfang
2025-07-20  9:38         ` [PATCH v3 2/3] " liuwenfang
2025-08-12  1:26           ` 'Tejun Heo'
2025-07-20  9:41         ` [PATCH v3 3/3] sched_ext: Fix cpu_released while changing sched policy of the running task liuwenfang
2025-08-12  1:31           ` 'Tejun Heo'
2025-08-19  6:52           ` liuwenfang [this message]
2025-08-19  6:55             ` [PATCH v4 2/3] sched_ext: Fix cpu_released while RT task and SCX task are scheduled concurrently liuwenfang
2025-08-19  7:07               ` [PATCH v4 3/3] sched_ext: Fix cpu_released while changing sched policy of the running task liuwenfang
2025-08-19  7:47               ` [PATCH v4 2/3] sched_ext: Fix cpu_released while RT task and SCX task are scheduled concurrently Peter Zijlstra
2025-08-19  8:47                 ` 回复: " liuwenfang
2025-08-19 10:08                   ` Peter Zijlstra
2025-08-20  0:28                 ` 'Tejun Heo'
2025-08-20  9:18                   ` Peter Zijlstra
2025-08-20 16:52                     ` 'Tejun Heo'
2025-06-28  7:20   ` [PATCH v2 2/2] sched_ext: Fix cpu_released while changing sched policy of the running task liuwenfang
2025-07-17 21:48     ` 'Tejun Heo'
2025-07-18  9:06       ` liuwenfang
2025-07-20  9:36         ` [PATCH v3 1/3] sched_ext: Fix pnt_seq calculation liuwenfang
2025-08-12  0:03           ` 'Tejun Heo'
2025-08-12  0:30             ` 'Tejun Heo'
2025-08-18 10:45               ` liuwenfang
2025-08-18 17:43                 ` 'Tejun Heo'
2025-08-19  7:41                   ` liuwenfang
2025-08-18 17:47           ` Peter Zijlstra
2025-08-19  7:36             ` liuwenfang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=228ebd9e6ed3437996dffe15735a9caa@honor.com \
    --to=liuwenfang@honor.com \
    --cc=arighi@nvidia.com \
    --cc=bsegall@google.com \
    --cc=changwoo@igalia.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=tj@kernel.org \
    --cc=vincent.guittot@linaro.org \
    --cc=void@manifault.com \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.