All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: void@manifault.com, peterz@infradead.org
Cc: linux-kernel@vger.kernel.org, kernel-team@meta.com,
	mingo@redhat.com, Tejun Heo <tj@kernel.org>
Subject: [PATCH 4/6] sched_ext: Simplify UP support by enabling sched_class->balance() in UP
Date: Sat,  3 Aug 2024 16:40:11 -1000	[thread overview]
Message-ID: <20240804024047.100355-5-tj@kernel.org> (raw)
In-Reply-To: <20240804024047.100355-1-tj@kernel.org>

On SMP, SCX performs dispatch from sched_class->balance(). As balance() was
not available on UP, SCX instead called the internal balance function from
put_prev_task_scx() and pick_next_task_scx() to emulate the effect, which is
rather nasty.

Enabling sched_class->balance() on UP shouldn't cause any meaningful
overhead. Enable balance() on UP and drop the ugly workaround.

Signed-off-by: Tejun Heo <tj@kernel.org>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
---
 kernel/sched/core.c  |  4 +---
 kernel/sched/ext.c   | 41 +----------------------------------------
 kernel/sched/sched.h |  2 +-
 3 files changed, 3 insertions(+), 44 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0532b27fd9af..d2ccc2c4b4d3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5826,7 +5826,6 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
 static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
 				  struct rq_flags *rf)
 {
-#ifdef CONFIG_SMP
 	const struct sched_class *start_class = prev->sched_class;
 	const struct sched_class *class;
 
@@ -5849,10 +5848,9 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
 	 * a runnable task of @class priority or higher.
 	 */
 	for_active_class_range(class, start_class, &idle_sched_class) {
-		if (class->balance(rq, prev, rf))
+		if (class->balance && class->balance(rq, prev, rf))
 			break;
 	}
-#endif
 
 	put_prev_task(rq, prev);
 }
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 48f8f57f5954..09f394bb4889 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2616,7 +2616,6 @@ static int balance_one(struct rq *rq, struct task_struct *prev, bool local)
 	return has_tasks;
 }
 
-#ifdef CONFIG_SMP
 static int balance_scx(struct rq *rq, struct task_struct *prev,
 		       struct rq_flags *rf)
 {
@@ -2650,7 +2649,6 @@ static int balance_scx(struct rq *rq, struct task_struct *prev,
 
 	return ret;
 }
-#endif
 
 static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first)
 {
@@ -2719,37 +2717,6 @@ static void process_ddsp_deferred_locals(struct rq *rq)
 
 static void put_prev_task_scx(struct rq *rq, struct task_struct *p)
 {
-#ifndef CONFIG_SMP
-	/*
-	 * UP workaround.
-	 *
-	 * Because SCX may transfer tasks across CPUs during dispatch, dispatch
-	 * is performed from its balance operation which isn't called in UP.
-	 * Let's work around by calling it from the operations which come right
-	 * after.
-	 *
-	 * 1. If the prev task is on SCX, pick_next_task() calls
-	 *    .put_prev_task() right after. As .put_prev_task() is also called
-	 *    from other places, we need to distinguish the calls which can be
-	 *    done by looking at the previous task's state - if still queued or
-	 *    dequeued with %SCX_DEQ_SLEEP, the caller must be pick_next_task().
-	 *    This case is handled here.
-	 *
-	 * 2. If the prev task is not on SCX, the first following call into SCX
-	 *    will be .pick_next_task(), which is covered by calling
-	 *    balance_scx() from pick_next_task_scx().
-	 *
-	 * Note that we can't merge the first case into the second as
-	 * balance_scx() must be called before the previous SCX task goes
-	 * through put_prev_task_scx().
-	 *
-         * @rq is pinned and can't be unlocked. As UP doesn't transfer tasks
-         * around, balance_one() doesn't need to.
-	 */
-	if (p->scx.flags & (SCX_TASK_QUEUED | SCX_TASK_DEQD_FOR_SLEEP))
-		balance_one(rq, p, true);
-#endif
-
 	update_curr_scx(rq);
 
 	/* see dequeue_task_scx() on why we skip when !QUEUED */
@@ -2807,12 +2774,6 @@ static struct task_struct *pick_next_task_scx(struct rq *rq)
 {
 	struct task_struct *p;
 
-#ifndef CONFIG_SMP
-	/* UP workaround - see the comment at the head of put_prev_task_scx() */
-	if (unlikely(rq->curr->sched_class != &ext_sched_class))
-		balance_one(rq, rq->curr, true);
-#endif
-
 	p = first_local_task(rq);
 	if (!p)
 		return NULL;
@@ -3673,6 +3634,7 @@ DEFINE_SCHED_CLASS(ext) = {
 
 	.wakeup_preempt		= wakeup_preempt_scx,
 
+	.balance		= balance_scx,
 	.pick_next_task		= pick_next_task_scx,
 
 	.put_prev_task		= put_prev_task_scx,
@@ -3681,7 +3643,6 @@ DEFINE_SCHED_CLASS(ext) = {
 	.switch_class		= switch_class_scx,
 
 #ifdef CONFIG_SMP
-	.balance		= balance_scx,
 	.select_task_rq		= select_task_rq_scx,
 	.task_woken		= task_woken_scx,
 	.set_cpus_allowed	= set_cpus_allowed_scx,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 42b4d1428c2c..9b88a46d3fce 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2357,6 +2357,7 @@ struct sched_class {
 
 	void (*wakeup_preempt)(struct rq *rq, struct task_struct *p, int flags);
 
+	int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
 	struct task_struct *(*pick_next_task)(struct rq *rq);
 
 	void (*put_prev_task)(struct rq *rq, struct task_struct *p);
@@ -2365,7 +2366,6 @@ struct sched_class {
 	void (*switch_class)(struct rq *rq, struct task_struct *next);
 
 #ifdef CONFIG_SMP
-	int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
 	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int flags);
 
 	struct task_struct * (*pick_task)(struct rq *rq);
-- 
2.46.0


  parent reply	other threads:[~2024-08-04  2:40 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-04  2:40 [PATCHSET sched_ext/for-6.12] sched_ext: Misc updates Tejun Heo
2024-08-04  2:40 ` [PATCH 1/6] sched_ext: Simplify scx_can_stop_tick() invocation in sched_can_stop_tick() Tejun Heo
2024-08-05 17:55   ` David Vernet
2024-08-04  2:40 ` [PATCH 2/6] sched_ext: Add scx_enabled() test to @start_class promotion in put_prev_task_balance() Tejun Heo
2024-08-05 17:57   ` David Vernet
2024-08-04  2:40 ` [PATCH 3/6] sched_ext: Use update_curr_common() in update_curr_scx() Tejun Heo
2024-08-05 18:23   ` David Vernet
2024-08-04  2:40 ` Tejun Heo [this message]
2024-08-05 19:49   ` [PATCH 4/6] sched_ext: Simplify UP support by enabling sched_class->balance() in UP David Vernet
2024-08-04  2:40 ` [PATCH 5/6] sched_ext: Improve comment on idle_sched_class exception in scx_task_iter_next_locked() Tejun Heo
2024-08-05 19:50   ` David Vernet
2024-08-04  2:40 ` [PATCH 6/6] sched_ext: Make task_can_run_on_remote_rq() use common task_allowed_on_cpu() Tejun Heo
2024-08-05 19:55   ` David Vernet
2024-08-06  8:12   ` Peter Zijlstra
2024-08-06 17:04     ` Tejun Heo
2024-08-06 19:39   ` [PATCH v2 " Tejun Heo
2024-08-06  8:13 ` [PATCHSET sched_ext/for-6.12] sched_ext: Misc updates Peter Zijlstra
2024-08-06 19:39 ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240804024047.100355-5-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=void@manifault.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.