[PATCH 11/12] sched_ext: Move disable machinery into scx_sched

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Tejun Heo <tj@kernel.org>
To: void@manifault.com, arighi@nvidia.com, multics69@gmail.com
Cc: linux-kernel@vger.kernel.org, sched-ext@meta.com,
	Tejun Heo <tj@kernel.org>
Subject: [PATCH 11/12] sched_ext: Move disable machinery into scx_sched
Date: Fri, 25 Apr 2025 11:58:26 -1000	[thread overview]
Message-ID: <20250425215840.2334972-12-tj@kernel.org> (raw)
In-Reply-To: <20250425215840.2334972-1-tj@kernel.org>

Because disable can be triggered from anywhere and the scheduler cannot be
trusted, the disable path uses an irq_work to bounce to a kthread_work, which
is executed on an RT helper kthread to perform the disable. These must be
per-scheduler-instance to guarantee forward progress. Move them into
scx_sched.

- If an scx_sched is accessible, its helper kthread is always valid, making
  the `helper` check in schedule_scx_disable_work() unnecessary. As the
  function becomes trivial after the removal of the test, inline it.

- scx_create_rt_helper() has only one user - creation of the disable helper
  kthread. Inline it into scx_alloc_and_add_sched().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/sched/ext.c | 73 ++++++++++++++++------------------------------
 1 file changed, 25 insertions(+), 48 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 154c354856fc..06973ec6177a 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -851,6 +851,10 @@ struct scx_sched {
 	struct scx_exit_info	*exit_info;
 
 	struct kobject		kobj;
+
+	struct kthread_worker	*helper;
+	struct irq_work		error_irq_work;
+	struct kthread_work	disable_work;
 	struct rcu_work		rcu_work;
 };
 
@@ -1024,7 +1028,6 @@ static DEFINE_SPINLOCK(scx_tasks_lock);
 static LIST_HEAD(scx_tasks);
 
 /* ops enable/disable */
-static struct kthread_worker *scx_helper;
 static DEFINE_MUTEX(scx_enable_mutex);
 DEFINE_STATIC_KEY_FALSE(__scx_enabled);
 DEFINE_STATIC_PERCPU_RWSEM(scx_fork_rwsem);
@@ -4418,6 +4421,7 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
 	struct scx_dispatch_q *dsq;
 	int node;
 
+	kthread_stop(sch->helper->task);
 	free_percpu(sch->event_stats_cpu);
 
 	for_each_node_state(node, N_POSSIBLE)
@@ -4740,7 +4744,7 @@ static const char *scx_exit_reason(enum scx_exit_kind kind)
 
 static void scx_disable_workfn(struct kthread_work *work)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch = container_of(work, struct scx_sched, disable_work);
 	struct scx_exit_info *ei = sch->exit_info;
 	struct scx_task_iter sti;
 	struct task_struct *p;
@@ -4886,20 +4890,6 @@ static void scx_disable_workfn(struct kthread_work *work)
 	scx_bypass(false);
 }
 
-static DEFINE_KTHREAD_WORK(scx_disable_work, scx_disable_workfn);
-
-static void schedule_scx_disable_work(void)
-{
-	struct kthread_worker *helper = READ_ONCE(scx_helper);
-
-	/*
-	 * We may be called spuriously before the first bpf_sched_ext_reg(). If
-	 * scx_helper isn't set up yet, there's nothing to do.
-	 */
-	if (helper)
-		kthread_queue_work(helper, &scx_disable_work);
-}
-
 static void scx_disable(enum scx_exit_kind kind)
 {
 	int none = SCX_EXIT_NONE;
@@ -4912,7 +4902,7 @@ static void scx_disable(enum scx_exit_kind kind)
 	sch = rcu_dereference(scx_root);
 	if (sch) {
 		atomic_try_cmpxchg(&sch->exit_kind, &none, kind);
-		schedule_scx_disable_work();
+		kthread_queue_work(sch->helper, &sch->disable_work);
 	}
 	rcu_read_unlock();
 }
@@ -5214,16 +5204,15 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
 
 static void scx_error_irq_workfn(struct irq_work *irq_work)
 {
-	struct scx_exit_info *ei = scx_root->exit_info;
+	struct scx_sched *sch = container_of(irq_work, struct scx_sched, error_irq_work);
+	struct scx_exit_info *ei = sch->exit_info;
 
 	if (ei->kind >= SCX_EXIT_ERROR)
-		scx_dump_state(ei, scx_root->ops.exit_dump_len);
+		scx_dump_state(ei, sch->ops.exit_dump_len);
 
-	schedule_scx_disable_work();
+	kthread_queue_work(sch->helper, &sch->disable_work);
 }
 
-static DEFINE_IRQ_WORK(scx_error_irq_work, scx_error_irq_workfn);
-
 static __printf(3, 4) void __scx_exit(enum scx_exit_kind kind, s64 exit_code,
 				      const char *fmt, ...)
 {
@@ -5250,17 +5239,7 @@ static __printf(3, 4) void __scx_exit(enum scx_exit_kind kind, s64 exit_code,
 	ei->kind = kind;
 	ei->reason = scx_exit_reason(ei->kind);
 
-	irq_work_queue(&scx_error_irq_work);
-}
-
-static struct kthread_worker *scx_create_rt_helper(const char *name)
-{
-	struct kthread_worker *helper;
-
-	helper = kthread_run_worker(0, name);
-	if (helper)
-		sched_set_fifo(helper->task);
-	return helper;
+	irq_work_queue(&scx_root->error_irq_work);
 }
 
 static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
@@ -5306,17 +5285,26 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
 	if (!sch->event_stats_cpu)
 		goto err_free_gdsqs;
 
+	sch->helper = kthread_run_worker(0, "sched_ext_helper");
+	if (!sch->helper)
+		goto err_free_event_stats;
+	sched_set_fifo(sch->helper->task);
+
 	atomic_set(&sch->exit_kind, SCX_EXIT_NONE);
+	init_irq_work(&sch->error_irq_work, scx_error_irq_workfn);
+	kthread_init_work(&sch->disable_work, scx_disable_workfn);
 	sch->ops = *ops;
 	ops->priv = sch;
 
 	sch->kobj.kset = scx_kset;
 	ret = kobject_init_and_add(&sch->kobj, &scx_ktype, NULL, "root");
 	if (ret < 0)
-		goto err_free_event_stats;
+		goto err_stop_helper;
 
 	return sch;
 
+err_stop_helper:
+	kthread_stop(sch->helper->task);
 err_free_event_stats:
 	free_percpu(sch->event_stats_cpu);
 err_free_gdsqs:
@@ -5394,14 +5382,6 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 
 	mutex_lock(&scx_enable_mutex);
 
-	if (!scx_helper) {
-		WRITE_ONCE(scx_helper, scx_create_rt_helper("sched_ext_helper"));
-		if (!scx_helper) {
-			ret = -ENOMEM;
-			goto err_unlock;
-		}
-	}
-
 	if (scx_enable_state() != SCX_DISABLED) {
 		ret = -EBUSY;
 		goto err_unlock;
@@ -5630,7 +5610,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 	 * completion. sch's base reference will be put by bpf_scx_unreg().
 	 */
 	scx_error("scx_enable() failed (%d)", ret);
-	kthread_flush_work(&scx_disable_work);
+	kthread_flush_work(&sch->disable_work);
 	return 0;
 }
 
@@ -5783,7 +5763,7 @@ static void bpf_scx_unreg(void *kdata, struct bpf_link *link)
 	struct scx_sched *sch = ops->priv;
 
 	scx_disable(SCX_EXIT_UNREG);
-	kthread_flush_work(&scx_disable_work);
+	kthread_flush_work(&sch->disable_work);
 	kobject_put(&sch->kobj);
 }
 
@@ -5906,10 +5886,7 @@ static struct bpf_struct_ops bpf_sched_ext_ops = {
 
 static void sysrq_handle_sched_ext_reset(u8 key)
 {
-	if (scx_helper)
-		scx_disable(SCX_EXIT_SYSRQ);
-	else
-		pr_info("sched_ext: BPF scheduler not yet used\n");
+	scx_disable(SCX_EXIT_SYSRQ);
 }
 
 static const struct sysrq_key_op sysrq_sched_ext_reset_op = {
-- 
2.49.0

next prev parent reply	other threads:[~2025-04-25 21:58 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-25 21:58 [PATCHSET v2 sched_ext/for-6.16] sched_ext: Introduce scx_sched Tejun Heo
2025-04-25 21:58 ` [PATCH 01/12] " Tejun Heo
2025-04-25 21:58 ` [PATCH 02/12] sched_ext: Avoid NULL scx_root deref through SCX_HAS_OP() Tejun Heo
2025-04-25 21:58 ` [PATCH 03/12] sched_ext: Use dynamic allocation for scx_sched Tejun Heo
2025-04-25 21:58 ` [PATCH 04/12] sched_ext: Inline create_dsq() into scx_bpf_create_dsq() Tejun Heo
2025-04-25 21:58 ` [PATCH 05/12] sched_ext: Factor out scx_alloc_and_add_sched() Tejun Heo
2025-04-25 21:58 ` [PATCH 06/12] sched_ext: Move dsq_hash into scx_sched Tejun Heo
2025-04-26 20:25   ` Andrea Righi
2025-04-28 20:43   ` [PATCH v2 " Tejun Heo
2025-04-28 23:34     ` Changwoo Min
2025-04-25 21:58 ` [PATCH 07/12] sched_ext: Move global_dsqs " Tejun Heo
2025-04-25 21:58 ` [PATCH 08/12] sched_ext: Relocate scx_event_stats definition Tejun Heo
2025-04-25 21:58 ` [PATCH 09/12] sched_ext: Factor out scx_read_events() Tejun Heo
2025-04-25 21:58 ` [PATCH 10/12] sched_ext: Move event_stats_cpu into scx_sched Tejun Heo
2025-04-25 21:58 ` Tejun Heo [this message]
2025-04-25 21:58 ` [PATCH 12/12] sched_ext: Clean up SCX_EXIT_NONE handling in scx_disable_workfn() Tejun Heo
2025-04-27  7:04 ` [PATCHSET v2 sched_ext/for-6.16] sched_ext: Introduce scx_sched Changwoo Min
2025-04-28 20:58 ` Andrea Righi
2025-04-29 18:41 ` Tejun Heo
  -- strict thread matches above, loose matches on Subject: below --
2025-04-23 23:44 [PATCHSET " Tejun Heo
2025-04-23 23:44 ` [PATCH 11/12] sched_ext: Move disable machinery into scx_sched Tejun Heo

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:154c354856f dfblob:06973ec6177 )
 OR (
bs:"[PATCH 11/12] sched_ext: Move disable machinery into scx_sched" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250425215840.2334972-12-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=arighi@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=multics69@gmail.com \
    --cc=sched-ext@meta.com \
    --cc=void@manifault.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox