All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: David Vernet <void@manifault.com>,
	Andrea Righi <arighi@nvidia.com>,
	Changwoo Min <changwoo@igalia.com>,
	linux-kernel@vger.kernel.org
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 11/12] sched_ext: Move disable machinery into scx_sched
Date: Wed, 23 Apr 2025 13:44:49 -1000	[thread overview]
Message-ID: <20250423234542.1890867-12-tj@kernel.org> (raw)
In-Reply-To: <20250423234542.1890867-1-tj@kernel.org>

Because disable can be triggered from anywhere and the scheduler cannot be
trusted, the disable path uses an irq_work to bounce and a kthread_work,
which is executed on an RT helper kthread, to perform the disable. These
must be per-scheduler-instance to guarantee forward progress. Move them
into scx_sched.

- If an scx_sched is accessible, its helper kthread is always valid, making
  the `helper` check in schedule_scx_disable_work() unnecessary. As the
  function becomes trivial after the removal of the test, inline it.

- scx_create_rt_helper() has only one user - creation of the disable helper
  kthread. Inline it into scx_alloc_and_add_sched().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/sched/ext.c | 73 ++++++++++++++++------------------------------
 1 file changed, 25 insertions(+), 48 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 75c91b58430c..d27193010b6a 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -848,6 +848,10 @@ struct scx_sched {
 	struct scx_exit_info	*exit_info;
 
 	struct kobject		kobj;
+
+	struct kthread_worker	*helper;
+	struct irq_work		error_irq_work;
+	struct kthread_work	disable_work;
 	struct rcu_work		rcu_work;
 };
 
@@ -1021,7 +1025,6 @@ static DEFINE_SPINLOCK(scx_tasks_lock);
 static LIST_HEAD(scx_tasks);
 
 /* ops enable/disable */
-static struct kthread_worker *scx_helper;
 static DEFINE_MUTEX(scx_enable_mutex);
 DEFINE_STATIC_KEY_FALSE(__scx_enabled);
 DEFINE_STATIC_PERCPU_RWSEM(scx_fork_rwsem);
@@ -4415,6 +4418,7 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
 	struct scx_dispatch_q *dsq;
 	int node;
 
+	kthread_stop(sch->helper->task);
 	free_percpu(sch->event_stats_cpu);
 
 	for_each_node_state(node, N_POSSIBLE)
@@ -4737,7 +4741,7 @@ static const char *scx_exit_reason(enum scx_exit_kind kind)
 
 static void scx_disable_workfn(struct kthread_work *work)
 {
-	struct scx_sched *sch = scx_root;
+	struct scx_sched *sch = container_of(work, struct scx_sched, disable_work);
 	struct scx_exit_info *ei = sch->exit_info;
 	struct scx_task_iter sti;
 	struct task_struct *p;
@@ -4883,20 +4887,6 @@ static void scx_disable_workfn(struct kthread_work *work)
 	scx_bypass(false);
 }
 
-static DEFINE_KTHREAD_WORK(scx_disable_work, scx_disable_workfn);
-
-static void schedule_scx_disable_work(void)
-{
-	struct kthread_worker *helper = READ_ONCE(scx_helper);
-
-	/*
-	 * We may be called spuriously before the first bpf_sched_ext_reg(). If
-	 * scx_helper isn't set up yet, there's nothing to do.
-	 */
-	if (helper)
-		kthread_queue_work(helper, &scx_disable_work);
-}
-
 static void scx_disable(enum scx_exit_kind kind)
 {
 	int none = SCX_EXIT_NONE;
@@ -4909,7 +4899,7 @@ static void scx_disable(enum scx_exit_kind kind)
 	sch = rcu_dereference(scx_root);
 	if (sch) {
 		atomic_try_cmpxchg(&sch->exit_kind, &none, kind);
-		schedule_scx_disable_work();
+		kthread_queue_work(sch->helper, &sch->disable_work);
 	}
 	rcu_read_unlock();
 }
@@ -5211,16 +5201,15 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
 
 static void scx_error_irq_workfn(struct irq_work *irq_work)
 {
-	struct scx_exit_info *ei = scx_root->exit_info;
+	struct scx_sched *sch = container_of(irq_work, struct scx_sched, error_irq_work);
+	struct scx_exit_info *ei = sch->exit_info;
 
 	if (ei->kind >= SCX_EXIT_ERROR)
-		scx_dump_state(ei, scx_root->ops.exit_dump_len);
+		scx_dump_state(ei, sch->ops.exit_dump_len);
 
-	schedule_scx_disable_work();
+	kthread_queue_work(sch->helper, &sch->disable_work);
 }
 
-static DEFINE_IRQ_WORK(scx_error_irq_work, scx_error_irq_workfn);
-
 static __printf(3, 4) void __scx_exit(enum scx_exit_kind kind, s64 exit_code,
 				      const char *fmt, ...)
 {
@@ -5247,17 +5236,7 @@ static __printf(3, 4) void __scx_exit(enum scx_exit_kind kind, s64 exit_code,
 	ei->kind = kind;
 	ei->reason = scx_exit_reason(ei->kind);
 
-	irq_work_queue(&scx_error_irq_work);
-}
-
-static struct kthread_worker *scx_create_rt_helper(const char *name)
-{
-	struct kthread_worker *helper;
-
-	helper = kthread_run_worker(0, name);
-	if (helper)
-		sched_set_fifo(helper->task);
-	return helper;
+	irq_work_queue(&scx_root->error_irq_work);
 }
 
 static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
@@ -5303,16 +5282,25 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
 	if (!sch->event_stats_cpu)
 		goto err_free_gdsqs;
 
+	sch->helper = kthread_run_worker(0, "sched_ext_helper");
+	if (!sch->helper)
+		goto err_event_stats;
+	sched_set_fifo(sch->helper->task);
+
 	atomic_set(&sch->exit_kind, SCX_EXIT_NONE);
+	init_irq_work(&sch->error_irq_work, scx_error_irq_workfn);
+	kthread_init_work(&sch->disable_work, scx_disable_workfn);
 	sch->ops = *ops;
 
 	sch->kobj.kset = scx_kset;
 	ret = kobject_init_and_add(&sch->kobj, &scx_ktype, NULL, "root");
 	if (ret < 0)
-		goto err_event_stats;
+		goto err_stop_helper;
 
 	return sch;
 
+err_stop_helper:
+	kthread_stop(sch->helper->task);
 err_event_stats:
 	free_percpu(sch->event_stats_cpu);
 err_free_gdsqs:
@@ -5390,14 +5378,6 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 
 	mutex_lock(&scx_enable_mutex);
 
-	if (!scx_helper) {
-		WRITE_ONCE(scx_helper, scx_create_rt_helper("sched_ext_helper"));
-		if (!scx_helper) {
-			ret = -ENOMEM;
-			goto err_unlock;
-		}
-	}
-
 	if (scx_enable_state() != SCX_DISABLED) {
 		ret = -EBUSY;
 		goto err_unlock;
@@ -5626,7 +5606,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 	 * completion.
 	 */
 	scx_error("scx_enable() failed (%d)", ret);
-	kthread_flush_work(&scx_disable_work);
+	kthread_flush_work(&sch->disable_work);
 	kobject_put(&sch->kobj);
 	return 0;
 }
@@ -5779,7 +5759,7 @@ static void bpf_scx_unreg(void *kdata, struct bpf_link *link)
 	struct scx_sched *sch = scx_root;
 
 	scx_disable(SCX_EXIT_UNREG);
-	kthread_flush_work(&scx_disable_work);
+	kthread_flush_work(&sch->disable_work);
 	kobject_put(&sch->kobj);
 }
 
@@ -5902,10 +5882,7 @@ static struct bpf_struct_ops bpf_sched_ext_ops = {
 
 static void sysrq_handle_sched_ext_reset(u8 key)
 {
-	if (scx_helper)
-		scx_disable(SCX_EXIT_SYSRQ);
-	else
-		pr_info("sched_ext: BPF scheduler not yet used\n");
+	scx_disable(SCX_EXIT_SYSRQ);
 }
 
 static const struct sysrq_key_op sysrq_sched_ext_reset_op = {
-- 
2.49.0


  parent reply	other threads:[~2025-04-23 23:45 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-23 23:44 [PATCHSET sched_ext/for-6.16] sched_ext: Introduce scx_sched Tejun Heo
2025-04-23 23:44 ` [PATCH 01/12] " Tejun Heo
2025-04-23 23:44 ` [PATCH 02/12] sched_ext: Avoid NULL scx_root deref through SCX_HAS_OP() Tejun Heo
2025-04-24  7:23   ` Chengming Zhou
2025-04-24 18:55     ` Tejun Heo
2025-04-23 23:44 ` [PATCH 03/12] sched_ext: Use dynamic allocation for scx_sched Tejun Heo
2025-04-25 10:14   ` Andrea Righi
2025-04-25 19:48     ` Tejun Heo
2025-04-23 23:44 ` [PATCH 04/12] sched_ext: Inline create_dsq() into scx_bpf_create_dsq() Tejun Heo
2025-04-23 23:44 ` [PATCH 05/12] sched_ext: Factor out scx_alloc_and_add_sched() Tejun Heo
2025-04-23 23:44 ` [PATCH 06/12] sched_ext: Move dsq_hash into scx_sched Tejun Heo
2025-04-23 23:44 ` [PATCH 07/12] sched_ext: Move global_dsqs " Tejun Heo
2025-04-23 23:44 ` [PATCH 08/12] sched_ext: Relocate scx_event_stats definition Tejun Heo
2025-04-23 23:44 ` [PATCH 09/12] sched_ext: Factor out scx_read_events() Tejun Heo
2025-04-23 23:44 ` [PATCH 10/12] sched_ext: Move event_stats_cpu into scx_sched Tejun Heo
2025-04-25  5:38   ` Changwoo Min
2025-04-23 23:44 ` Tejun Heo [this message]
2025-04-23 23:44 ` [PATCH 12/12] sched_ext: Clean up SCX_EXIT_NONE handling in scx_disable_workfn() Tejun Heo
  -- strict thread matches above, loose matches on Subject: below --
2025-04-25 21:58 [PATCHSET v2 sched_ext/for-6.16] sched_ext: Introduce scx_sched Tejun Heo
2025-04-25 21:58 ` [PATCH 11/12] sched_ext: Move disable machinery into scx_sched Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250423234542.1890867-12-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=arighi@nvidia.com \
    --cc=changwoo@igalia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=void@manifault.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.