From: Tejun Heo <tj@kernel.org>
To: sched-ext@lists.linux.dev, David Vernet <void@manifault.com>,
	Andrea Righi <arighi@nvidia.com>,
	Changwoo Min <changwoo@igalia.com>
Cc: Cheng-Yang Chou <yphbchou0911@gmail.com>,
	Juntong Deng <juntong.deng@outlook.com>,
	Ching-Chun Huang <jserv@ccns.ncku.edu.tw>,
	Chia-Ping Tsai <chia7712@gmail.com>,
	Emil Tsalapatis <emil@etsalapatis.com>,
	linux-kernel@vger.kernel.org
Subject: [PATCH v2 05/10] sched_ext: Decouple kfunc unlocked-context check from kf_mask
Date: Fri, 10 Apr 2026 07:51:53 -1000
Message-ID: <b3e87aa43672ae32a39b60ea434ed4c1@kernel.org>
In-Reply-To: <20260410063046.3556100-6-tj@kernel.org>

scx_kf_allowed_if_unlocked() uses !current->scx.kf_mask as a proxy for "no
SCX-tracked lock held". kf_mask is removed in a follow-up patch, so its two
callers - select_cpu_from_kfunc() and scx_dsq_move() - need another basis.

Add a new bool scx_rq.in_select_cpu, set across the SCX_CALL_OP_TASK_RET
that invokes ops.select_cpu(), to capture the one case where SCX itself
holds no lock but try_to_wake_up() holds @p's pi_lock. Together with
scx_locked_rq(), it expresses the same accepted-context set.

select_cpu_from_kfunc() needs a runtime test because it has to take
different locking paths depending on context. Open-code the check as a
branch. The unlocked branch takes raw_spin_lock_irqsave(&p->pi_lock)
directly - pi_lock alone is enough for the fields the kfunc reads, and is
lighter than task_rq_lock().

scx_dsq_move() doesn't really need a runtime test - its accepted contexts
could be enforced at verifier load time. But since the runtime state is
already there and using it keeps the upcoming load-time filter simpler, just
write it the same way: (scx_locked_rq() || in_select_cpu) &&
!kf_allowed(DISPATCH).

scx_kf_allowed_if_unlocked() is deleted with the conversions.

No semantic change.

v2: s/No functional change/No semantic change/ - the unlocked path now acquires
    pi_lock instead of the heavier task_rq_lock() (Andrea Righi).

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
---
 kernel/sched/ext.c          |    4 +++-
 kernel/sched/ext_idle.c     |   39 +++++++++++++++++----------------------
 kernel/sched/ext_internal.h |    5 -----
 kernel/sched/sched.h        |    1 +
 4 files changed, 21 insertions(+), 28 deletions(-)

--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3308,10 +3308,12 @@ static int select_task_rq_scx(struct tas
 		WARN_ON_ONCE(*ddsp_taskp);
 		*ddsp_taskp = p;

+		this_rq()->scx.in_select_cpu = true;
 		cpu = SCX_CALL_OP_TASK_RET(sch,
 					   SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU,
 					   select_cpu, NULL, p, prev_cpu,
 					   wake_flags);
+		this_rq()->scx.in_select_cpu = false;
 		p->scx.selected_cpu = cpu;
 		*ddsp_taskp = NULL;
 		if (ops_cpu_valid(sch, cpu, "from ops.select_cpu()"))
@@ -8144,7 +8146,7 @@ static bool scx_dsq_move(struct bpf_iter
 	bool in_balance;
 	unsigned long flags;

-	if (!scx_kf_allowed_if_unlocked() &&
+	if ((scx_locked_rq() || this_rq()->scx.in_select_cpu) &&
 	    !scx_kf_allowed(sch, SCX_KF_DISPATCH))
 		return false;

--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -913,8 +913,8 @@ static s32 select_cpu_from_kfunc(struct
 				 s32 prev_cpu, u64 wake_flags,
 				 const struct cpumask *allowed, u64 flags)
 {
-	struct rq *rq;
-	struct rq_flags rf;
+	unsigned long irq_flags;
+	bool we_locked = false;
 	s32 cpu;

 	if (!ops_cpu_valid(sch, prev_cpu, NULL))
@@ -924,27 +924,22 @@ static s32 select_cpu_from_kfunc(struct
 		return -EBUSY;

 	/*
-	 * If called from an unlocked context, acquire the task's rq lock,
-	 * so that we can safely access p->cpus_ptr and p->nr_cpus_allowed.
+	 * Accessing p->cpus_ptr / p->nr_cpus_allowed needs either @p's rq
+	 * lock or @p's pi_lock. Three cases:
 	 *
-	 * Otherwise, allow to use this kfunc only from ops.select_cpu()
-	 * and ops.select_enqueue().
+	 *  - inside ops.select_cpu(): try_to_wake_up() holds @p's pi_lock.
+	 *  - other rq-locked SCX op: scx_locked_rq() points at the held rq.
+	 *  - truly unlocked (UNLOCKED ops, SYSCALL, non-SCX struct_ops):
+	 *    nothing held, take pi_lock ourselves.
 	 */
-	if (scx_kf_allowed_if_unlocked()) {
-		rq = task_rq_lock(p, &rf);
-	} else {
-		if (!scx_kf_allowed(sch, SCX_KF_SELECT_CPU | SCX_KF_ENQUEUE))
-			return -EPERM;
-		rq = scx_locked_rq();
-	}
-
-	/*
-	 * Validate locking correctness to access p->cpus_ptr and
-	 * p->nr_cpus_allowed: if we're holding an rq lock, we're safe;
-	 * otherwise, assert that p->pi_lock is held.
-	 */
-	if (!rq)
+	if (this_rq()->scx.in_select_cpu) {
 		lockdep_assert_held(&p->pi_lock);
+	} else if (!scx_locked_rq()) {
+		raw_spin_lock_irqsave(&p->pi_lock, irq_flags);
+		we_locked = true;
+	} else if (!scx_kf_allowed(sch, SCX_KF_ENQUEUE)) {
+		return -EPERM;
+	}

 	/*
 	 * This may also be called from ops.enqueue(), so we need to handle
@@ -963,8 +958,8 @@ static s32 select_cpu_from_kfunc(struct
 					 allowed ?: p->cpus_ptr, flags);
 	}

-	if (scx_kf_allowed_if_unlocked())
-		task_rq_unlock(rq, p, &rf);
+	if (we_locked)
+		raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags);

 	return cpu;
 }
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -1372,11 +1372,6 @@ static inline struct rq *scx_locked_rq(v
 	return __this_cpu_read(scx_locked_rq_state);
 }

-static inline bool scx_kf_allowed_if_unlocked(void)
-{
-	return !current->scx.kf_mask;
-}
-
 static inline bool scx_bypassing(struct scx_sched *sch, s32 cpu)
 {
 	return unlikely(per_cpu_ptr(sch->pcpu, cpu)->flags &
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -798,6 +798,7 @@ struct scx_rq {
 	u64			extra_enq_flags;	/* see move_task_to_local_dsq() */
 	u32			nr_running;
 	u32			cpuperf_target;		/* [0, SCHED_CAPACITY_SCALE] */
+	bool			in_select_cpu;
 	bool			cpu_released;
 	u32			flags;
 	u32			nr_immed;		/* ENQ_IMMED tasks on local_dsq */
