All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: linux-kernel@vger.kernel.org, sched-ext@lists.linux.dev
Cc: void@manifault.com, arighi@nvidia.com, changwoo@igalia.com,
	emil@etsalapatis.com, Tejun Heo <tj@kernel.org>
Subject: [PATCH 02/15] sched_ext: Wrap global DSQs in per-node structure
Date: Fri,  6 Mar 2026 09:06:10 -1000	[thread overview]
Message-ID: <20260306190623.1076074-3-tj@kernel.org> (raw)
In-Reply-To: <20260306190623.1076074-1-tj@kernel.org>

Global DSQs are currently stored as an array of scx_dispatch_q pointers,
one per NUMA node. To allow adding more per-node data structures, wrap the
global DSQ in scx_sched_pnode and replace global_dsqs with pnode array.

NUMA-aware allocation is maintained. No functional changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/sched/ext.c          | 32 ++++++++++++++++----------------
 kernel/sched/ext_internal.h |  6 +++++-
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index fe222df1d494..9232abea4f22 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -344,7 +344,7 @@ static bool scx_is_descendant(struct scx_sched *sch, struct scx_sched *ancestor)
 static struct scx_dispatch_q *find_global_dsq(struct scx_sched *sch,
 					      struct task_struct *p)
 {
-	return sch->global_dsqs[cpu_to_node(task_cpu(p))];
+	return &sch->pnode[cpu_to_node(task_cpu(p))]->global_dsq;
 }
 
 static struct scx_dispatch_q *find_user_dsq(struct scx_sched *sch, u64 dsq_id)
@@ -2229,7 +2229,7 @@ static bool consume_global_dsq(struct scx_sched *sch, struct rq *rq)
 {
 	int node = cpu_to_node(cpu_of(rq));
 
-	return consume_dispatch_q(sch, rq, sch->global_dsqs[node]);
+	return consume_dispatch_q(sch, rq, &sch->pnode[node]->global_dsq);
 }
 
 /**
@@ -4148,8 +4148,8 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
 	free_percpu(sch->pcpu);
 
 	for_each_node_state(node, N_POSSIBLE)
-		kfree(sch->global_dsqs[node]);
-	kfree(sch->global_dsqs);
+		kfree(sch->pnode[node]);
+	kfree(sch->pnode);
 
 	rhashtable_walk_enter(&sch->dsq_hash, &rht_iter);
 	do {
@@ -5707,23 +5707,23 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
 	if (ret < 0)
 		goto err_free_ei;
 
-	sch->global_dsqs = kzalloc_objs(sch->global_dsqs[0], nr_node_ids);
-	if (!sch->global_dsqs) {
+	sch->pnode = kzalloc_objs(sch->pnode[0], nr_node_ids);
+	if (!sch->pnode) {
 		ret = -ENOMEM;
 		goto err_free_hash;
 	}
 
 	for_each_node_state(node, N_POSSIBLE) {
-		struct scx_dispatch_q *dsq;
+		struct scx_sched_pnode *pnode;
 
-		dsq = kzalloc_node(sizeof(*dsq), GFP_KERNEL, node);
-		if (!dsq) {
+		pnode = kzalloc_node(sizeof(*pnode), GFP_KERNEL, node);
+		if (!pnode) {
 			ret = -ENOMEM;
-			goto err_free_gdsqs;
+			goto err_free_pnode;
 		}
 
-		init_dsq(dsq, SCX_DSQ_GLOBAL, sch);
-		sch->global_dsqs[node] = dsq;
+		init_dsq(&pnode->global_dsq, SCX_DSQ_GLOBAL, sch);
+		sch->pnode[node] = pnode;
 	}
 
 	sch->dsp_max_batch = ops->dispatch_max_batch ?: SCX_DSP_DFL_MAX_BATCH;
@@ -5732,7 +5732,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
 				   __alignof__(struct scx_sched_pcpu));
 	if (!sch->pcpu) {
 		ret = -ENOMEM;
-		goto err_free_gdsqs;
+		goto err_free_pnode;
 	}
 
 	for_each_possible_cpu(cpu)
@@ -5819,10 +5819,10 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
 	kthread_destroy_worker(sch->helper);
 err_free_pcpu:
 	free_percpu(sch->pcpu);
-err_free_gdsqs:
+err_free_pnode:
 	for_each_node_state(node, N_POSSIBLE)
-		kfree(sch->global_dsqs[node]);
-	kfree(sch->global_dsqs);
+		kfree(sch->pnode[node]);
+	kfree(sch->pnode);
 err_free_hash:
 	rhashtable_free_and_destroy(&sch->dsq_hash, NULL, NULL);
 err_free_ei:
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index 4cb97093b872..9e5ebd00ea0c 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -975,6 +975,10 @@ struct scx_sched_pcpu {
 	struct scx_dsp_ctx	dsp_ctx;
 };
 
+struct scx_sched_pnode {
+	struct scx_dispatch_q	global_dsq;
+};
+
 struct scx_sched {
 	struct sched_ext_ops	ops;
 	DECLARE_BITMAP(has_op, SCX_OPI_END);
@@ -988,7 +992,7 @@ struct scx_sched {
 	 * per-node split isn't sufficient, it can be further split.
 	 */
 	struct rhashtable	dsq_hash;
-	struct scx_dispatch_q	**global_dsqs;
+	struct scx_sched_pnode	**pnode;
 	struct scx_sched_pcpu __percpu *pcpu;
 
 	u64			slice_dfl;
-- 
2.53.0


  parent reply	other threads:[~2026-03-06 19:06 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-06 19:06 [PATCHSET sched_ext/for-7.1] sched_ext: Overhaul DSQ reenqueue infrastructure Tejun Heo
2026-03-06 19:06 ` [PATCH 01/15] sched_ext: Relocate scx_bpf_task_cgroup() and its BTF_ID to the end of kfunc section Tejun Heo
2026-03-06 20:45   ` Emil Tsalapatis
2026-03-06 23:20   ` Daniel Jordan
2026-03-06 19:06 ` Tejun Heo [this message]
2026-03-06 20:52   ` [PATCH 02/15] sched_ext: Wrap global DSQs in per-node structure Emil Tsalapatis
2026-03-06 23:20   ` Daniel Jordan
2026-03-06 19:06 ` [PATCH 03/15] sched_ext: Factor out pnode allocation and deallocation into helpers Tejun Heo
2026-03-06 20:54   ` Emil Tsalapatis
2026-03-06 23:21   ` Daniel Jordan
2026-03-06 19:06 ` [PATCH 04/15] sched_ext: Change find_global_dsq() to take CPU number instead of task Tejun Heo
2026-03-06 21:06   ` Emil Tsalapatis
2026-03-06 22:33   ` [PATCH v2 " Tejun Heo
2026-03-06 23:21   ` [PATCH " Daniel Jordan
2026-03-06 19:06 ` [PATCH 05/15] sched_ext: Relocate reenq_local() and run_deferred() Tejun Heo
2026-03-06 21:09   ` Emil Tsalapatis
2026-03-06 23:34   ` Daniel Jordan
2026-03-07  0:12   ` [PATCH v2 05/15] sched_ext: Relocate run_deferred() and its callees Tejun Heo
2026-03-06 19:06 ` [PATCH 06/15] sched_ext: Convert deferred_reenq_locals from llist to regular list Tejun Heo
2026-03-09 17:12   ` Emil Tsalapatis
2026-03-09 17:16     ` Emil Tsalapatis
2026-03-06 19:06 ` [PATCH 07/15] sched_ext: Wrap deferred_reenq_local_node into a struct Tejun Heo
2026-03-09 17:16   ` Emil Tsalapatis
2026-03-06 19:06 ` [PATCH 08/15] sched_ext: Introduce scx_bpf_dsq_reenq() for remote local DSQ reenqueue Tejun Heo
2026-03-09 17:33   ` Emil Tsalapatis
2026-03-06 19:06 ` [PATCH 09/15] sched_ext: Add reenq_flags plumbing to scx_bpf_dsq_reenq() Tejun Heo
2026-03-09 17:47   ` Emil Tsalapatis
2026-03-06 19:06 ` [PATCH 10/15] sched_ext: Add per-CPU data to DSQs Tejun Heo
2026-03-06 22:54   ` Andrea Righi
2026-03-06 22:56     ` Andrea Righi
2026-03-06 23:09   ` [PATCH v2 " Tejun Heo
2026-03-06 19:06 ` [PATCH 11/15] sched_ext: Factor out nldsq_cursor_next_task() and nldsq_cursor_lost_task() Tejun Heo
2026-03-06 19:06 ` [PATCH 12/15] sched_ext: Implement scx_bpf_dsq_reenq() for user DSQs Tejun Heo
2026-03-06 19:06 ` [PATCH 13/15] sched_ext: Optimize schedule_dsq_reenq() with lockless fast path Tejun Heo
2026-03-06 19:06 ` [PATCH 14/15] sched_ext: Simplify task state handling Tejun Heo
2026-03-06 19:06 ` [PATCH 15/15] sched_ext: Add SCX_TASK_REENQ_REASON flags Tejun Heo
2026-03-06 23:14 ` [PATCHSET sched_ext/for-7.1] sched_ext: Overhaul DSQ reenqueue infrastructure Andrea Righi
2026-03-07 15:38 ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260306190623.1076074-3-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=arighi@nvidia.com \
    --cc=changwoo@igalia.com \
    --cc=emil@etsalapatis.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sched-ext@lists.linux.dev \
    --cc=void@manifault.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.