* linux-next: manual merge of the sched-ext tree with the tip tree
@ 2026-03-09 18:49 Mark Brown
2026-03-09 20:02 ` Tejun Heo
2026-03-09 20:11 ` [PATCH sched_ext/for-7.1] sched_ext: Replace system_unbound_wq with system_dfl_wq in scx_kobj_release() Tejun Heo
0 siblings, 2 replies; 13+ messages in thread
From: Mark Brown @ 2026-03-09 18:49 UTC (permalink / raw)
To: Tejun Heo
Cc: Ingo Molnar, Linux Kernel Mailing List, Linux Next Mailing List,
Marco Crivellari, Peter Zijlstra
[-- Attachment #1: Type: text/plain, Size: 9292 bytes --]
Hi all,
Today's linux-next merge of the sched-ext tree got a conflict in:
kernel/sched/ext.c
between commits:
c2a57380df9dd ("sched: Replace use of system_unbound_wq with system_dfl_wq")
from the tip tree and commit:
cde94c032b32b ("sched_ext: Make watchdog sub-sched aware")
from the sched-ext tree.
I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging. You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.
I do note there's another system_unbound_wq usage there which for some
reason wasn't updated...
diff --cc kernel/sched/ext.c
index 7278d57496478,d6d8073370130..0000000000000
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@@ -2766,8 -3205,11 +3205,11 @@@ static void scx_watchdog_workfn(struct
cond_resched();
}
- queue_delayed_work(system_dfl_wq, to_delayed_work(work),
- READ_ONCE(scx_watchdog_timeout) / 2);
+
+ intv = READ_ONCE(scx_watchdog_interval);
+ if (intv < ULONG_MAX)
- queue_delayed_work(system_unbound_wq, to_delayed_work(work),
++ queue_delayed_work(system_dfl_wq, to_delayed_work(work),
+ intv);
}
void scx_tick(struct rq *rq)
@@@ -4282,9 -5218,247 +5218,247 @@@ static void free_kick_syncs(void
}
}
- static void scx_disable_workfn(struct kthread_work *work)
+ static void refresh_watchdog(void)
+ {
+ struct scx_sched *sch;
+ unsigned long intv = ULONG_MAX;
+
+ /* take the shortest timeout and use its half for watchdog interval */
+ rcu_read_lock();
+ list_for_each_entry_rcu(sch, &scx_sched_all, all)
+ intv = max(min(intv, sch->watchdog_timeout / 2), 1);
+ rcu_read_unlock();
+
+ WRITE_ONCE(scx_watchdog_timestamp, jiffies);
+ WRITE_ONCE(scx_watchdog_interval, intv);
+
+ if (intv < ULONG_MAX)
- mod_delayed_work(system_unbound_wq, &scx_watchdog_work, intv);
++ mod_delayed_work(system_dfl_wq, &scx_watchdog_work, intv);
+ else
+ cancel_delayed_work_sync(&scx_watchdog_work);
+ }
+
+ static s32 scx_link_sched(struct scx_sched *sch)
+ {
+ scoped_guard(raw_spinlock_irq, &scx_sched_lock) {
+ #ifdef CONFIG_EXT_SUB_SCHED
+ struct scx_sched *parent = scx_parent(sch);
+ s32 ret;
+
+ if (parent) {
+ ret = rhashtable_lookup_insert_fast(&scx_sched_hash,
+ &sch->hash_node, scx_sched_hash_params);
+ if (ret) {
+ scx_error(sch, "failed to insert into scx_sched_hash (%d)", ret);
+ return ret;
+ }
+
+ list_add_tail(&sch->sibling, &parent->children);
+ }
+ #endif /* CONFIG_EXT_SUB_SCHED */
+
+ list_add_tail_rcu(&sch->all, &scx_sched_all);
+ }
+
+ refresh_watchdog();
+ return 0;
+ }
+
+ static void scx_unlink_sched(struct scx_sched *sch)
+ {
+ scoped_guard(raw_spinlock_irq, &scx_sched_lock) {
+ #ifdef CONFIG_EXT_SUB_SCHED
+ if (scx_parent(sch)) {
+ rhashtable_remove_fast(&scx_sched_hash, &sch->hash_node,
+ scx_sched_hash_params);
+ list_del_init(&sch->sibling);
+ }
+ #endif /* CONFIG_EXT_SUB_SCHED */
+ list_del_rcu(&sch->all);
+ }
+
+ refresh_watchdog();
+ }
+
+ #ifdef CONFIG_EXT_SUB_SCHED
+ static DECLARE_WAIT_QUEUE_HEAD(scx_unlink_waitq);
+
+ static void drain_descendants(struct scx_sched *sch)
+ {
+ /*
+ * Child scheds that finished the critical part of disabling will take
+ * themselves off @sch->children. Wait for it to drain. As propagation
+ * is recursive, empty @sch->children means that all proper descendant
+ * scheds reached unlinking stage.
+ */
+ wait_event(scx_unlink_waitq, list_empty(&sch->children));
+ }
+
+ static void scx_fail_parent(struct scx_sched *sch,
+ struct task_struct *failed, s32 fail_code)
+ {
+ struct scx_sched *parent = scx_parent(sch);
+ struct scx_task_iter sti;
+ struct task_struct *p;
+
+ scx_error(parent, "ops.init_task() failed (%d) for %s[%d] while disabling a sub-scheduler",
+ fail_code, failed->comm, failed->pid);
+
+ /*
+ * Once $parent is bypassed, it's safe to put SCX_TASK_NONE tasks into
+ * it. This may cause downstream failures on the BPF side but $parent is
+ * dying anyway.
+ */
+ scx_bypass(parent, true);
+
+ scx_task_iter_start(&sti, sch->cgrp);
+ while ((p = scx_task_iter_next_locked(&sti))) {
+ if (scx_task_on_sched(parent, p))
+ continue;
+
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
+ scx_disable_and_exit_task(sch, p);
+ rcu_assign_pointer(p->scx.sched, parent);
+ }
+ }
+ scx_task_iter_stop(&sti);
+ }
+
+ static void scx_sub_disable(struct scx_sched *sch)
+ {
+ struct scx_sched *parent = scx_parent(sch);
+ struct scx_task_iter sti;
+ struct task_struct *p;
+ int ret;
+
+ /*
+ * Guarantee forward progress and wait for descendants to be disabled.
+ * To limit disruptions, $parent is not bypassed. Tasks are fully
+ * prepped and then inserted back into $parent.
+ */
+ scx_bypass(sch, true);
+ drain_descendants(sch);
+
+ /*
+ * Here, every runnable task is guaranteed to make forward progress and
+ * we can safely use blocking synchronization constructs. Actually
+ * disable ops.
+ */
+ mutex_lock(&scx_enable_mutex);
+ percpu_down_write(&scx_fork_rwsem);
+ scx_cgroup_lock();
+
+ set_cgroup_sched(sch_cgroup(sch), parent);
+
+ scx_task_iter_start(&sti, sch->cgrp);
+ while ((p = scx_task_iter_next_locked(&sti))) {
+ struct rq *rq;
+ struct rq_flags rf;
+
+ /* filter out duplicate visits */
+ if (scx_task_on_sched(parent, p))
+ continue;
+
+ /*
+ * By the time control reaches here, all descendant schedulers
+ * should already have been disabled.
+ */
+ WARN_ON_ONCE(!scx_task_on_sched(sch, p));
+
+ /*
+ * If $p is about to be freed, nothing prevents $sch from
+ * unloading before $p reaches sched_ext_free(). Disable and
+ * exit $p right away.
+ */
+ if (!tryget_task_struct(p)) {
+ scx_disable_and_exit_task(sch, p);
+ continue;
+ }
+
+ scx_task_iter_unlock(&sti);
+
+ /*
+ * $p is READY or ENABLED on @sch. Initialize for $parent,
+ * disable and exit from @sch, and then switch over to $parent.
+ *
+ * If a task fails to initialize for $parent, the only available
+ * action is disabling $parent too. While this allows disabling
+ * of a child sched to cause the parent scheduler to fail, the
+ * failure can only originate from ops.init_task() of the
+ * parent. A child can't directly affect the parent through its
+ * own failures.
+ */
+ ret = __scx_init_task(parent, p, false);
+ if (ret) {
+ scx_fail_parent(sch, p, ret);
+ put_task_struct(p);
+ break;
+ }
+
+ rq = task_rq_lock(p, &rf);
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
+ /*
+ * $p is initialized for $parent and still attached to
+ * @sch. Disable and exit for @sch, switch over to
+ * $parent, override the state to READY to account for
+ * $p having already been initialized, and then enable.
+ */
+ scx_disable_and_exit_task(sch, p);
+ scx_set_task_state(p, SCX_TASK_INIT);
+ rcu_assign_pointer(p->scx.sched, parent);
+ scx_set_task_state(p, SCX_TASK_READY);
+ scx_enable_task(parent, p);
+ }
+ task_rq_unlock(rq, p, &rf);
+
+ put_task_struct(p);
+ }
+ scx_task_iter_stop(&sti);
+
+ scx_cgroup_unlock();
+ percpu_up_write(&scx_fork_rwsem);
+
+ /*
+ * All tasks are moved off of @sch but there may still be on-going
+ * operations (e.g. ops.select_cpu()). Drain them by flushing RCU. Use
+ * the expedited version as ancestors may be waiting in bypass mode.
+ * Also, tell the parent that there is no need to keep running bypass
+ * DSQs for us.
+ */
+ synchronize_rcu_expedited();
+ disable_bypass_dsp(sch);
+
+ scx_unlink_sched(sch);
+
+ mutex_unlock(&scx_enable_mutex);
+
+ /*
+ * @sch is now unlinked from the parent's children list. Notify and call
+ * ops.sub_detach/exit(). Note that ops.sub_detach/exit() must be called
+ * after unlinking and releasing all locks. See scx_claim_exit().
+ */
+ wake_up_all(&scx_unlink_waitq);
+
+ if (sch->ops.sub_detach && sch->sub_attached) {
+ struct scx_sub_detach_args sub_detach_args = {
+ .ops = &sch->ops,
+ .cgroup_path = sch->cgrp_path,
+ };
+ SCX_CALL_OP(parent, SCX_KF_UNLOCKED, sub_detach, NULL,
+ &sub_detach_args);
+ }
+
+ if (sch->ops.exit)
+ SCX_CALL_OP(sch, SCX_KF_UNLOCKED, exit, NULL, sch->exit_info);
+ kobject_del(&sch->kobj);
+ }
+ #else /* CONFIG_EXT_SUB_SCHED */
+ static void drain_descendants(struct scx_sched *sch) { }
+ static void scx_sub_disable(struct scx_sched *sch) { }
+ #endif /* CONFIG_EXT_SUB_SCHED */
+
+ static void scx_root_disable(struct scx_sched *sch)
{
- struct scx_sched *sch = container_of(work, struct scx_sched, disable_work);
struct scx_exit_info *ei = sch->exit_info;
struct scx_task_iter sti;
struct task_struct *p;
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread

* Re: linux-next: manual merge of the sched-ext tree with the tip tree
2026-03-09 18:49 linux-next: manual merge of the sched-ext tree with the tip tree Mark Brown
@ 2026-03-09 20:02 ` Tejun Heo
2026-03-09 20:11 ` [PATCH sched_ext/for-7.1] sched_ext: Replace system_unbound_wq with system_dfl_wq in scx_kobj_release() Tejun Heo
1 sibling, 0 replies; 13+ messages in thread
From: Tejun Heo @ 2026-03-09 20:02 UTC (permalink / raw)
To: Mark Brown
Cc: Ingo Molnar, Linux Kernel Mailing List, Linux Next Mailing List,
Marco Crivellari, Peter Zijlstra
On Mon, Mar 09, 2026 at 06:49:44PM +0000, Mark Brown wrote:
> Hi all,
>
> Today's linux-next merge of the sched-ext tree got a conflict in:
>
> kernel/sched/ext.c
>
> between commits:
>
> c2a57380df9dd ("sched: Replace use of system_unbound_wq with system_dfl_wq")
>
> from the tip tree and commit:
>
> cde94c032b32b ("sched_ext: Make watchdog sub-sched aware")
>
> from the sched-ext tree.
>
> I fixed it up (see below) and can carry the fix as necessary. This
> is now fixed as far as linux-next is concerned, but any non trivial
> conflicts should be mentioned to your upstream maintainer when your tree
> is submitted for merging. You may also want to consider cooperating
> with the maintainer of the conflicting tree to minimise any particularly
> complex conflicts.
I pulled in sched/core into sched_ext/for-7.1 and resolved the conflict.
> I do note there's another system_unbound_wq usage there which for some
> reason wasn't updated...
Yeah, I'll queue a patch to convert that one too.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 13+ messages in thread

* [PATCH sched_ext/for-7.1] sched_ext: Replace system_unbound_wq with system_dfl_wq in scx_kobj_release()
2026-03-09 18:49 linux-next: manual merge of the sched-ext tree with the tip tree Mark Brown
2026-03-09 20:02 ` Tejun Heo
@ 2026-03-09 20:11 ` Tejun Heo
1 sibling, 0 replies; 13+ messages in thread
From: Tejun Heo @ 2026-03-09 20:11 UTC (permalink / raw)
To: Mark Brown, Ingo Molnar, Linux Kernel Mailing List,
Linux Next Mailing List, Marco Crivellari, Peter Zijlstra
Cc: David Vernet, Andrea Righi, Changwoo Min, Emil Tsalapatis,
sched-ext
c2a57380df9d ("sched: Replace use of system_unbound_wq with system_dfl_wq")
converted system_unbound_wq usages in ext.c but missed the queue_rcu_work()
call in scx_kobj_release() which was added later by the dynamic scx_sched
allocation conversion. Apply the same conversion.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Marco Crivellari <marco.crivellari@suse.com>
---
Applied to sched_ext/for-7.1.
kernel/sched/ext.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index b35b98020f3b..07476355bfd5 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4546,7 +4546,7 @@ static void scx_kobj_release(struct kobject *kobj)
struct scx_sched *sch = container_of(kobj, struct scx_sched, kobj);
INIT_RCU_WORK(&sch->rcu_work, scx_sched_free_rcu_work);
- queue_rcu_work(system_unbound_wq, &sch->rcu_work);
+ queue_rcu_work(system_dfl_wq, &sch->rcu_work);
}
static ssize_t scx_attr_ops_show(struct kobject *kobj,
--
2.53.0
--
tejun
^ permalink raw reply related [flat|nested] 13+ messages in thread
* linux-next: manual merge of the sched-ext tree with the tip tree
@ 2026-03-03 13:28 Mark Brown
0 siblings, 0 replies; 13+ messages in thread
From: Mark Brown @ 2026-03-03 13:28 UTC (permalink / raw)
To: Tejun Heo
Cc: Linux Kernel Mailing List, Linux Next Mailing List,
Marco Crivellari, Peter Zijlstra, zhidao su
[-- Attachment #1: Type: text/plain, Size: 1681 bytes --]
Hi all,
Today's linux-next merge of the sched-ext tree got a conflict in:
kernel/sched/ext.c
between commit:
c2a57380df9dd ("sched: Replace use of system_unbound_wq with system_dfl_wq")
from the tip tree and commit:
3f27958b729a2 ("sched_ext: Use READ_ONCE() for plain reads of scx_watchdog_timeout")
from the sched-ext tree.
I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging. You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.
diff --cc kernel/sched/ext.c
index a448a8407d8e0,b9247c9f0430b..0000000000000
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@@ -2762,8 -2848,8 +2848,8 @@@ static void scx_watchdog_workfn(struct
cond_resched();
}
- queue_delayed_work(system_unbound_wq, to_delayed_work(work),
+ queue_delayed_work(system_dfl_wq, to_delayed_work(work),
- scx_watchdog_timeout / 2);
+ READ_ONCE(scx_watchdog_timeout) / 2);
}
void scx_tick(struct rq *rq)
@@@ -5059,8 -5175,8 +5175,8 @@@ static int scx_enable(struct sched_ext_
WRITE_ONCE(scx_watchdog_timeout, timeout);
WRITE_ONCE(scx_watchdog_timestamp, jiffies);
- queue_delayed_work(system_unbound_wq, &scx_watchdog_work,
+ queue_delayed_work(system_dfl_wq, &scx_watchdog_work,
- scx_watchdog_timeout / 2);
+ READ_ONCE(scx_watchdog_timeout) / 2);
/*
* Once __scx_enabled is set, %current can be switched to SCX anytime.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 484 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread

* linux-next: manual merge of the sched-ext tree with the tip tree
@ 2024-10-15 2:49 Stephen Rothwell
2024-10-17 2:22 ` Stephen Rothwell
0 siblings, 1 reply; 13+ messages in thread
From: Stephen Rothwell @ 2024-10-15 2:49 UTC (permalink / raw)
To: Tejun Heo, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
Peter Zijlstra
Cc: Linux Kernel Mailing List, Linux Next Mailing List
[-- Attachment #1: Type: text/plain, Size: 1647 bytes --]
Hi all,
Today's linux-next merge of the sched-ext tree got a conflict in:
kernel/sched/ext.c
between commit:
98442f0ccd82 ("sched: Fix delayed_dequeue vs switched_from_fair()")
from the tip tree and commit:
3fdb9ebcec10 ("sched_ext: Start schedulers with consistent p->scx.slice values")
from the sched-ext tree.
I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging. You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.
--
Cheers,
Stephen Rothwell
diff --cc kernel/sched/ext.c
index 5c8453f6a5b5,281652d5df8b..000000000000
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@@ -4475,8 -4560,7 +4560,7 @@@ static void scx_ops_disable_workfn(stru
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
- p->scx.slice = min_t(u64, p->scx.slice, SCX_SLICE_DFL);
- __setscheduler_prio(p, p->prio);
+ p->sched_class = __setscheduler_class(p, p->prio);
check_class_changing(task_rq(p), p, old_class);
sched_enq_and_set_task(&ctx);
@@@ -5192,7 -5270,8 +5270,8 @@@ static int scx_ops_enable(struct sched_
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
+ p->scx.slice = SCX_SLICE_DFL;
- __setscheduler_prio(p, p->prio);
+ p->sched_class = __setscheduler_class(p, p->prio);
check_class_changing(task_rq(p), p, old_class);
sched_enq_and_set_task(&ctx);
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread

* Re: linux-next: manual merge of the sched-ext tree with the tip tree
2024-10-15 2:49 Stephen Rothwell
@ 2024-10-17 2:22 ` Stephen Rothwell
0 siblings, 0 replies; 13+ messages in thread
From: Stephen Rothwell @ 2024-10-17 2:22 UTC (permalink / raw)
To: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Peter Zijlstra,
Kent Overstreet
Cc: Tejun Heo, Linux Kernel Mailing List, Linux Next Mailing List
[-- Attachment #1: Type: text/plain, Size: 1941 bytes --]
Hi all,
On Tue, 15 Oct 2024 13:49:04 +1100 Stephen Rothwell <sfr@canb.auug.org.au> wrote:
>
> Today's linux-next merge of the sched-ext tree got a conflict in:
>
> kernel/sched/ext.c
>
> between commit:
>
> 98442f0ccd82 ("sched: Fix delayed_dequeue vs switched_from_fair()")
>
> from the tip tree and commit:
>
> 3fdb9ebcec10 ("sched_ext: Start schedulers with consistent p->scx.slice values")
>
> from the sched-ext tree.
>
> I fixed it up (see below) and can carry the fix as necessary. This
> is now fixed as far as linux-next is concerned, but any non trivial
> conflicts should be mentioned to your upstream maintainer when your tree
> is submitted for merging. You may also want to consider cooperating
> with the maintainer of the conflicting tree to minimise any particularly
> complex conflicts.
>
>
> diff --cc kernel/sched/ext.c
> index 5c8453f6a5b5,281652d5df8b..000000000000
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@@ -4475,8 -4560,7 +4560,7 @@@ static void scx_ops_disable_workfn(stru
>
> sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
>
> - p->scx.slice = min_t(u64, p->scx.slice, SCX_SLICE_DFL);
> - __setscheduler_prio(p, p->prio);
> + p->sched_class = __setscheduler_class(p, p->prio);
> check_class_changing(task_rq(p), p, old_class);
>
> sched_enq_and_set_task(&ctx);
> @@@ -5192,7 -5270,8 +5270,8 @@@ static int scx_ops_enable(struct sched_
>
> sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
>
> + p->scx.slice = SCX_SLICE_DFL;
> - __setscheduler_prio(p, p->prio);
> + p->sched_class = __setscheduler_class(p, p->prio);
> check_class_changing(task_rq(p), p, old_class);
>
> sched_enq_and_set_task(&ctx);
This is now a conflict between the tip tree and Linus' tree. Also
between the bcachefs tree and Linus' tree.
--
Cheers,
Stephen Rothwell
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
* linux-next: manual merge of the sched-ext tree with the tip tree
@ 2024-09-11 6:40 Stephen Rothwell
2024-09-11 19:03 ` Tejun Heo
0 siblings, 1 reply; 13+ messages in thread
From: Stephen Rothwell @ 2024-09-11 6:40 UTC (permalink / raw)
To: Tejun Heo, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
Peter Zijlstra
Cc: Vincent Guittot, Chen Yu, Linux Kernel Mailing List,
Linux Next Mailing List
[-- Attachment #1: Type: text/plain, Size: 5350 bytes --]
Hi all,
Today's linux-next merge of the sched-ext tree got conflicts in:
kernel/sched/fair.c
kernel/sched/syscalls.c
between commits:
84d265281d6c ("sched/pelt: Use rq_clock_task() for hw_pressure")
5d871a63997f ("sched/fair: Move effective_cpu_util() and effective_cpu_util() in fair.c")
from the tip tree and commit:
96fd6c65efc6 ("sched: Factor out update_other_load_avgs() from __update_blocked_others()")
from the sched-ext tree.
I fixed it up (I used the latter version of kernel/sched/fair.c and see
below) and can carry the fix as necessary. This is now fixed as far as
linux-next is concerned, but any non trivial conflicts should be
mentioned to your upstream maintainer when your tree is submitted for
merging. You may also want to consider cooperating with the maintainer
of the conflicting tree to minimise any particularly complex conflicts.
--
Cheers,
Stephen Rothwell
diff --cc kernel/sched/syscalls.c
index cb03c790c27a,7ecade89eada..000000000000
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@@ -258,6 -258,126 +258,28 @@@ int sched_core_idle_cpu(int cpu
#endif
+ #ifdef CONFIG_SMP
+ /*
+ * Load avg and utiliztion metrics need to be updated periodically and before
+ * consumption. This function updates the metrics for all subsystems except for
+ * the fair class. @rq must be locked and have its clock updated.
+ */
+ bool update_other_load_avgs(struct rq *rq)
+ {
+ u64 now = rq_clock_pelt(rq);
+ const struct sched_class *curr_class = rq->curr->sched_class;
+ unsigned long hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
+
+ lockdep_assert_rq_held(rq);
+
++ /* hw_pressure doesn't care about invariance */
+ return update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |
+ update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |
- update_hw_load_avg(now, rq, hw_pressure) |
++ update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure) |
+ update_irq_load_avg(rq, 0);
+ }
-
-/*
- * This function computes an effective utilization for the given CPU, to be
- * used for frequency selection given the linear relation: f = u * f_max.
- *
- * The scheduler tracks the following metrics:
- *
- * cpu_util_{cfs,rt,dl,irq}()
- * cpu_bw_dl()
- *
- * Where the cfs,rt and dl util numbers are tracked with the same metric and
- * synchronized windows and are thus directly comparable.
- *
- * The cfs,rt,dl utilization are the running times measured with rq->clock_task
- * which excludes things like IRQ and steal-time. These latter are then accrued
- * in the IRQ utilization.
- *
- * The DL bandwidth number OTOH is not a measured metric but a value computed
- * based on the task model parameters and gives the minimal utilization
- * required to meet deadlines.
- */
-unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
- unsigned long *min,
- unsigned long *max)
-{
- unsigned long util, irq, scale;
- struct rq *rq = cpu_rq(cpu);
-
- scale = arch_scale_cpu_capacity(cpu);
-
- /*
- * Early check to see if IRQ/steal time saturates the CPU, can be
- * because of inaccuracies in how we track these -- see
- * update_irq_load_avg().
- */
- irq = cpu_util_irq(rq);
- if (unlikely(irq >= scale)) {
- if (min)
- *min = scale;
- if (max)
- *max = scale;
- return scale;
- }
-
- if (min) {
- /*
- * The minimum utilization returns the highest level between:
- * - the computed DL bandwidth needed with the IRQ pressure which
- * steals time to the deadline task.
- * - The minimum performance requirement for CFS and/or RT.
- */
- *min = max(irq + cpu_bw_dl(rq), uclamp_rq_get(rq, UCLAMP_MIN));
-
- /*
- * When an RT task is runnable and uclamp is not used, we must
- * ensure that the task will run at maximum compute capacity.
- */
- if (!uclamp_is_used() && rt_rq_is_runnable(&rq->rt))
- *min = max(*min, scale);
- }
-
- /*
- * Because the time spend on RT/DL tasks is visible as 'lost' time to
- * CFS tasks and we use the same metric to track the effective
- * utilization (PELT windows are synchronized) we can directly add them
- * to obtain the CPU's actual utilization.
- */
- util = util_cfs + cpu_util_rt(rq);
- util += cpu_util_dl(rq);
-
- /*
- * The maximum hint is a soft bandwidth requirement, which can be lower
- * than the actual utilization because of uclamp_max requirements.
- */
- if (max)
- *max = min(scale, uclamp_rq_get(rq, UCLAMP_MAX));
-
- if (util >= scale)
- return scale;
-
- /*
- * There is still idle time; further improve the number by using the
- * IRQ metric. Because IRQ/steal time is hidden from the task clock we
- * need to scale the task numbers:
- *
- * max - irq
- * U' = irq + --------- * U
- * max
- */
- util = scale_irq_capacity(util, irq, scale);
- util += irq;
-
- return min(scale, util);
-}
-
-unsigned long sched_cpu_util(int cpu)
-{
- return effective_cpu_util(cpu, cpu_util_cfs(cpu), NULL, NULL);
-}
+ #endif /* CONFIG_SMP */
+
/**
* find_process_by_pid - find a process with a matching PID value.
* @pid: the pid in question.
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread

* Re: linux-next: manual merge of the sched-ext tree with the tip tree
2024-09-11 6:40 Stephen Rothwell
@ 2024-09-11 19:03 ` Tejun Heo
0 siblings, 0 replies; 13+ messages in thread
From: Tejun Heo @ 2024-09-11 19:03 UTC (permalink / raw)
To: Stephen Rothwell
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Peter Zijlstra,
Vincent Guittot, Chen Yu, Linux Kernel Mailing List,
Linux Next Mailing List
On Wed, Sep 11, 2024 at 04:40:46PM +1000, Stephen Rothwell wrote:
> Hi all,
>
> Today's linux-next merge of the sched-ext tree got conflicts in:
>
> kernel/sched/fair.c
> kernel/sched/syscalls.c
>
> between commits:
>
> 84d265281d6c ("sched/pelt: Use rq_clock_task() for hw_pressure")
> 5d871a63997f ("sched/fair: Move effective_cpu_util() and effective_cpu_util() in fair.c")
>
> from the tip tree and commit:
>
> 96fd6c65efc6 ("sched: Factor out update_other_load_avgs() from __update_blocked_others()")
>
> from the sched-ext tree.
>
> I fixed it up (I used the latter version of kernel/sched/fair.c and see
> below) and can carry the fix as necessary. This is now fixed as far as
> linux-next is concerned, but any non trivial conflicts should be
> mentioned to your upstream maintainer when your tree is submitted for
> merging. You may also want to consider cooperating with the maintainer
> of the conflicting tree to minimise any particularly complex conflicts.
I pulled tip/sched/core and resolved the conflict the same way. Will follow
up with a minor cleanup patch.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 13+ messages in thread
* linux-next: manual merge of the sched-ext tree with the tip tree
@ 2024-08-22 3:43 Stephen Rothwell
0 siblings, 0 replies; 13+ messages in thread
From: Stephen Rothwell @ 2024-08-22 3:43 UTC (permalink / raw)
To: Tejun Heo, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
Peter Zijlstra
Cc: Linux Kernel Mailing List, Linux Next Mailing List
[-- Attachment #1: Type: text/plain, Size: 1991 bytes --]
Hi all,
Today's linux-next merge of the sched-ext tree got a semantic conflict in:
kernel/sched/ext.c
between commit:
863ccdbb918a ("sched: Allow sched_class::dequeue_task() to fail")
from the tip tree and commit:
f0e1a0643a59 ("sched_ext: Implement BPF extensible scheduler class")
from the sched-ext tree.
I fixed it up (I applied the following merge fix) and can carry the fix
as necessary. This is now fixed as far as linux-next is concerned, but
any non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging. You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 22 Aug 2024 13:37:43 +1000
Subject: [PATCH] fix up for "sched_ext: Implement BPF extensible scheduler
class"
interacting with "sched: Allow sched_class::dequeue_task() to fail"
from the tip tree.
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
kernel/sched/ext.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 571a7ea0b5cb..1a9a3cc68a98 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2033,11 +2033,11 @@ static void ops_dequeue(struct task_struct *p, u64 deq_flags)
}
}
-static void dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags)
+static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags)
{
if (!(p->scx.flags & SCX_TASK_QUEUED)) {
WARN_ON_ONCE(task_runnable(p));
- return;
+ return true;
}
ops_dequeue(p, deq_flags);
@@ -2072,6 +2072,8 @@ static void dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
sub_nr_running(rq, 1);
dispatch_dequeue(rq, p);
+
+ return true;
}
static void yield_task_scx(struct rq *rq)
--
2.43.0
--
Cheers,
Stephen Rothwell
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply related [flat|nested] 13+ messages in thread

* linux-next: manual merge of the sched-ext tree with the tip tree
@ 2024-08-01 2:45 Stephen Rothwell
0 siblings, 0 replies; 13+ messages in thread
From: Stephen Rothwell @ 2024-08-01 2:45 UTC (permalink / raw)
To: Tejun Heo, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
Peter Zijlstra
Cc: Joel Fernandes (Google), Daniel Bristot de Oliveira,
Linux Kernel Mailing List, Linux Next Mailing List,
Yang Yingliang
[-- Attachment #1: Type: text/plain, Size: 2282 bytes --]
Hi all,
Today's linux-next merge of the sched-ext tree got a conflict in:
kernel/sched/core.c
between commits:
c245910049d0 ("sched/core: Add clearing of ->dl_server in put_prev_task_balance()")
31b164e2e4af ("sched/smt: Introduce sched_smt_present_inc/dec() helper")
from the tip tree and commits:
60c27fb59f6c ("sched_ext: Implement sched_ext_ops.cpu_online/offline()")
a7a9fc549293 ("sched_ext: Add boilerplate for extensible scheduler class")
from the sched-ext tree.
I fixed it up (I think, see below) and can carry the fix as
necessary. This is now fixed as far as linux-next is concerned, but any
non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging. You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.
--
Cheers,
Stephen Rothwell
diff --cc kernel/sched/core.c
index 73fa0290fd8f,22f86d5e9231..000000000000
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@@ -5878,10 -5901,23 +5947,15 @@@ __pick_next_task(struct rq *rq, struct
restart:
put_prev_task_balance(rq, prev, rf);
- for_each_class(class) {
- /*
- * We've updated @prev and no longer need the server link, clear it.
- * Must be done before ->pick_next_task() because that can (re)set
- * ->dl_server.
- */
- if (prev->dl_server)
- prev->dl_server = NULL;
-
+ for_each_active_class(class) {
p = class->pick_next_task(rq);
- if (p)
+ if (p) {
+ const struct sched_class *prev_class = prev->sched_class;
+
+ if (class != prev_class && prev_class->switch_class)
+ prev_class->switch_class(rq, p);
return p;
+ }
}
BUG(); /* The idle class should always have a runnable task. */
@@@ -8040,8 -8056,16 +8122,10 @@@ int sched_cpu_deactivate(unsigned int c
*/
synchronize_rcu();
- rq_lock_irqsave(rq, &rf);
- if (rq->rd) {
- BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
- set_rq_offline(rq);
- }
- rq_unlock_irqrestore(rq, &rf);
+ sched_set_rq_offline(rq, cpu);
+ scx_rq_deactivate(rq);
+
-#ifdef CONFIG_SCHED_SMT
/*
* When going down, decrement the number of cores with SMT present.
*/
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread

* linux-next: manual merge of the sched-ext tree with the tip tree
@ 2024-08-01 2:45 Stephen Rothwell
2024-08-04 17:42 ` Tejun Heo
0 siblings, 1 reply; 13+ messages in thread
From: Stephen Rothwell @ 2024-08-01 2:45 UTC (permalink / raw)
To: Tejun Heo, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
Peter Zijlstra
Cc: Linux Kernel Mailing List, Linux Next Mailing List, Tianchen Ding
[-- Attachment #1: Type: text/plain, Size: 749 bytes --]
Hi all,
Today's linux-next merge of the sched-ext tree got a conflict in:
kernel/sched/fair.c
between commit:
faa42d29419d ("sched/fair: Make SCHED_IDLE entity be preempted in strict hierarchy")
from the tip tree and commit:
2c8d046d5d51 ("sched: Add normal_policy()")
from the sched-ext tree.
I fixed it up (I used the former version) and can carry the fix as
necessary. This is now fixed as far as linux-next is concerned, but any
non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging. You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.
--
Cheers,
Stephen Rothwell
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: linux-next: manual merge of the sched-ext tree with the tip tree
2024-08-01 2:45 Stephen Rothwell
@ 2024-08-04 17:42 ` Tejun Heo
0 siblings, 0 replies; 13+ messages in thread
From: Tejun Heo @ 2024-08-04 17:42 UTC (permalink / raw)
To: Stephen Rothwell
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Peter Zijlstra,
Linux Kernel Mailing List, Linux Next Mailing List, Tianchen Ding
Hello, Stephen.
On Thu, Aug 01, 2024 at 12:45:41PM +1000, Stephen Rothwell wrote:
> Hi all,
>
> Today's linux-next merge of the sched-ext tree got a conflict in:
>
> kernel/sched/fair.c
>
> between commit:
>
> faa42d29419d ("sched/fair: Make SCHED_IDLE entity be preempted in strict hierarchy")
>
> from the tip tree and commit:
>
> 2c8d046d5d51 ("sched: Add normal_policy()")
>
> from the sched-ext tree.
>
> I fixed it up (I used the former version) and can carry the fix as
> necessary. This is now fixed as far as linux-next is concerned, but any
> non trivial conflicts should be mentioned to your upstream maintainer
> when your tree is submitted for merging. You may also want to consider
> cooperating with the maintainer of the conflicting tree to minimise any
> particularly complex conflicts.
This merge is a bit tricky because the former moves the test that the latter
converts, and the new location doesn't show up as a conflict. I merged
tip/sched/core into sched_ext/for-6.12 and resolved all the conflicts.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 13+ messages in thread
* linux-next: manual merge of the sched-ext tree with the tip tree
@ 2024-08-01 2:45 Stephen Rothwell
0 siblings, 0 replies; 13+ messages in thread
From: Stephen Rothwell @ 2024-08-01 2:45 UTC (permalink / raw)
To: Tejun Heo, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
Peter Zijlstra
Cc: Daniel Bristot de Oliveira, Linux Kernel Mailing List,
Linux Next Mailing List
[-- Attachment #1: Type: text/plain, Size: 1377 bytes --]
Hi all,
Today's linux-next merge of the sched-ext tree got a conflict in:
kernel/sched/idle.c
between commit:
a110a81c52a9 ("sched/deadline: Deferrable dl server")
from the tip tree and commit:
a7a9fc549293 ("sched_ext: Add boilerplate for extensible scheduler class")
from the sched-ext tree.
I fixed it up (I think, see below) and can carry the fix as necessary.
This is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging. You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.
--
Cheers,
Stephen Rothwell
diff --cc kernel/sched/idle.c
index d560f7ffa463,c7a218123b7a..000000000000
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@@ -452,14 -452,14 +452,16 @@@ static void wakeup_preempt_idle(struct
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
+ dl_server_update_idle_time(rq, prev);
+ scx_update_idle(rq, false);
}
static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first)
{
update_idle_core(rq);
+ scx_update_idle(rq, true);
schedstat_inc(rq->sched_goidle);
+ next->se.exec_start = rq_clock_task(rq);
}
#ifdef CONFIG_SMP
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2026-03-09 20:11 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-09 18:49 linux-next: manual merge of the sched-ext tree with the tip tree Mark Brown
2026-03-09 20:02 ` Tejun Heo
2026-03-09 20:11 ` [PATCH sched_ext/for-7.1] sched_ext: Replace system_unbound_wq with system_dfl_wq in scx_kobj_release() Tejun Heo
-- strict thread matches above, loose matches on Subject: below --
2026-03-03 13:28 linux-next: manual merge of the sched-ext tree with the tip tree Mark Brown
2024-10-15 2:49 Stephen Rothwell
2024-10-17 2:22 ` Stephen Rothwell
2024-09-11 6:40 Stephen Rothwell
2024-09-11 19:03 ` Tejun Heo
2024-08-22 3:43 Stephen Rothwell
2024-08-01 2:45 Stephen Rothwell
2024-08-01 2:45 Stephen Rothwell
2024-08-04 17:42 ` Tejun Heo
2024-08-01 2:45 Stephen Rothwell
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox