From: Mark Brown <broonie@kernel.org>
To: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>,
Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
Linux Next Mailing List <linux-next@vger.kernel.org>,
Marco Crivellari <marco.crivellari@suse.com>,
Peter Zijlstra <peterz@infradead.org>
Subject: linux-next: manual merge of the sched-ext tree with the tip tree
Date: Mon, 9 Mar 2026 18:49:44 +0000 [thread overview]
Message-ID: <aa8WSH-ILSebstIB@sirena.org.uk> (raw)
[-- Attachment #1: Type: text/plain, Size: 9292 bytes --]
Hi all,
Today's linux-next merge of the sched-ext tree got a conflict in:
kernel/sched/ext.c
between commits:
c2a57380df9dd ("sched: Replace use of system_unbound_wq with system_dfl_wq")
from the tip tree and commit:
cde94c032b32b ("sched_ext: Make watchdog sub-sched aware")
from the sched-ext tree.
I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non-trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging. You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.
I do note there's another system_unbound_wq usage there which for some
reason wasn't updated...
diff --cc kernel/sched/ext.c
index 7278d57496478,d6d8073370130..0000000000000
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@@ -2766,8 -3205,11 +3205,11 @@@ static void scx_watchdog_workfn(struct
cond_resched();
}
- queue_delayed_work(system_dfl_wq, to_delayed_work(work),
- READ_ONCE(scx_watchdog_timeout) / 2);
+
+ intv = READ_ONCE(scx_watchdog_interval);
+ if (intv < ULONG_MAX)
- queue_delayed_work(system_unbound_wq, to_delayed_work(work),
++ queue_delayed_work(system_dfl_wq, to_delayed_work(work),
+ intv);
}
void scx_tick(struct rq *rq)
@@@ -4282,9 -5218,247 +5218,247 @@@ static void free_kick_syncs(void
}
}
- static void scx_disable_workfn(struct kthread_work *work)
+ static void refresh_watchdog(void)
+ {
+ struct scx_sched *sch;
+ unsigned long intv = ULONG_MAX;
+
+ /* take the shortest timeout and use its half for watchdog interval */
+ rcu_read_lock();
+ list_for_each_entry_rcu(sch, &scx_sched_all, all)
+ intv = max(min(intv, sch->watchdog_timeout / 2), 1);
+ rcu_read_unlock();
+
+ WRITE_ONCE(scx_watchdog_timestamp, jiffies);
+ WRITE_ONCE(scx_watchdog_interval, intv);
+
+ if (intv < ULONG_MAX)
- mod_delayed_work(system_unbound_wq, &scx_watchdog_work, intv);
++ mod_delayed_work(system_dfl_wq, &scx_watchdog_work, intv);
+ else
+ cancel_delayed_work_sync(&scx_watchdog_work);
+ }
+
+ static s32 scx_link_sched(struct scx_sched *sch)
+ {
+ scoped_guard(raw_spinlock_irq, &scx_sched_lock) {
+ #ifdef CONFIG_EXT_SUB_SCHED
+ struct scx_sched *parent = scx_parent(sch);
+ s32 ret;
+
+ if (parent) {
+ ret = rhashtable_lookup_insert_fast(&scx_sched_hash,
+ &sch->hash_node, scx_sched_hash_params);
+ if (ret) {
+ scx_error(sch, "failed to insert into scx_sched_hash (%d)", ret);
+ return ret;
+ }
+
+ list_add_tail(&sch->sibling, &parent->children);
+ }
+ #endif /* CONFIG_EXT_SUB_SCHED */
+
+ list_add_tail_rcu(&sch->all, &scx_sched_all);
+ }
+
+ refresh_watchdog();
+ return 0;
+ }
+
+ static void scx_unlink_sched(struct scx_sched *sch)
+ {
+ scoped_guard(raw_spinlock_irq, &scx_sched_lock) {
+ #ifdef CONFIG_EXT_SUB_SCHED
+ if (scx_parent(sch)) {
+ rhashtable_remove_fast(&scx_sched_hash, &sch->hash_node,
+ scx_sched_hash_params);
+ list_del_init(&sch->sibling);
+ }
+ #endif /* CONFIG_EXT_SUB_SCHED */
+ list_del_rcu(&sch->all);
+ }
+
+ refresh_watchdog();
+ }
+
+ #ifdef CONFIG_EXT_SUB_SCHED
+ static DECLARE_WAIT_QUEUE_HEAD(scx_unlink_waitq);
+
+ static void drain_descendants(struct scx_sched *sch)
+ {
+ /*
+ * Child scheds that finished the critical part of disabling will take
+ * themselves off @sch->children. Wait for it to drain. As propagation
+ * is recursive, empty @sch->children means that all proper descendant
+ * scheds reached unlinking stage.
+ */
+ wait_event(scx_unlink_waitq, list_empty(&sch->children));
+ }
+
+ static void scx_fail_parent(struct scx_sched *sch,
+ struct task_struct *failed, s32 fail_code)
+ {
+ struct scx_sched *parent = scx_parent(sch);
+ struct scx_task_iter sti;
+ struct task_struct *p;
+
+ scx_error(parent, "ops.init_task() failed (%d) for %s[%d] while disabling a sub-scheduler",
+ fail_code, failed->comm, failed->pid);
+
+ /*
+ * Once $parent is bypassed, it's safe to put SCX_TASK_NONE tasks into
+ * it. This may cause downstream failures on the BPF side but $parent is
+ * dying anyway.
+ */
+ scx_bypass(parent, true);
+
+ scx_task_iter_start(&sti, sch->cgrp);
+ while ((p = scx_task_iter_next_locked(&sti))) {
+ if (scx_task_on_sched(parent, p))
+ continue;
+
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
+ scx_disable_and_exit_task(sch, p);
+ rcu_assign_pointer(p->scx.sched, parent);
+ }
+ }
+ scx_task_iter_stop(&sti);
+ }
+
+ static void scx_sub_disable(struct scx_sched *sch)
+ {
+ struct scx_sched *parent = scx_parent(sch);
+ struct scx_task_iter sti;
+ struct task_struct *p;
+ int ret;
+
+ /*
+ * Guarantee forward progress and wait for descendants to be disabled.
+ * To limit disruptions, $parent is not bypassed. Tasks are fully
+ * prepped and then inserted back into $parent.
+ */
+ scx_bypass(sch, true);
+ drain_descendants(sch);
+
+ /*
+ * Here, every runnable task is guaranteed to make forward progress and
+ * we can safely use blocking synchronization constructs. Actually
+ * disable ops.
+ */
+ mutex_lock(&scx_enable_mutex);
+ percpu_down_write(&scx_fork_rwsem);
+ scx_cgroup_lock();
+
+ set_cgroup_sched(sch_cgroup(sch), parent);
+
+ scx_task_iter_start(&sti, sch->cgrp);
+ while ((p = scx_task_iter_next_locked(&sti))) {
+ struct rq *rq;
+ struct rq_flags rf;
+
+ /* filter out duplicate visits */
+ if (scx_task_on_sched(parent, p))
+ continue;
+
+ /*
+ * By the time control reaches here, all descendant schedulers
+ * should already have been disabled.
+ */
+ WARN_ON_ONCE(!scx_task_on_sched(sch, p));
+
+ /*
+ * If $p is about to be freed, nothing prevents $sch from
+ * unloading before $p reaches sched_ext_free(). Disable and
+ * exit $p right away.
+ */
+ if (!tryget_task_struct(p)) {
+ scx_disable_and_exit_task(sch, p);
+ continue;
+ }
+
+ scx_task_iter_unlock(&sti);
+
+ /*
+ * $p is READY or ENABLED on @sch. Initialize for $parent,
+ * disable and exit from @sch, and then switch over to $parent.
+ *
+ * If a task fails to initialize for $parent, the only available
+ * action is disabling $parent too. While this allows disabling
+ * of a child sched to cause the parent scheduler to fail, the
+ * failure can only originate from ops.init_task() of the
+ * parent. A child can't directly affect the parent through its
+ * own failures.
+ */
+ ret = __scx_init_task(parent, p, false);
+ if (ret) {
+ scx_fail_parent(sch, p, ret);
+ put_task_struct(p);
+ break;
+ }
+
+ rq = task_rq_lock(p, &rf);
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
+ /*
+ * $p is initialized for $parent and still attached to
+ * @sch. Disable and exit for @sch, switch over to
+ * $parent, override the state to READY to account for
+ * $p having already been initialized, and then enable.
+ */
+ scx_disable_and_exit_task(sch, p);
+ scx_set_task_state(p, SCX_TASK_INIT);
+ rcu_assign_pointer(p->scx.sched, parent);
+ scx_set_task_state(p, SCX_TASK_READY);
+ scx_enable_task(parent, p);
+ }
+ task_rq_unlock(rq, p, &rf);
+
+ put_task_struct(p);
+ }
+ scx_task_iter_stop(&sti);
+
+ scx_cgroup_unlock();
+ percpu_up_write(&scx_fork_rwsem);
+
+ /*
+ * All tasks are moved off of @sch but there may still be on-going
+ * operations (e.g. ops.select_cpu()). Drain them by flushing RCU. Use
+ * the expedited version as ancestors may be waiting in bypass mode.
+ * Also, tell the parent that there is no need to keep running bypass
+ * DSQs for us.
+ */
+ synchronize_rcu_expedited();
+ disable_bypass_dsp(sch);
+
+ scx_unlink_sched(sch);
+
+ mutex_unlock(&scx_enable_mutex);
+
+ /*
+ * @sch is now unlinked from the parent's children list. Notify and call
+ * ops.sub_detach/exit(). Note that ops.sub_detach/exit() must be called
+ * after unlinking and releasing all locks. See scx_claim_exit().
+ */
+ wake_up_all(&scx_unlink_waitq);
+
+ if (sch->ops.sub_detach && sch->sub_attached) {
+ struct scx_sub_detach_args sub_detach_args = {
+ .ops = &sch->ops,
+ .cgroup_path = sch->cgrp_path,
+ };
+ SCX_CALL_OP(parent, SCX_KF_UNLOCKED, sub_detach, NULL,
+ &sub_detach_args);
+ }
+
+ if (sch->ops.exit)
+ SCX_CALL_OP(sch, SCX_KF_UNLOCKED, exit, NULL, sch->exit_info);
+ kobject_del(&sch->kobj);
+ }
+ #else /* CONFIG_EXT_SUB_SCHED */
+ static void drain_descendants(struct scx_sched *sch) { }
+ static void scx_sub_disable(struct scx_sched *sch) { }
+ #endif /* CONFIG_EXT_SUB_SCHED */
+
+ static void scx_root_disable(struct scx_sched *sch)
{
- struct scx_sched *sch = container_of(work, struct scx_sched, disable_work);
struct scx_exit_info *ei = sch->exit_info;
struct scx_task_iter sti;
struct task_struct *p;
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
next reply other threads:[~2026-03-09 18:49 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-09 18:49 Mark Brown [this message]
2026-03-09 20:02 ` linux-next: manual merge of the sched-ext tree with the tip tree Tejun Heo
2026-03-09 20:11 ` [PATCH sched_ext/for-7.1] sched_ext: Replace system_unbound_wq with system_dfl_wq in scx_kobj_release() Tejun Heo
-- strict thread matches above, loose matches on Subject: below --
2026-03-03 13:28 linux-next: manual merge of the sched-ext tree with the tip tree Mark Brown
2024-10-15 2:49 Stephen Rothwell
2024-10-17 2:22 ` Stephen Rothwell
2024-09-11 6:40 Stephen Rothwell
2024-09-11 19:03 ` Tejun Heo
2024-08-22 3:43 Stephen Rothwell
2024-08-01 2:45 Stephen Rothwell
2024-08-01 2:45 Stephen Rothwell
2024-08-04 17:42 ` Tejun Heo
2024-08-01 2:45 Stephen Rothwell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aa8WSH-ILSebstIB@sirena.org.uk \
--to=broonie@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-next@vger.kernel.org \
--cc=marco.crivellari@suse.com \
--cc=mingo@kernel.org \
--cc=peterz@infradead.org \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox