From: Peter Zijlstra <peterz@infradead.org>
To: Mel Gorman <mgorman@techsingularity.net>
Cc: linux-kernel@vger.kernel.org, Ingo Molnar <mingo@redhat.com>,
Juri Lelli <juri.lelli@redhat.com>,
Dietmar Eggemann <dietmar.eggemann@arm.com>,
Valentin Schneider <vschneid@redhat.com>,
Chris Mason <clm@meta.com>
Subject: Re: [PATCH 2/2] sched/fair: Reimplement NEXT_BUDDY to align with EEVDF goals
Date: Tue, 28 Oct 2025 16:05:45 +0100 [thread overview]
Message-ID: <20251028150545.GC4067720@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <20251027133915.4103633-3-mgorman@techsingularity.net>
On Mon, Oct 27, 2025 at 01:39:15PM +0000, Mel Gorman wrote:
Still going through this; just a few early comments.
> kernel/sched/fair.c | 137 +++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 117 insertions(+), 20 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index bc0b7ce8a65d..158e0430449b 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1193,6 +1203,91 @@ static s64 update_se(struct rq *rq, struct sched_entity *se)
> return delta_exec;
> }
>
> +enum preempt_wakeup_action {
> + PREEMPT_WAKEUP_NONE, /* No action on the buddy */
> + PREEMPT_WAKEUP_NEXT, /* Check next is most eligible
> + * before rescheduling.
> + */
> + PREEMPT_WAKEUP_RESCHED, /* Plain reschedule */
> +};
> +
> +static void set_next_buddy(struct sched_entity *se);
> +
> +static inline enum preempt_wakeup_action
> +__do_preempt_buddy(struct rq *rq, struct cfs_rq *cfs_rq, int wake_flags,
> + struct sched_entity *pse, struct sched_entity *se)
> +{
> + bool pse_before;
> +
> + /*
> + * Ignore wakee preemption on WF_FORK as it is less likely that
> + * there is shared data as exec often follows fork. Do not
> + * preempt for tasks that are sched_delayed as it would violate
> + * EEVDF to forcibly queue an ineligible task.
> + */
> + if (!sched_feat(NEXT_BUDDY) ||
> + (wake_flags & WF_FORK) ||
> + (pse->sched_delayed)) {
> + return PREEMPT_WAKEUP_NONE;
> + }
> +
> + /* Reschedule if waker is no longer eligible. */
> + if (!entity_eligible(cfs_rq, se))
> + return PREEMPT_WAKEUP_RESCHED;
> +
> + /*
> + * Keep existing buddy if the deadline is sooner than pse.
> + * The downside is that the older buddy may be cache cold
> + * but that is unpredictable whereas an earlier deadline
> + * is absolute.
> + */
> + if (cfs_rq->next && entity_before(cfs_rq->next, pse))
> + return PREEMPT_WAKEUP_NONE;
> +
> + set_next_buddy(pse);
> +
> + /*
> + * WF_SYNC|WF_TTWU indicates the waker expects to sleep but it is not
> + * strictly enforced because the hint is either misunderstood or
> + * multiple tasks must be woken up.
> + */
> + pse_before = entity_before(pse, se);
> + if (wake_flags & WF_SYNC) {
> + u64 delta = rq_clock_task(rq) - se->exec_start;
> + u64 threshold = sysctl_sched_migration_cost;
> +
> + /*
> + * WF_SYNC without WF_TTWU is not expected so warn if it
> + * happens even though it is likely harmless.
> + */
> + WARN_ON_ONCE(!(wake_flags | WF_TTWU));
> +
> + if ((s64)delta < 0)
> + delta = 0;
> +
> + /*
> + * WF_RQ_SELECTED implies the tasks are stacking on a
> + * CPU when they could run on other CPUs. Reduce the
> + * threshold before preemption is allowed to an
> + * arbitrary lower value as it is more likely (but not
> + * guaranteed) the waker requires the wakee to finish.
> + */
> + if (wake_flags & WF_RQ_SELECTED)
> + threshold >>= 2;
> +
> + /*
> + * As WF_SYNC is not strictly obeyed, allow some runtime for
> + * batch wakeups to be issued.
> + */
> + if (pse_before && delta >= threshold)
> + return PREEMPT_WAKEUP_RESCHED;
> +
> + return PREEMPT_WAKEUP_NONE;
> + }
> +
> + return PREEMPT_WAKEUP_NEXT;
> +}
All this seems weirdly placed inside the file. Is there a reason this is
placed so far away from its only caller?
> /*
> * Used by other classes to account runtime.
> */
> @@ -7028,8 +7113,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> hrtick_update(rq);
> }
>
> -static void set_next_buddy(struct sched_entity *se);
> -
> /*
> * Basically dequeue_task_fair(), except it can deal with dequeue_entity()
> * failing half-way through and resume the dequeue later.
> @@ -8734,7 +8817,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
> struct sched_entity *se = &donor->se, *pse = &p->se;
> struct cfs_rq *cfs_rq = task_cfs_rq(donor);
> int cse_is_idle, pse_is_idle;
> - bool do_preempt_short = false;
> + enum preempt_wakeup_action do_preempt_short = PREEMPT_WAKEUP_NONE;
The naming seems off; I'm not sure what this still has to do with "short".
Perhaps just call it preempt_action or similar?
>
> if (unlikely(se == pse))
> return;
> @@ -8748,10 +8831,6 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
> if (task_is_throttled(p))
> return;
>
> - if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK) && !pse->sched_delayed) {
> - set_next_buddy(pse);
> - }
> -
> /*
> * We can come here with TIF_NEED_RESCHED already set from new task
> * wake up path.
> @@ -8783,7 +8862,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
> * When non-idle entity preempt an idle entity,
> * don't give idle entity slice protection.
> */
> - do_preempt_short = true;
> + do_preempt_short = PREEMPT_WAKEUP_NEXT;
> goto preempt;
> }
>
> @@ -8802,7 +8881,25 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
> * If @p has a shorter slice than current and @p is eligible, override
> * current's slice protection in order to allow preemption.
> */
> - do_preempt_short = sched_feat(PREEMPT_SHORT) && (pse->slice < se->slice);
> + if (sched_feat(PREEMPT_SHORT) && (pse->slice < se->slice)) {
> + do_preempt_short = PREEMPT_WAKEUP_NEXT;
> + } else {
> + /*
> + * If @p potentially is completing work required by current then
> + * consider preemption.
> + */
> + do_preempt_short = __do_preempt_buddy(rq, cfs_rq, wake_flags,
> + pse, se);
> + }
> +
> + switch (do_preempt_short) {
> + case PREEMPT_WAKEUP_NONE:
> + return;
> + case PREEMPT_WAKEUP_RESCHED:
> + goto preempt;
> + case PREEMPT_WAKEUP_NEXT:
> + break;
> + }
>
> /*
> * If @p has become the most eligible task, force preemption.
> @@ -8810,7 +8907,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
> if (__pick_eevdf(cfs_rq, !do_preempt_short) == pse)
> goto preempt;
>
> - if (sched_feat(RUN_TO_PARITY) && do_preempt_short)
> + if (sched_feat(RUN_TO_PARITY) && do_preempt_short != PREEMPT_WAKEUP_NONE)
> update_protect_slice(cfs_rq, se);
The PREEMPT_WAKEUP_NONE case already returned in the switch above, so I don't
think you can get here with PREEMPT_WAKEUP_NONE; that makes the condition above
always true.
next prev parent reply other threads:[~2025-10-28 15:05 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20251027133915.4103633-1-mgorman@techsingularity.net>
2025-10-27 13:39 ` [PATCH 1/2] sched/fair: Enable scheduler feature NEXT_BUDDY Mel Gorman
2025-10-28 14:37 ` Peter Zijlstra
2025-10-27 13:39 ` [PATCH 2/2] sched/fair: Reimplement NEXT_BUDDY to align with EEVDF goals Mel Gorman
2025-10-28 15:05 ` Peter Zijlstra [this message]
2025-10-31 9:46 ` Mel Gorman
2025-10-28 15:09 ` Peter Zijlstra
2025-10-31 9:48 ` Mel Gorman
2025-10-28 15:33 ` Peter Zijlstra
2025-10-28 15:47 ` Peter Zijlstra
2025-10-30 9:10 ` Peter Zijlstra
2025-10-31 10:27 ` Mel Gorman
2025-11-12 12:25 [PATCH 0/2 v5] Reintroduce NEXT_BUDDY for EEVDF Mel Gorman
[not found] ` <20251112122521.1331238-3-mgorman@techsingularity.net>
2025-11-12 14:48 ` [PATCH 2/2] sched/fair: Reimplement NEXT_BUDDY to align with EEVDF goals Peter Zijlstra
2025-11-13 8:26 ` Madadi Vineeth Reddy
2025-11-13 9:04 ` Mel Gorman
2025-11-14 12:13 ` Peter Zijlstra
-- strict thread matches above, loose matches on Subject: below --
2025-11-03 11:04 [PATCH 0/2 v4] Reintroduce NEXT_BUDDY for EEVDF Mel Gorman
2025-11-03 11:04 ` [PATCH 2/2] sched/fair: Reimplement NEXT_BUDDY to align with EEVDF goals Mel Gorman
2025-11-03 14:07 ` Peter Zijlstra
2025-11-03 14:14 ` Peter Zijlstra
2025-11-05 21:48 ` Madadi Vineeth Reddy
2025-11-07 8:53 ` Mel Gorman
2025-10-21 14:28 [RFC PATCH 0/2] Reintroduce NEXT_BUDDY for EEVDF v2 Mel Gorman
2025-10-21 14:28 ` [PATCH 2/2] sched/fair: Reimplement NEXT_BUDDY to align with EEVDF goals Mel Gorman
2025-10-23 6:29 ` K Prateek Nayak
[not found] <20250714134429.19624-1-mgorman@techsingularity.net>
2025-07-14 13:44 ` Mel Gorman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251028150545.GC4067720@noisy.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=clm@meta.com \
--cc=dietmar.eggemann@arm.com \
--cc=juri.lelli@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mgorman@techsingularity.net \
--cc=mingo@redhat.com \
--cc=vschneid@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox