All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Mel Gorman <mgorman@techsingularity.net>
Cc: linux-kernel@vger.kernel.org, Ingo Molnar <mingo@redhat.com>,
	Juri Lelli <juri.lelli@redhat.com>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Valentin Schneider <vschneid@redhat.com>,
	Chris Mason <clm@meta.com>
Subject: Re: [PATCH 2/2] sched/fair: Reimplement NEXT_BUDDY to align with EEVDF goals
Date: Tue, 28 Oct 2025 16:05:45 +0100	[thread overview]
Message-ID: <20251028150545.GC4067720@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <20251027133915.4103633-3-mgorman@techsingularity.net>

On Mon, Oct 27, 2025 at 01:39:15PM +0000, Mel Gorman wrote:


Still going through this; just a few early comments.


>  kernel/sched/fair.c | 137 +++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 117 insertions(+), 20 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index bc0b7ce8a65d..158e0430449b 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1193,6 +1203,91 @@ static s64 update_se(struct rq *rq, struct sched_entity *se)
>  	return delta_exec;
>  }
>  
> +enum preempt_wakeup_action {
> +	PREEMPT_WAKEUP_NONE,		/* No action on the buddy */
> +	PREEMPT_WAKEUP_NEXT,		/* Check next is most eligible
> +					 * before rescheduling.
> +					 */
> +	PREEMPT_WAKEUP_RESCHED,		/* Plain reschedule */
> +};
> +
> +static void set_next_buddy(struct sched_entity *se);
> +
> +static inline enum preempt_wakeup_action
> +__do_preempt_buddy(struct rq *rq, struct cfs_rq *cfs_rq, int wake_flags,
> +		 struct sched_entity *pse, struct sched_entity *se)
> +{
> +	bool pse_before;
> +
> +	/*
> +	 * Ignore wakee preemption on WF_WORK as it is less likely that
> +	 * there is shared data as exec often follow fork. Do not
> +	 * preempt for tasks that are sched_delayed as it would violate
> +	 * EEVDF to forcibly queue an ineligible task.
> +	 */
> +	if (!sched_feat(NEXT_BUDDY) ||
> +	    (wake_flags & WF_FORK) ||
> +	    (pse->sched_delayed)) {
> +		return PREEMPT_WAKEUP_NONE;
> +	}
> +
> +	/* Reschedule if waker is no longer eligible. */
> +	if (!entity_eligible(cfs_rq, se))
> +		return PREEMPT_WAKEUP_RESCHED;
> +
> +	/*
> +	 * Keep existing buddy if the deadline is sooner than pse.
> +	 * The downside is that the older buddy may be cache cold
> +	 * but that is unpredictable where as an earlier deadline
> +	 * is absolute.
> +	 */
> +	if (cfs_rq->next && entity_before(cfs_rq->next, pse))
> +		return PREEMPT_WAKEUP_NONE;
> +
> +	set_next_buddy(pse);
> +
> +	/*
> +	 * WF_SYNC|WF_TTWU indicates the waker expects to sleep but it is not
> +	 * strictly enforced because the hint is either misunderstood or
> +	 * multiple tasks must be woken up.
> +	 */
> +	pse_before = entity_before(pse, se);
> +	if (wake_flags & WF_SYNC) {
> +		u64 delta = rq_clock_task(rq) - se->exec_start;
> +		u64 threshold = sysctl_sched_migration_cost;
> +
> +		/*
> +		 * WF_SYNC without WF_TTWU is not expected so warn if it
> +		 * happens even though it is likely harmless.
> +		 */
> +		WARN_ON_ONCE(!(wake_flags | WF_TTWU));
> +
> +		if ((s64)delta < 0)
> +			delta = 0;
> +
> +		/*
> +		 * WF_RQ_SELECTED implies the tasks are stacking on a
> +		 * CPU when they could run on other CPUs. Reduce the
> +		 * threshold before preemption is allowed to an
> +		 * arbitrary lower value as it is more likely (but not
> +		 * guaranteed) the waker requires the wakee to finish.
> +		 */
> +		if (wake_flags & WF_RQ_SELECTED)
> +			threshold >>= 2;
> +
> +		/*
> +		 * As WF_SYNC is not strictly obeyed, allow some runtime for
> +		 * batch wakeups to be issued.
> +		 */
> +		if (pse_before && delta >= threshold)
> +			return PREEMPT_WAKEUP_RESCHED;
> +
> +		return PREEMPT_WAKEUP_NONE;
> +	}
> +
> +	return PREEMPT_WAKEUP_NEXT;
> +}

All this seems weirdly placed inside the file. Is there a reason this is
placed so far away from its only caller?

>  /*
>   * Used by other classes to account runtime.
>   */

> @@ -7028,8 +7113,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>  	hrtick_update(rq);
>  }
>  
> -static void set_next_buddy(struct sched_entity *se);
> -
>  /*
>   * Basically dequeue_task_fair(), except it can deal with dequeue_entity()
>   * failing half-way through and resume the dequeue later.
> @@ -8734,7 +8817,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
>  	struct sched_entity *se = &donor->se, *pse = &p->se;
>  	struct cfs_rq *cfs_rq = task_cfs_rq(donor);
>  	int cse_is_idle, pse_is_idle;
> -	bool do_preempt_short = false;
> +	enum preempt_wakeup_action do_preempt_short = PREEMPT_WAKEUP_NONE;

naming seems off; I'm not sure what this still has to do with short.
Perhaps just preempt_action or whatever?

>  
>  	if (unlikely(se == pse))
>  		return;
> @@ -8748,10 +8831,6 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
>  	if (task_is_throttled(p))
>  		return;
>  
> -	if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK) && !pse->sched_delayed) {
> -		set_next_buddy(pse);
> -	}
> -
>  	/*
>  	 * We can come here with TIF_NEED_RESCHED already set from new task
>  	 * wake up path.
> @@ -8783,7 +8862,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
>  		 * When non-idle entity preempt an idle entity,
>  		 * don't give idle entity slice protection.
>  		 */
> -		do_preempt_short = true;
> +		do_preempt_short = PREEMPT_WAKEUP_NEXT;
>  		goto preempt;
>  	}
>  
> @@ -8802,7 +8881,25 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
>  	 * If @p has a shorter slice than current and @p is eligible, override
>  	 * current's slice protection in order to allow preemption.
>  	 */
> -	do_preempt_short = sched_feat(PREEMPT_SHORT) && (pse->slice < se->slice);
> +	if (sched_feat(PREEMPT_SHORT) && (pse->slice < se->slice)) {
> +		do_preempt_short = PREEMPT_WAKEUP_NEXT;
> +	} else {
> +		/*
> +		 * If @p potentially is completing work required by current then
> +		 * consider preemption.
> +		 */
> +		do_preempt_short = __do_preempt_buddy(rq, cfs_rq, wake_flags,
> +						      pse, se);
> +	}
> +
> +	switch (do_preempt_short) {
> +	case PREEMPT_WAKEUP_NONE:
> +		return;
> +	case PREEMPT_WAKEUP_RESCHED:
> +		goto preempt;
> +	case PREEMPT_WAKEUP_NEXT:
> +		break;
> +	}
>  
>  	/*
>  	 * If @p has become the most eligible task, force preemption.
> @@ -8810,7 +8907,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
>  	if (__pick_eevdf(cfs_rq, !do_preempt_short) == pse)
>  		goto preempt;
>  
> -	if (sched_feat(RUN_TO_PARITY) && do_preempt_short)
> +	if (sched_feat(RUN_TO_PARITY) && do_preempt_short != PREEMPT_WAKEUP_NONE)
>  		update_protect_slice(cfs_rq, se);

WAKEUP_NONE did a return above, I don't think you can get here with
WAKEUP_NONE, making the above condition always true.

  reply	other threads:[~2025-10-28 15:05 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20251027133915.4103633-1-mgorman@techsingularity.net>
2025-10-27 13:39 ` [PATCH 1/2] sched/fair: Enable scheduler feature NEXT_BUDDY Mel Gorman
2025-10-28 14:37   ` Peter Zijlstra
2025-10-27 13:39 ` [PATCH 2/2] sched/fair: Reimplement NEXT_BUDDY to align with EEVDF goals Mel Gorman
2025-10-28 15:05   ` Peter Zijlstra [this message]
2025-10-31  9:46     ` Mel Gorman
2025-10-28 15:09   ` Peter Zijlstra
2025-10-31  9:48     ` Mel Gorman
2025-10-28 15:33   ` Peter Zijlstra
2025-10-28 15:47     ` Peter Zijlstra
2025-10-30  9:10   ` Peter Zijlstra
2025-10-31 10:27     ` Mel Gorman
2025-11-12 12:25 [PATCH 0/2 v5] Reintroduce NEXT_BUDDY for EEVDF Mel Gorman
     [not found] ` <20251112122521.1331238-3-mgorman@techsingularity.net>
2025-11-12 14:48   ` [PATCH 2/2] sched/fair: Reimplement NEXT_BUDDY to align with EEVDF goals Peter Zijlstra
2025-11-13  8:26     ` Madadi Vineeth Reddy
2025-11-13  9:04     ` Mel Gorman
2025-11-14 12:13       ` Peter Zijlstra
  -- strict thread matches above, loose matches on Subject: below --
2025-11-03 11:04 [PATCH 0/2 v4] Reintroduce NEXT_BUDDY for EEVDF Mel Gorman
2025-11-03 11:04 ` [PATCH 2/2] sched/fair: Reimplement NEXT_BUDDY to align with EEVDF goals Mel Gorman
2025-11-03 14:07   ` Peter Zijlstra
2025-11-03 14:14     ` Peter Zijlstra
2025-11-05 21:48   ` Madadi Vineeth Reddy
2025-11-07  8:53     ` Mel Gorman
2025-10-21 14:28 [RFC PATCH 0/2] Reintroduce NEXT_BUDDY for EEVDF v2 Mel Gorman
2025-10-21 14:28 ` [PATCH 2/2] sched/fair: Reimplement NEXT_BUDDY to align with EEVDF goals Mel Gorman
2025-10-23  6:29   ` K Prateek Nayak
     [not found] <20250714134429.19624-1-mgorman@techsingularity.net>
2025-07-14 13:44 ` Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251028150545.GC4067720@noisy.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=clm@meta.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@techsingularity.net \
    --cc=mingo@redhat.com \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.