public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Andrea Righi <andrea.righi@linux.dev>
To: Tejun Heo <tj@kernel.org>
Cc: David Vernet <void@manifault.com>,
	Changwoo Min <changwoo@igalia.com>,
	Dan Schatzberg <schatzberg.dan@gmail.com>,
	Emil Tsalapatis <etsal@meta.com>,
	sched-ext@lists.linux.dev, linux-kernel@vger.kernel.org
Subject: Re: [PATCH 05/13] sched_ext: Simplify breather mechanism with scx_aborting flag
Date: Mon, 10 Nov 2025 08:45:03 +0100	[thread overview]
Message-ID: <aRGX_3UR_dPLTTrn@gpd4> (raw)
In-Reply-To: <20251109183112.2412147-6-tj@kernel.org>

On Sun, Nov 09, 2025 at 08:31:04AM -1000, Tejun Heo wrote:
> The breather mechanism was introduced in 62dcbab8b0ef ("sched_ext: Avoid
> live-locking bypass mode switching") and e32c260195e6 ("sched_ext: Enable the
> ops breather and eject BPF scheduler on softlockup") to prevent live-locks by
> injecting delays when CPUs are trapped in dispatch paths.
> 
> Currently, it uses scx_breather_depth (atomic_t) and scx_in_softlockup
> (unsigned long) with separate increment/decrement and cleanup operations. The
> breather is only activated when aborting, so tie it directly to the exit
> mechanism. Replace both variables with scx_aborting flag set when exit is
> claimed and cleared after bypass is enabled. Introduce scx_claim_exit() to
> consolidate exit_kind claiming and breather enablement. This eliminates
> scx_clear_softlockup() and simplifies scx_softlockup() and scx_bypass().
> 
> The breather mechanism will be replaced by a different abort mechanism in a
> future patch. This simplification prepares for that change.

Acked-by: Andrea Righi <arighi@nvidia.com>

Thanks,
-Andrea

> 
> Cc: Dan Schatzberg <schatzberg.dan@gmail.com>
> Cc: Emil Tsalapatis <etsal@meta.com>
> Signed-off-by: Tejun Heo <tj@kernel.org>
> ---
>  kernel/sched/ext.c | 54 +++++++++++++++++++++-------------------------
>  1 file changed, 25 insertions(+), 29 deletions(-)
> 
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 4b8b91494947..905d01f74687 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -33,9 +33,8 @@ static DEFINE_MUTEX(scx_enable_mutex);
>  DEFINE_STATIC_KEY_FALSE(__scx_enabled);
>  DEFINE_STATIC_PERCPU_RWSEM(scx_fork_rwsem);
>  static atomic_t scx_enable_state_var = ATOMIC_INIT(SCX_DISABLED);
> -static unsigned long scx_in_softlockup;
> -static atomic_t scx_breather_depth = ATOMIC_INIT(0);
>  static int scx_bypass_depth;
> +static bool scx_aborting;
>  static bool scx_init_task_enabled;
>  static bool scx_switching_all;
>  DEFINE_STATIC_KEY_FALSE(__scx_switched_all);
> @@ -1834,7 +1833,7 @@ static void scx_breather(struct rq *rq)
>  
>  	lockdep_assert_rq_held(rq);
>  
> -	if (likely(!atomic_read(&scx_breather_depth)))
> +	if (likely(!READ_ONCE(scx_aborting)))
>  		return;
>  
>  	raw_spin_rq_unlock(rq);
> @@ -1843,9 +1842,9 @@ static void scx_breather(struct rq *rq)
>  
>  	do {
>  		int cnt = 1024;
> -		while (atomic_read(&scx_breather_depth) && --cnt)
> +		while (READ_ONCE(scx_aborting) && --cnt)
>  			cpu_relax();
> -	} while (atomic_read(&scx_breather_depth) &&
> +	} while (READ_ONCE(scx_aborting) &&
>  		 time_before64(ktime_get_ns(), until));
>  
>  	raw_spin_rq_lock(rq);
> @@ -3740,30 +3739,14 @@ void scx_softlockup(u32 dur_s)
>  		goto out_unlock;
>  	}
>  
> -	/* allow only one instance, cleared at the end of scx_bypass() */
> -	if (test_and_set_bit(0, &scx_in_softlockup))
> -		goto out_unlock;
> -
>  	printk_deferred(KERN_ERR "sched_ext: Soft lockup - CPU%d stuck for %us, disabling \"%s\"\n",
>  			smp_processor_id(), dur_s, scx_root->ops.name);
>  
> -	/*
> -	 * Some CPUs may be trapped in the dispatch paths. Enable breather
> -	 * immediately; otherwise, we might even be able to get to scx_bypass().
> -	 */
> -	atomic_inc(&scx_breather_depth);
> -
>  	scx_error(sch, "soft lockup - CPU#%d stuck for %us", smp_processor_id(), dur_s);
>  out_unlock:
>  	rcu_read_unlock();
>  }
>  
> -static void scx_clear_softlockup(void)
> -{
> -	if (test_and_clear_bit(0, &scx_in_softlockup))
> -		atomic_dec(&scx_breather_depth);
> -}
> -
>  /**
>   * scx_bypass - [Un]bypass scx_ops and guarantee forward progress
>   * @bypass: true for bypass, false for unbypass
> @@ -3826,8 +3809,6 @@ static void scx_bypass(bool bypass)
>  				      ktime_get_ns() - bypass_timestamp);
>  	}
>  
> -	atomic_inc(&scx_breather_depth);
> -
>  	/*
>  	 * No task property is changing. We just need to make sure all currently
>  	 * queued tasks are re-queued according to the new scx_rq_bypassing()
> @@ -3883,10 +3864,8 @@ static void scx_bypass(bool bypass)
>  		raw_spin_rq_unlock(rq);
>  	}
>  
> -	atomic_dec(&scx_breather_depth);
>  unlock:
>  	raw_spin_unlock_irqrestore(&bypass_lock, flags);
> -	scx_clear_softlockup();
>  }
>  
>  static void free_exit_info(struct scx_exit_info *ei)
> @@ -3981,6 +3960,7 @@ static void scx_disable_workfn(struct kthread_work *work)
>  
>  	/* guarantee forward progress by bypassing scx_ops */
>  	scx_bypass(true);
> +	WRITE_ONCE(scx_aborting, false);
>  
>  	switch (scx_set_enable_state(SCX_DISABLING)) {
>  	case SCX_DISABLING:
> @@ -4103,9 +4083,24 @@ static void scx_disable_workfn(struct kthread_work *work)
>  	scx_bypass(false);
>  }
>  
> -static void scx_disable(enum scx_exit_kind kind)
> +static bool scx_claim_exit(struct scx_sched *sch, enum scx_exit_kind kind)
>  {
>  	int none = SCX_EXIT_NONE;
> +
> +	if (!atomic_try_cmpxchg(&sch->exit_kind, &none, kind))
> +		return false;
> +
> +	/*
> +	 * Some CPUs may be trapped in the dispatch paths. Enable breather
> +	 * immediately; otherwise, we might not even be able to get to
> +	 * scx_bypass().
> +	 */
> +	WRITE_ONCE(scx_aborting, true);
> +	return true;
> +}
> +
> +static void scx_disable(enum scx_exit_kind kind)
> +{
>  	struct scx_sched *sch;
>  
>  	if (WARN_ON_ONCE(kind == SCX_EXIT_NONE || kind == SCX_EXIT_DONE))
> @@ -4114,7 +4109,7 @@ static void scx_disable(enum scx_exit_kind kind)
>  	rcu_read_lock();
>  	sch = rcu_dereference(scx_root);
>  	if (sch) {
> -		atomic_try_cmpxchg(&sch->exit_kind, &none, kind);
> +		scx_claim_exit(sch, kind);
>  		kthread_queue_work(sch->helper, &sch->disable_work);
>  	}
>  	rcu_read_unlock();
> @@ -4435,9 +4430,8 @@ static void scx_vexit(struct scx_sched *sch,
>  		      const char *fmt, va_list args)
>  {
>  	struct scx_exit_info *ei = sch->exit_info;
> -	int none = SCX_EXIT_NONE;
>  
> -	if (!atomic_try_cmpxchg(&sch->exit_kind, &none, kind))
> +	if (!scx_claim_exit(sch, kind))
>  		return;
>  
>  	ei->exit_code = exit_code;
> @@ -4653,6 +4647,8 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
>  	 */
>  	WARN_ON_ONCE(scx_set_enable_state(SCX_ENABLING) != SCX_DISABLED);
>  	WARN_ON_ONCE(scx_root);
> +	if (WARN_ON_ONCE(READ_ONCE(scx_aborting)))
> +		WRITE_ONCE(scx_aborting, false);
>  
>  	atomic_long_set(&scx_nr_rejected, 0);
>  
> -- 
> 2.51.1
> 

  reply	other threads:[~2025-11-10  7:45 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-09 18:30 [PATCHSET sched_ext/for-6.19] sched_ext: Improve bypass mode scalability Tejun Heo
2025-11-09 18:31 ` [PATCH 01/13] sched_ext: Don't set ddsp_dsq_id during select_cpu in bypass mode Tejun Heo
2025-11-10  6:57   ` Andrea Righi
2025-11-10 16:08     ` Tejun Heo
2025-11-09 18:31 ` [PATCH 02/13] sched_ext: Make slice values tunable and use shorter slice " Tejun Heo
2025-11-10  7:03   ` Andrea Righi
2025-11-10  7:59     ` Andrea Righi
2025-11-10 16:21     ` Tejun Heo
2025-11-10 16:22       ` Tejun Heo
2025-11-10  8:22   ` Andrea Righi
2025-11-11 14:57   ` Dan Schatzberg
2025-11-09 18:31 ` [PATCH 03/13] sched_ext: Refactor do_enqueue_task() local and global DSQ paths Tejun Heo
2025-11-10  7:21   ` Andrea Righi
2025-11-09 18:31 ` [PATCH 04/13] sched_ext: Use per-CPU DSQs instead of per-node global DSQs in bypass mode Tejun Heo
2025-11-10  7:42   ` Andrea Righi
2025-11-10 16:42     ` Tejun Heo
2025-11-10 17:30       ` Andrea Righi
2025-11-11 15:31   ` Dan Schatzberg
2025-11-09 18:31 ` [PATCH 05/13] sched_ext: Simplify breather mechanism with scx_aborting flag Tejun Heo
2025-11-10  7:45   ` Andrea Righi [this message]
2025-11-11 15:34   ` Dan Schatzberg
2025-11-09 18:31 ` [PATCH 06/13] sched_ext: Exit dispatch and move operations immediately when aborting Tejun Heo
2025-11-10  8:20   ` Andrea Righi
2025-11-10 18:51     ` Tejun Heo
2025-11-11 15:46   ` Dan Schatzberg
2025-11-09 18:31 ` [PATCH 07/13] sched_ext: Make scx_exit() and scx_vexit() return bool Tejun Heo
2025-11-10  8:28   ` Andrea Righi
2025-11-11 15:48   ` Dan Schatzberg
2025-11-09 18:31 ` [PATCH 08/13] sched_ext: Refactor lockup handlers into handle_lockup() Tejun Heo
2025-11-10  8:29   ` Andrea Righi
2025-11-11 15:49   ` Dan Schatzberg
2025-11-09 18:31 ` [PATCH 09/13] sched_ext: Make handle_lockup() propagate scx_verror() result Tejun Heo
2025-11-10  8:29   ` Andrea Righi
2025-11-09 18:31 ` [PATCH 10/13] sched_ext: Hook up hardlockup detector Tejun Heo
2025-11-10  8:31   ` Andrea Righi
2025-11-09 18:31 ` [PATCH 11/13] sched_ext: Add scx_cpu0 example scheduler Tejun Heo
2025-11-10  8:36   ` Andrea Righi
2025-11-10 18:44     ` Tejun Heo
2025-11-10 21:06       ` Andrea Righi
2025-11-10 22:08         ` Tejun Heo
2025-11-09 18:31 ` [PATCH 12/13] sched_ext: Factor out scx_dsq_list_node cursor initialization into INIT_DSQ_LIST_CURSOR Tejun Heo
2025-11-10  8:37   ` Andrea Righi
2025-11-09 18:31 ` [PATCH 13/13] sched_ext: Implement load balancer for bypass mode Tejun Heo
2025-11-10  9:38   ` Andrea Righi
2025-11-10 19:21     ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aRGX_3UR_dPLTTrn@gpd4 \
    --to=andrea.righi@linux.dev \
    --cc=changwoo@igalia.com \
    --cc=etsal@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=schatzberg.dan@gmail.com \
    --cc=sched-ext@lists.linux.dev \
    --cc=tj@kernel.org \
    --cc=void@manifault.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.