[PATCH 1/2] evl/sched: Add sched_out handler to sched

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
@ 2026-06-14 19:35 Jan Kiszka
  2026-06-14 19:35 ` [PATCH 2/2] evl/sched/quota: Correct budget tracking for preempted threads Jan Kiszka
  2026-06-15  6:28 ` [PATCH 1/2] evl/sched: Add sched_out handler to sched_class Philippe Gerum
  0 siblings, 2 replies; 24+ messages in thread
From: Jan Kiszka @ 2026-06-14 19:35 UTC (permalink / raw)
  To: Philippe Gerum, Xenomai

From: Jan Kiszka <jan.kiszka@siemens.com>

This shall be invoked before a thread switch, providing both the current
and the next thread as arguments. Some scheduling classes may need it to
correctly handle their state as the sched_pick may not be invoked when a
higher-weighted class is providing the next thread.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 include/evl/sched.h     | 2 ++
 kernel/evl/sched/core.c | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/include/evl/sched.h b/include/evl/sched.h
index ae9690860146c..0b16f1b1cf626 100644
--- a/include/evl/sched.h
+++ b/include/evl/sched.h
@@ -120,6 +120,8 @@ struct evl_sched_class {
 	void (*sched_dequeue)(struct evl_thread *thread);
 	void (*sched_requeue)(struct evl_thread *thread);
 	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
+	void (*sched_out)(struct evl_thread *thread,
+			  struct evl_thread *next);
 	void (*sched_yield)(struct evl_thread *thread);
 	void (*sched_migrate)(struct evl_thread *thread,
 			      struct evl_rq *rq);
diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
index eb133e334d30f..0d49fc16bd67e 100644
--- a/kernel/evl/sched/core.c
+++ b/kernel/evl/sched/core.c
@@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
  */
 void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
 {
+	struct evl_sched_class *prev_schedclass;
 	struct evl_rq *this_rq = this_evl_rq();
 	struct evl_thread *prev, *next, *curr;
 	bool leaving_inband, inband_tail;
@@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
 	this_rq->curr = next;
 	leaving_inband = false;
 
+	prev_schedclass = prev->sched_class;
+	if (prev_schedclass->sched_out)
+		prev_schedclass->sched_out(prev, next);
+
 	/*
 	 * Careful: we _must_ have updated this_rq->curr before
 	 * performing the rest of the context switch code
-- 
2.47.3

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/2] evl/sched/quota: Correct budget tracking for preempted threads
  2026-06-14 19:35 [PATCH 1/2] evl/sched: Add sched_out handler to sched_class Jan Kiszka
@ 2026-06-14 19:35 ` Jan Kiszka
  2026-06-15  8:47   ` Philippe Gerum
  2026-06-15  6:28 ` [PATCH 1/2] evl/sched: Add sched_out handler to sched_class Philippe Gerum
  1 sibling, 1 reply; 24+ messages in thread
From: Jan Kiszka @ 2026-06-14 19:35 UTC (permalink / raw)
  To: Philippe Gerum, Xenomai

From: Jan Kiszka <jan.kiszka@siemens.com>

This was so far not possible due to a missing sched_class callback,
causing threads being credited for the preemption time of threads from
higher-weighted classes.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 kernel/evl/sched/quota.c | 56 +++++++++++++++++++++++++++++-----------
 1 file changed, 41 insertions(+), 15 deletions(-)

diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
index 0829da711a66a..d331dc2e8f7ac 100644
--- a/kernel/evl/sched/quota.c
+++ b/kernel/evl/sched/quota.c
@@ -187,11 +187,28 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
 	raw_spin_unlock(&rq->lock);
 }
 
+static void charge_usage(struct evl_quota_group *tg, ktime_t now)
+{
+	ktime_t elapsed;
+
+	elapsed =  ktime_sub(now, tg->run_start);
+	if (elapsed < tg->run_budget)
+		tg->run_budget = ktime_sub(tg->run_budget, elapsed);
+	else
+		tg->run_budget = 0;
+}
+
 static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
 {
+	struct evl_quota_group *tg;
 	struct evl_rq *rq;
 
 	rq = container_of(timer, struct evl_rq, quota.limit_timer);
+
+	tg = rq->curr->quota;
+	if (tg)
+		charge_usage(tg, evl_ktime_monotonic());
+
 	/*
 	 * Force a rescheduling on the return path of the current
 	 * interrupt, so that the budget is re-evaluated for the
@@ -253,6 +270,8 @@ static bool quota_setparam(struct evl_thread *thread,
 			/* Dequeued earlier by our caller. */
 			list_del(&thread->quota_next);
 			thread->quota->nr_threads--;
+		} else if (thread == evl_current()) {
+			tg->run_start = evl_ktime_monotonic();
 		}
 		thread->quota = tg;
 		list_add(&thread->quota_next, &tg->members);
@@ -400,21 +419,11 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
 	struct evl_thread *next, *curr = rq->curr;
 	struct evl_sched_quota *qs = &rq->quota;
 	struct evl_quota_group *otg, *tg;
-	ktime_t now, elapsed;
+	ktime_t now;
 
 	now = evl_ktime_monotonic();
 	otg = curr->quota;
-	if (otg == NULL)
-		goto pick;
-	/*
-	 * Charge the time consumed by the outgoing thread to the
-	 * group it belongs to.
-	 */
-	elapsed = ktime_sub(now, otg->run_start);
-	if (elapsed < otg->run_budget)
-		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
-	else
-		otg->run_budget = 0;
+
 pick:
 	next = evl_get_schedq(&rq->fifo.runnable);
 	if (next == NULL) {
@@ -430,8 +439,6 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
 	if (tg == NULL)
 		return next;
 
-	tg->run_start = now;
-
 	/*
 	 * Don't consider budget if kicked, we have to allow this
 	 * thread to run until it eventually switches to in-band
@@ -448,9 +455,12 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
 		goto pick;
 	}
 
-	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
+	if (otg != tg) {
+		tg->run_start = now;
+	} else if (evl_timer_is_running(&qs->limit_timer)) {
 		/* Same group, leave the running timer untouched. */
 		goto out;
+	}
 
 	/* Arm limit timer for the new running group. */
 	evl_start_timer(&qs->limit_timer,
@@ -462,6 +472,21 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
 	return next;
 }
 
+static void quota_out(struct evl_thread *thread, struct evl_thread *next)
+{
+	struct evl_quota_group *otg = thread->quota;
+	struct evl_quota_group *ntg = next->quota;
+	ktime_t now;
+
+	if (otg && otg != ntg) {
+		if (ntg)
+			now = ntg->run_start;
+		else
+			now = evl_ktime_monotonic();
+		charge_usage(otg, now);
+	}
+}
+
 static void quota_migrate(struct evl_thread *thread, struct evl_rq *rq)
 {
 	union evl_sched_param param;
@@ -786,6 +811,7 @@ struct evl_sched_class evl_sched_quota = {
 	.sched_dequeue		=	quota_dequeue,
 	.sched_requeue		=	quota_requeue,
 	.sched_pick		=	quota_pick,
+	.sched_out		=	quota_out,
 	.sched_migrate		=	quota_migrate,
 	.sched_chkparam		=	quota_chkparam,
 	.sched_setparam		=	quota_setparam,
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/2] evl/sched/quota: Correct budget tracking for preempted threads
  2026-06-14 19:35 ` [PATCH 2/2] evl/sched/quota: Correct budget tracking for preempted threads Jan Kiszka
@ 2026-06-15  8:47   ` Philippe Gerum
  0 siblings, 0 replies; 24+ messages in thread
From: Philippe Gerum @ 2026-06-15  8:47 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Xenomai

Jan Kiszka <jan.kiszka@siemens.com> writes:

> From: Jan Kiszka <jan.kiszka@siemens.com>
>
> This was so far not possible due to a missing sched_class callback,
> causing threads being credited for the preemption time of threads from
> higher-weighted classes.
>
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
>  kernel/evl/sched/quota.c | 56 +++++++++++++++++++++++++++++-----------
>  1 file changed, 41 insertions(+), 15 deletions(-)
>
> diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
> index 0829da711a66a..d331dc2e8f7ac 100644
> --- a/kernel/evl/sched/quota.c
> +++ b/kernel/evl/sched/quota.c
> @@ -187,11 +187,28 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>  	raw_spin_unlock(&rq->lock);
>  }
>  
> +static void charge_usage(struct evl_quota_group *tg, ktime_t now)
> +{
> +	ktime_t elapsed;
> +
> +	elapsed =  ktime_sub(now, tg->run_start);
> +	if (elapsed < tg->run_budget)
> +		tg->run_budget = ktime_sub(tg->run_budget, elapsed);
> +	else
> +		tg->run_budget = 0;
> +}
> +
>  static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
>  {
> +	struct evl_quota_group *tg;
>  	struct evl_rq *rq;
>  
>  	rq = container_of(timer, struct evl_rq, quota.limit_timer);
> +
> +	tg = rq->curr->quota;
> +	if (tg)
> +		charge_usage(tg, evl_ktime_monotonic());
> +
>  	/*
>  	 * Force a rescheduling on the return path of the current
>  	 * interrupt, so that the budget is re-evaluated for the
> @@ -253,6 +270,8 @@ static bool quota_setparam(struct evl_thread *thread,
>  			/* Dequeued earlier by our caller. */
>  			list_del(&thread->quota_next);
>  			thread->quota->nr_threads--;
> +		} else if (thread == evl_current()) {
> +			tg->run_start = evl_ktime_monotonic();
>  		}
>  		thread->quota = tg;
>  		list_add(&thread->quota_next, &tg->members);
> @@ -400,21 +419,11 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>  	struct evl_thread *next, *curr = rq->curr;
>  	struct evl_sched_quota *qs = &rq->quota;
>  	struct evl_quota_group *otg, *tg;
> -	ktime_t now, elapsed;
> +	ktime_t now;
>  
>  	now = evl_ktime_monotonic();
>  	otg = curr->quota;
> -	if (otg == NULL)
> -		goto pick;
> -	/*
> -	 * Charge the time consumed by the outgoing thread to the
> -	 * group it belongs to.
> -	 */
> -	elapsed = ktime_sub(now, otg->run_start);
> -	if (elapsed < otg->run_budget)
> -		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
> -	else
> -		otg->run_budget = 0;
> +
>  pick:
>  	next = evl_get_schedq(&rq->fifo.runnable);
>  	if (next == NULL) {
> @@ -430,8 +439,6 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>  	if (tg == NULL)
>  		return next;
>  
> -	tg->run_start = now;
> -
>  	/*
>  	 * Don't consider budget if kicked, we have to allow this
>  	 * thread to run until it eventually switches to in-band
> @@ -448,9 +455,12 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>  		goto pick;
>  	}
>  
> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
> +	if (otg != tg) {
> +		tg->run_start = now;
> +	} else if (evl_timer_is_running(&qs->limit_timer)) {
>  		/* Same group, leave the running timer untouched. */
>  		goto out;
> +	}
>  
>  	/* Arm limit timer for the new running group. */
>  	evl_start_timer(&qs->limit_timer,
> @@ -462,6 +472,21 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>  	return next;
>  }
>  
> +static void quota_out(struct evl_thread *thread, struct evl_thread *next)
> +{
> +	struct evl_quota_group *otg = thread->quota;
> +	struct evl_quota_group *ntg = next->quota;
> +	ktime_t now;
> +
> +	if (otg && otg != ntg) {

otg can't be NULL by construction since quota_out() is called for thread->sched_class.

-- 
Philippe.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-14 19:35 [PATCH 1/2] evl/sched: Add sched_out handler to sched_class Jan Kiszka
  2026-06-14 19:35 ` [PATCH 2/2] evl/sched/quota: Correct budget tracking for preempted threads Jan Kiszka
@ 2026-06-15  6:28 ` Philippe Gerum
  2026-06-15  6:42   ` Philippe Gerum
  1 sibling, 1 reply; 24+ messages in thread
From: Philippe Gerum @ 2026-06-15  6:28 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Xenomai

Jan Kiszka <jan.kiszka@siemens.com> writes:

> From: Jan Kiszka <jan.kiszka@siemens.com>
>
> This shall be invoked before a thread switch, providing both the current
> and the next thread as arguments. Some scheduling classes may need it to
> correctly handle their state as the sched_pick may not be invoked when a
> higher-weighted class is providing the next thread.
>
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
>  include/evl/sched.h     | 2 ++
>  kernel/evl/sched/core.c | 5 +++++
>  2 files changed, 7 insertions(+)
>
> diff --git a/include/evl/sched.h b/include/evl/sched.h
> index ae9690860146c..0b16f1b1cf626 100644
> --- a/include/evl/sched.h
> +++ b/include/evl/sched.h
> @@ -120,6 +120,8 @@ struct evl_sched_class {
>  	void (*sched_dequeue)(struct evl_thread *thread);
>  	void (*sched_requeue)(struct evl_thread *thread);
>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
> +	void (*sched_out)(struct evl_thread *thread,
> +			  struct evl_thread *next);
>  	void (*sched_yield)(struct evl_thread *thread);
>  	void (*sched_migrate)(struct evl_thread *thread,
>  			      struct evl_rq *rq);
> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
> index eb133e334d30f..0d49fc16bd67e 100644
> --- a/kernel/evl/sched/core.c
> +++ b/kernel/evl/sched/core.c
> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>   */
>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>  {
> +	struct evl_sched_class *prev_schedclass;
>  	struct evl_rq *this_rq = this_evl_rq();
>  	struct evl_thread *prev, *next, *curr;
>  	bool leaving_inband, inband_tail;
> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>  	this_rq->curr = next;
>  	leaving_inband = false;
>  
> +	prev_schedclass = prev->sched_class;
> +	if (prev_schedclass->sched_out)
> +		prev_schedclass->sched_out(prev, next);
> +
>  	/*
>  	 * Careful: we _must_ have updated this_rq->curr before
>  	 * performing the rest of the context switch code

I've been working on this lately too. It turns out that we need more
than this, although this is definitely part of the solution. I'll follow
up on this issue.

-- 
Philippe.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  6:28 ` [PATCH 1/2] evl/sched: Add sched_out handler to sched_class Philippe Gerum
@ 2026-06-15  6:42   ` Philippe Gerum
  2026-06-15  6:44     ` Jan Kiszka
                       ` (3 more replies)
  0 siblings, 4 replies; 24+ messages in thread
From: Philippe Gerum @ 2026-06-15  6:42 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Xenomai

Philippe Gerum <rpm@xenomai.org> writes:

> Jan Kiszka <jan.kiszka@siemens.com> writes:
>
>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>
>> This shall be invoked before a thread switch, providing both the current
>> and the next thread as arguments. Some scheduling classes may need it to
>> correctly handle their state as the sched_pick may not be invoked when a
>> higher-weighted class is providing the next thread.
>>
>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>> ---
>>  include/evl/sched.h     | 2 ++
>>  kernel/evl/sched/core.c | 5 +++++
>>  2 files changed, 7 insertions(+)
>>
>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>> index ae9690860146c..0b16f1b1cf626 100644
>> --- a/include/evl/sched.h
>> +++ b/include/evl/sched.h
>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>  	void (*sched_requeue)(struct evl_thread *thread);
>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>> +	void (*sched_out)(struct evl_thread *thread,
>> +			  struct evl_thread *next);
>>  	void (*sched_yield)(struct evl_thread *thread);
>>  	void (*sched_migrate)(struct evl_thread *thread,
>>  			      struct evl_rq *rq);
>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>> index eb133e334d30f..0d49fc16bd67e 100644
>> --- a/kernel/evl/sched/core.c
>> +++ b/kernel/evl/sched/core.c
>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>   */
>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>  {
>> +	struct evl_sched_class *prev_schedclass;
>>  	struct evl_rq *this_rq = this_evl_rq();
>>  	struct evl_thread *prev, *next, *curr;
>>  	bool leaving_inband, inband_tail;
>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>  	this_rq->curr = next;
>>  	leaving_inband = false;
>>  
>> +	prev_schedclass = prev->sched_class;
>> +	if (prev_schedclass->sched_out)
>> +		prev_schedclass->sched_out(prev, next);
>> +
>>  	/*
>>  	 * Careful: we _must_ have updated this_rq->curr before
>>  	 * performing the rest of the context switch code
>
> I've been working on this lately too. It turns out that we need more
> than this, although this is definitely part of the solution. I'll follow
> up on this issue.

This is still wip, I'm sharing this early to discuss details for
reconciling both proposals (the hunk in __evl_schedule() is merely
cosmetic, no functional change).

commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
Author: Philippe Gerum <rpm@xenomai.org>
Date:   Sun Jun 14 12:08:59 2026 +0200

    evl: sched/quota: fix budget tracking on preemption
    
    Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
    runtime budget of the former is inaccurately tracked. This is due to
    the fifo->pick() handler returning a valid thread, which prevents the
    quota->pick() handler from being called. As a result, the last runtime
    period of the outgoing thread is not accounted for.
    
    To fix this, we introduce a new sched_out() handler which is called
    for the outgoing thread, which the quota policy uses to update the
    remaining budget of preempted threads appropriately. In addition, the
    implementation no longer shares the runnable thread queue with
    SCHED_FIFO.
    
    Signed-off-by: Philippe Gerum <rpm@xenomai.org>

diff --git a/include/evl/sched.h b/include/evl/sched.h
index ae9690860146..cc824c28004b 100644
--- a/include/evl/sched.h
+++ b/include/evl/sched.h
@@ -120,6 +120,7 @@ struct evl_sched_class {
 	void (*sched_dequeue)(struct evl_thread *thread);
 	void (*sched_requeue)(struct evl_thread *thread);
 	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
+	void (*sched_out)(struct evl_thread *thread);
 	void (*sched_yield)(struct evl_thread *thread);
 	void (*sched_migrate)(struct evl_thread *thread,
 			      struct evl_rq *rq);
diff --git a/include/evl/sched/quota.h b/include/evl/sched/quota.h
index dfe3b7390958..dc8416645da8 100644
--- a/include/evl/sched/quota.h
+++ b/include/evl/sched/quota.h
@@ -40,6 +40,7 @@ struct evl_quota_group {
 
 struct evl_sched_quota {
 	ktime_t period;
+	struct evl_sched_queue runnable;
 	struct evl_timer refill_timer;
 	struct evl_timer limit_timer;
 	struct list_head groups;
diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
index d1d025e06a5d..7127d4e52df9 100644
--- a/kernel/evl/sched/core.c
+++ b/kernel/evl/sched/core.c
@@ -773,7 +773,8 @@ static inline void set_next_running(struct evl_rq *rq,
 		evl_stop_timer(&rq->rrbtimer);
 }
 
-static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
+static __always_inline struct evl_thread *
+__pick_next_thread(struct evl_rq *rq)
 {
 	struct evl_sched_class *sched_class;
 	struct evl_thread *curr = rq->curr;
@@ -821,8 +822,14 @@ static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
 /* rq->curr->lock + rq->lock held, hard irqs off. */
 static struct evl_thread *pick_next_thread(struct evl_rq *rq)
 {
-	struct evl_thread *next = __pick_next_thread(rq);
+	struct evl_thread *next, *prev = rq->curr;
+	struct evl_sched_class *prev_class = prev->sched_class;
 
+	if (prev_class->sched_out)
+		prev_class->sched_out(prev);
+
+	next = __pick_next_thread(rq);
+	trace_evl_pick_thread(next);
 	set_next_running(rq, next);
 
 	return next;
@@ -972,21 +979,20 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
 		return;
 	}
 
+	prev = curr;
 	next = pick_next_thread(this_rq);
-	trace_evl_pick_thread(next);
-	if (next == curr) {
-		if (unlikely(next->state & EVL_T_ROOT)) {
+	if (next == prev) {
+		if (unlikely(prev->state & EVL_T_ROOT)) {
 			if (this_rq->local_flags & RQ_TPROXY)
 				evl_notify_proxy_tick(this_rq);
 			if (this_rq->local_flags & RQ_TDEFER)
 				evl_program_local_tick(&evl_mono_clock);
 		}
 		raw_spin_unlock(&this_rq->lock);
-		raw_spin_unlock_irqrestore(&curr->lock, flags);
+		raw_spin_unlock_irqrestore(&prev->lock, flags);
 		return;
 	}
 
-	prev = curr;
 	this_rq->curr = next;
 	leaving_inband = false;
 
diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
index 0829da711a66..14748e3a843e 100644
--- a/kernel/evl/sched/quota.c
+++ b/kernel/evl/sched/quota.c
@@ -12,45 +12,35 @@
 #include <uapi/evl/sched-abi.h>
 
 /*
- * With this policy, each per-CPU runqueue maintains a list of active
- * thread groups for the sched_fifo class.
- *
- * Each time a thread is picked from the runqueue, we check whether we
- * still have budget for running it, looking at the group it belongs
- * to. If so, a timer is armed to elapse when that group has no more
- * budget, would the incoming thread run unpreempted until then
- * (i.e. evl_quota->limit_timer).
+ * Each time a thread is picked from the ->runnable queue, we check
+ * whether the group it belongs to still has runtime budget.  If so, a
+ * timer is armed to fire when that group has no more budget, would
+ * the incoming thread run unpreempted until then
+ * (i.e. quota->limit_timer).
  *
  * Otherwise, if no budget remains in the group for running the
  * candidate thread, we move the latter to a local expiry queue
  * maintained by the group. This process is done on the fly as we pull
- * from the runqueue.
+ * from the ->runnable queue.
  *
- * Updating the remaining budget is done each time the EVL core asks
- * for replacing the current thread with the next runnable one,
- * i.e. evl_quota_pick(). There we charge the elapsed run time of the
- * outgoing thread to the relevant group, and conversely, we check
- * whether the incoming thread has budget.
+ * Updating the remaining budget is done each time the EVL core
+ * schedules out a thread undergoing the quota scheduling policy,
  *
- * Finally, a per-CPU timer (evl_quota->refill_timer) periodically
- * ticks in the background, in accordance to the defined quota
- * interval. Thread group budgets get replenished by its handler in
- * accordance to their respective share, pushing all expired threads
- * back to the run queue in the same move.
+ * Finally, a per-CPU timer (quota->refill_timer) periodically ticks
+ * in the background, in accordance to the defined quota interval,
+ * replenishing per-group budgets, pushing all expired threads back to
+ * the quota ->runqueue too.
  *
- * NOTE: since the core logic enforcing the budget entirely happens in
- * evl_quota_pick(), applying a budget change can be done as simply as
- * forcing the rescheduling procedure to be invoked asap. As a result
- * of this, the EVL core will ask for the next thread to run, which
- * means calling evl_quota_pick() eventually.
+ * NOTE: forcing a call to the rescheduling procedure is enoiugh to
+ * apply a budget change.
  *
- * CAUTION: evl_quota_group->nr_active does count both the threads
- * from that group linked to the sched_fifo runqueue, _and_ the
- * threads moved to the local expiry queue. As a matter of fact, the
- * expired threads - those for which we consumed all the per-group
- * budget - are still seen as runnable (i.e. not blocked/suspended) by
- * the EVL core. This only means that the SCHED_QUOTA policy won't
- * pick them until the corresponding budget is replenished.
+ * CAUTION: quota_group->nr_active does count both the threads from
+ * that group linked to the runnable queue, _and_ the threads moved to
+ * the local expiry queue. As a matter of fact, the expired threads -
+ * those for which we consumed all the per-group budget - are still
+ * seen as runnable (i.e. not blocked/suspended) by the EVL core. This
+ * only means that the SCHED_QUOTA policy won't pick them until the
+ * corresponding budget is replenished.
  */
 
 #define MAX_QUOTA_GROUPS  1024
@@ -61,15 +51,19 @@ static DECLARE_BITMAP(group_map, MAX_QUOTA_GROUPS);
 
 static LIST_HEAD(group_list);
 
-static inline bool thread_on_quota(struct evl_thread *thread,
-				struct evl_quota_group *tg)
+static inline bool current_on_quota(struct evl_quota_group *tg)
 {
-	/*
-	 * Check whether @thread is running on some CPU, and belongs
-	 * to quota group @tg.
-	 */
-	return thread->quota == tg &&
-		!(thread->state & (EVL_T_READY|EVL_THREAD_BLOCK_MASK));
+	struct evl_rq *rq = tg->rq;
+	struct evl_thread *curr = rq->curr;
+	struct evl_sched_quota *qs = &rq->quota;
+
+	if (curr->quota != tg)
+		return false;
+
+	if (curr->state & (EVL_T_READY|EVL_T_KICKED|EVL_THREAD_BLOCK_MASK))
+		return false;
+
+	return evl_timer_is_running(&qs->limit_timer);
 }
 
 static inline bool group_is_active(struct evl_quota_group *tg)
@@ -82,7 +76,7 @@ static inline bool group_is_active(struct evl_quota_group *tg)
 	 * runqueue, in which case tg->nr_active already accounted for
 	 * it.
 	 */
-	return thread_on_quota(tg->rq->curr, tg);
+	return current_on_quota(tg);
 }
 
 static inline void replenish_budget(struct evl_sched_quota *qs,
@@ -134,10 +128,10 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
 	} else if (tg->run_credit) {
 		credit = ktime_sub(tg->quota_peak, budget);
 		/* Consume the accumulated credit. */
-		if (tg->run_credit >= credit)
+		if (tg->run_credit >= credit) {
 			tg->run_credit =
 				ktime_sub(tg->run_credit, credit);
-		else {
+		} else {
 			credit = tg->run_credit;
 			tg->run_credit = 0;
 		}
@@ -150,8 +144,8 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
 
 static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled */
 {
-	struct evl_quota_group *tg;
 	struct evl_thread *thread, *tmp;
+	struct evl_quota_group *tg;
 	struct evl_sched_quota *qs;
 	struct evl_rq *rq;
 
@@ -167,7 +161,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
 		if (tg->run_budget == 0 || list_empty(&tg->expired))
 			continue;
 		/*
-		 * For each group living on this CPU, move all expired
+		 * For each group pinned on this CPU, move all expired
 		 * threads back to the runqueue. Since those threads
 		 * were moved out of the runqueue as we were
 		 * considering them for execution, we push them back
@@ -178,7 +172,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
 		list_for_each_entry_safe_reverse(thread, tmp,
 						&tg->expired, quota_expired) {
 			list_del_init(&thread->quota_expired);
-			evl_add_schedq(&rq->fifo.runnable, thread);
+			evl_add_schedq(&qs->runnable, thread);
 		}
 	}
 
@@ -195,7 +189,7 @@ static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
 	/*
 	 * Force a rescheduling on the return path of the current
 	 * interrupt, so that the budget is re-evaluated for the
-	 * current group in evl_quota_pick().
+	 * current group in quota_pick().
 	 */
 	raw_spin_lock(&rq->lock);
 	evl_set_self_resched(rq);
@@ -221,6 +215,7 @@ static void quota_init(struct evl_rq *rq)
 {
 	struct evl_sched_quota *qs = &rq->quota;
 
+	evl_init_schedq(&qs->runnable);
 	qs->period = quota_period;
 	INIT_LIST_HEAD(&qs->groups);
 
@@ -337,8 +332,8 @@ static void quota_forget(struct evl_thread *thread)
 
 static void quota_kick(struct evl_thread *thread)
 {
+	struct evl_sched_quota *qs = &thread->rq->quota;
 	struct evl_quota_group *tg = thread->quota;
-	struct evl_rq *rq = thread->rq;
 
 	/*
 	 * Allow a kicked thread to be elected for running until it
@@ -347,7 +342,7 @@ static void quota_kick(struct evl_thread *thread)
 	 */
 	if (tg->run_budget == 0 && !list_empty(&thread->quota_expired)) {
 		list_del_init(&thread->quota_expired);
-		evl_add_schedq_tail(&rq->fifo.runnable, thread);
+		evl_add_schedq_tail(&qs->runnable, thread);
 	}
 }
 
@@ -358,79 +353,82 @@ static inline int thread_is_runnable(struct evl_thread *thread)
 
 static void quota_enqueue(struct evl_thread *thread)
 {
+	struct evl_sched_quota *qs = &thread->rq->quota;
 	struct evl_quota_group *tg = thread->quota;
-	struct evl_rq *rq = thread->rq;
 
 	if (!thread_is_runnable(thread))
 		list_add_tail(&thread->quota_expired, &tg->expired);
 	else
-		evl_add_schedq_tail(&rq->fifo.runnable, thread);
+		evl_add_schedq_tail(&qs->runnable, thread);
 
 	tg->nr_active++;
 }
 
 static void quota_dequeue(struct evl_thread *thread)
 {
+	struct evl_sched_quota *qs = &thread->rq->quota;
 	struct evl_quota_group *tg = thread->quota;
-	struct evl_rq *rq = thread->rq;
 
 	if (!list_empty(&thread->quota_expired))
 		list_del_init(&thread->quota_expired);
 	else
-		evl_del_schedq(&rq->fifo.runnable, thread);
+		evl_del_schedq(&qs->runnable, thread);
 
 	tg->nr_active--;
 }
 
 static void quota_requeue(struct evl_thread *thread)
 {
+	struct evl_sched_quota *qs = &thread->rq->quota;
 	struct evl_quota_group *tg = thread->quota;
-	struct evl_rq *rq = thread->rq;
 
 	if (!thread_is_runnable(thread))
 		list_add(&thread->quota_expired, &tg->expired);
 	else
-		evl_add_schedq(&rq->fifo.runnable, thread);
+		evl_add_schedq(&qs->runnable, thread);
 
 	tg->nr_active++;
 }
 
-static struct evl_thread *quota_pick(struct evl_rq *rq)
+static void quota_out(struct evl_thread *thread)
 {
-	struct evl_thread *next, *curr = rq->curr;
-	struct evl_sched_quota *qs = &rq->quota;
-	struct evl_quota_group *otg, *tg;
-	ktime_t now, elapsed;
+	struct evl_sched_quota *qs = &thread->rq->quota;
+	struct evl_quota_group *tg = thread->quota;
+	ktime_t now, consumed;
+
+	/* Timer off means that we are not tracking quota. */
+	if (!evl_timer_is_running(&qs->limit_timer))
+		return;
 
-	now = evl_ktime_monotonic();
-	otg = curr->quota;
-	if (otg == NULL)
-		goto pick;
 	/*
 	 * Charge the time consumed by the outgoing thread to the
 	 * group it belongs to.
 	 */
-	elapsed = ktime_sub(now, otg->run_start);
-	if (elapsed < otg->run_budget)
-		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
-	else
-		otg->run_budget = 0;
+	now = evl_ktime_monotonic();
+	consumed = ktime_sub(now, tg->run_start);
+	if (consumed < tg->run_budget) {
+		tg->run_budget = ktime_sub(tg->run_budget, consumed);
+	} else {
+		tg->run_budget = 0;
+		evl_stop_timer(&qs->limit_timer);
+	}
+}
+
+static struct evl_thread *quota_pick(struct evl_rq *rq)
+{
+	struct evl_thread *next, *curr = rq->curr;
+	struct evl_sched_quota *qs = &rq->quota;
+	struct evl_quota_group *tg;
+
 pick:
-	next = evl_get_schedq(&rq->fifo.runnable);
+	next = evl_get_schedq(&qs->runnable);
 	if (next == NULL) {
 		evl_stop_timer(&qs->limit_timer);
 		return NULL;
 	}
 
-	/*
-	 * As we basically piggyback on the SCHED_FIFO runqueue, make
-	 * sure to detect non-quota threads.
-	 */
 	tg = next->quota;
-	if (tg == NULL)
-		return next;
-
-	tg->run_start = now;
+	tg->nr_active--;
 
 	/*
 	 * Don't consider budget if kicked, we have to allow this
@@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
 	 */
 	if (next->info & EVL_T_KICKED) {
 		evl_stop_timer(&qs->limit_timer);
-		goto out;
+		return next;
 	}
 
+	/*
+	 * __pick_next_thread() might have requeued the current
+	 * thread which is still leading the pack.
+	 */
+	if (curr == next)
+		return next;
+
 	if (ktime_to_ns(tg->run_budget) == 0) {
-		/* Flush expired group members as we go. */
+		/* Park expired group members as we go. */
 		list_add_tail(&next->quota_expired, &tg->expired);
 		goto pick;
 	}
 
-	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
-		/* Same group, leave the running timer untouched. */
-		goto out;
-
-	/* Arm limit timer for the new running group. */
-	evl_start_timer(&qs->limit_timer,
-			ktime_add(now, tg->run_budget),
-			EVL_INFINITE);
-out:
-	tg->nr_active--;
+	/* Arm new limit timer on change of running group. */
+	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
+		tg->run_start = evl_ktime_monotonic();
+		evl_start_timer(&qs->limit_timer,
+				ktime_add(tg->run_start, tg->run_budget),
+				EVL_INFINITE);
+	}
 
 	return next;
 }
@@ -542,7 +544,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
 	 * Unregister the group before we drop rq->lock. As a result,
 	 * it won't accept threads anymore while we are busy moving
 	 * the current members to the fifo class, and concurrent
-	 * evl_quota_remove requests would receive -EINVAL.
+	 * quota_remove requests would receive -EINVAL.
 	 */
 	__clear_bit(tg->tgid, group_map);
 	list_del(&tg->next);
@@ -556,7 +558,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
 	 * hold rq->lock on entry, we do a trylock dance to prevent an
 	 * ABBA issue. No livelock is possible since we unregistered
 	 * that group already, so &tg->members can only be depleted
-	 * (by this loop specifically).
+	 * (by this loop exclusively).
 	 */
 
 	while (!list_empty(&tg->members)) {
@@ -583,10 +585,10 @@ static void quota_set_limit(struct evl_quota_group *tg,
 			int *quota_sum_r)
 {
 	struct evl_rq *rq = tg->rq;
-	struct evl_thread *thread, *tmp, *curr = rq->curr;
+	struct evl_thread *thread, *tmp;
 	struct evl_sched_quota *qs = &rq->quota;
-	ktime_t now, elapsed, consumed;
 	ktime_t old_quota = tg->quota;
+	ktime_t consumed;
 	u64 n;
 
 	assert_hard_lock(&rq->lock);
@@ -615,16 +617,8 @@ static void quota_set_limit(struct evl_quota_group *tg,
 	tg->quota_percent = quota_percent;
 	tg->quota_peak_percent = quota_peak_percent;
 
-	if (thread_on_quota(curr, tg)) {
-		now = evl_ktime_monotonic();
-
-		elapsed = now - tg->run_start;
-		if (elapsed < tg->run_budget)
-			tg->run_budget -= elapsed;
-		else
-			tg->run_budget = 0;
-
-		tg->run_start = now;
+	if (current_on_quota(tg)) {
+		quota_out(rq->curr);
 		evl_stop_timer(&qs->limit_timer);
 	}
 
@@ -646,7 +640,7 @@ static void quota_set_limit(struct evl_quota_group *tg,
 		list_for_each_entry_safe_reverse(thread, tmp, &tg->expired,
 						quota_expired) {
 			list_del_init(&thread->quota_expired);
-			evl_add_schedq(&rq->fifo.runnable, thread);
+			evl_add_schedq(&qs->runnable, thread);
 		}
 	}
 
@@ -786,6 +780,7 @@ struct evl_sched_class evl_sched_quota = {
 	.sched_dequeue		=	quota_dequeue,
 	.sched_requeue		=	quota_requeue,
 	.sched_pick		=	quota_pick,
+	.sched_out		=	quota_out,
 	.sched_migrate		=	quota_migrate,
 	.sched_chkparam		=	quota_chkparam,
 	.sched_setparam		=	quota_setparam,

-- 
Philippe.

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  6:42   ` Philippe Gerum
@ 2026-06-15  6:44     ` Jan Kiszka
  2026-06-15  6:46     ` Jan Kiszka
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 24+ messages in thread
From: Jan Kiszka @ 2026-06-15  6:44 UTC (permalink / raw)
  To: Philippe Gerum; +Cc: Xenomai

On 15.06.26 08:42, Philippe Gerum wrote:
> Philippe Gerum <rpm@xenomai.org> writes:
> 
>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>
>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>
>>> This shall be invoked before a thread switch, providing both the current
>>> and the next thread as arguments. Some scheduling classes may need it to
>>> correctly handle their state as the sched_pick may not be invoked when a
>>> higher-weighted class is providing the next thread.
>>>
>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>> ---
>>>  include/evl/sched.h     | 2 ++
>>>  kernel/evl/sched/core.c | 5 +++++
>>>  2 files changed, 7 insertions(+)
>>>
>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>> index ae9690860146c..0b16f1b1cf626 100644
>>> --- a/include/evl/sched.h
>>> +++ b/include/evl/sched.h
>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>> +	void (*sched_out)(struct evl_thread *thread,
>>> +			  struct evl_thread *next);
>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>  			      struct evl_rq *rq);
>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>> index eb133e334d30f..0d49fc16bd67e 100644
>>> --- a/kernel/evl/sched/core.c
>>> +++ b/kernel/evl/sched/core.c
>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>   */
>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>  {
>>> +	struct evl_sched_class *prev_schedclass;
>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>  	struct evl_thread *prev, *next, *curr;
>>>  	bool leaving_inband, inband_tail;
>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>  	this_rq->curr = next;
>>>  	leaving_inband = false;
>>>  
>>> +	prev_schedclass = prev->sched_class;
>>> +	if (prev_schedclass->sched_out)
>>> +		prev_schedclass->sched_out(prev, next);
>>> +
>>>  	/*
>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>  	 * performing the rest of the context switch code
>>
>> I've been working on this lately too. It turns out that we need more
>> than this, although this is definitely part of the solution. I'll follow
>> up on this issue.
> 
> This is still wip, I'm sharing this early to discuss details for
> reconciling both proposals (the hunk in __evl_schedule() is merely
> cosmetic, no functional change).
> 
> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
> Author: Philippe Gerum <rpm@xenomai.org>
> Date:   Sun Jun 14 12:08:59 2026 +0200
> 
>     evl: sched/quota: fix budget tracking on preemption
>     
>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>     runtime budget of the former is inaccurately tracked. This is due to
>     the fifo->pick() handler returning a valid thread, which prevents the
>     quota->pick() handler from being called. As a result, the last runtime
>     period of the outgoing thread is not accounted for.
>     
>     To fix this, we introduce a new sched_out() handler which is called
>     for the outgoing thread, which the quota policy uses to update the
>     remaining budget of preempted threads appropriately. In addition, the
>     implementation no longer shares the runnable thread queue with
>     SCHED_FIFO.
>     
>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
> 
> diff --git a/include/evl/sched.h b/include/evl/sched.h
> index ae9690860146..cc824c28004b 100644
> --- a/include/evl/sched.h
> +++ b/include/evl/sched.h
> @@ -120,6 +120,7 @@ struct evl_sched_class {
>  	void (*sched_dequeue)(struct evl_thread *thread);
>  	void (*sched_requeue)(struct evl_thread *thread);
>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
> +	void (*sched_out)(struct evl_thread *thread);
>  	void (*sched_yield)(struct evl_thread *thread);
>  	void (*sched_migrate)(struct evl_thread *thread,
>  			      struct evl_rq *rq);
> diff --git a/include/evl/sched/quota.h b/include/evl/sched/quota.h
> index dfe3b7390958..dc8416645da8 100644
> --- a/include/evl/sched/quota.h
> +++ b/include/evl/sched/quota.h
> @@ -40,6 +40,7 @@ struct evl_quota_group {
>  
>  struct evl_sched_quota {
>  	ktime_t period;
> +	struct evl_sched_queue runnable;
>  	struct evl_timer refill_timer;
>  	struct evl_timer limit_timer;
>  	struct list_head groups;
> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
> index d1d025e06a5d..7127d4e52df9 100644
> --- a/kernel/evl/sched/core.c
> +++ b/kernel/evl/sched/core.c
> @@ -773,7 +773,8 @@ static inline void set_next_running(struct evl_rq *rq,
>  		evl_stop_timer(&rq->rrbtimer);
>  }
>  
> -static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
> +static __always_inline struct evl_thread *
> +__pick_next_thread(struct evl_rq *rq)
>  {
>  	struct evl_sched_class *sched_class;
>  	struct evl_thread *curr = rq->curr;
> @@ -821,8 +822,14 @@ static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>  /* rq->curr->lock + rq->lock held, hard irqs off. */
>  static struct evl_thread *pick_next_thread(struct evl_rq *rq)
>  {
> -	struct evl_thread *next = __pick_next_thread(rq);
> +	struct evl_thread *next, *prev = rq->curr;
> +	struct evl_sched_class *prev_class = prev->sched_class;
>  
> +	if (prev_class->sched_out)
> +		prev_class->sched_out(prev);
> +
> +	next = __pick_next_thread(rq);
> +	trace_evl_pick_thread(next);
>  	set_next_running(rq, next);
>  
>  	return next;
> @@ -972,21 +979,20 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>  		return;
>  	}
>  
> +	prev = curr;
>  	next = pick_next_thread(this_rq);
> -	trace_evl_pick_thread(next);
> -	if (next == curr) {
> -		if (unlikely(next->state & EVL_T_ROOT)) {
> +	if (next == prev) {
> +		if (unlikely(prev->state & EVL_T_ROOT)) {
>  			if (this_rq->local_flags & RQ_TPROXY)
>  				evl_notify_proxy_tick(this_rq);
>  			if (this_rq->local_flags & RQ_TDEFER)
>  				evl_program_local_tick(&evl_mono_clock);
>  		}
>  		raw_spin_unlock(&this_rq->lock);
> -		raw_spin_unlock_irqrestore(&curr->lock, flags);
> +		raw_spin_unlock_irqrestore(&prev->lock, flags);
>  		return;
>  	}
>  
> -	prev = curr;
>  	this_rq->curr = next;
>  	leaving_inband = false;
>  
> diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
> index 0829da711a66..14748e3a843e 100644
> --- a/kernel/evl/sched/quota.c
> +++ b/kernel/evl/sched/quota.c
> @@ -12,45 +12,35 @@
>  #include <uapi/evl/sched-abi.h>
>  
>  /*
> - * With this policy, each per-CPU runqueue maintains a list of active
> - * thread groups for the sched_fifo class.
> - *
> - * Each time a thread is picked from the runqueue, we check whether we
> - * still have budget for running it, looking at the group it belongs
> - * to. If so, a timer is armed to elapse when that group has no more
> - * budget, would the incoming thread run unpreempted until then
> - * (i.e. evl_quota->limit_timer).
> + * Each time a thread is picked from the ->runnable queue, we check
> + * whether the group it belongs to still has runtime budget.  If so, a
> + * timer is armed to fire when that group has no more budget, would
> + * the incoming thread run unpreempted until then
> + * (i.e. quota->limit_timer).
>   *
>   * Otherwise, if no budget remains in the group for running the
>   * candidate thread, we move the latter to a local expiry queue
>   * maintained by the group. This process is done on the fly as we pull
> - * from the runqueue.
> + * from the ->runnable queue.
>   *
> - * Updating the remaining budget is done each time the EVL core asks
> - * for replacing the current thread with the next runnable one,
> - * i.e. evl_quota_pick(). There we charge the elapsed run time of the
> - * outgoing thread to the relevant group, and conversely, we check
> - * whether the incoming thread has budget.
> + * Updating the remaining budget is done each time the EVL core
> + * schedules out a thread undergoing the quota scheduling policy,
>   *
> - * Finally, a per-CPU timer (evl_quota->refill_timer) periodically
> - * ticks in the background, in accordance to the defined quota
> - * interval. Thread group budgets get replenished by its handler in
> - * accordance to their respective share, pushing all expired threads
> - * back to the run queue in the same move.
> + * Finally, a per-CPU timer (quota->refill_timer) periodically ticks
> + * in the background, in accordance to the defined quota interval,
> + * replenishing per-group budgets, pushing all expired threads back to
> + * the quota ->runqueue too.
>   *
> - * NOTE: since the core logic enforcing the budget entirely happens in
> - * evl_quota_pick(), applying a budget change can be done as simply as
> - * forcing the rescheduling procedure to be invoked asap. As a result
> - * of this, the EVL core will ask for the next thread to run, which
> - * means calling evl_quota_pick() eventually.
> + * NOTE: forcing a call to the rescheduling procedure is enoiugh to
> + * apply a budget change.
>   *
> - * CAUTION: evl_quota_group->nr_active does count both the threads
> - * from that group linked to the sched_fifo runqueue, _and_ the
> - * threads moved to the local expiry queue. As a matter of fact, the
> - * expired threads - those for which we consumed all the per-group
> - * budget - are still seen as runnable (i.e. not blocked/suspended) by
> - * the EVL core. This only means that the SCHED_QUOTA policy won't
> - * pick them until the corresponding budget is replenished.
> + * CAUTION: quota_group->nr_active does count both the threads from
> + * that group linked to the runnable queue, _and_ the threads moved to
> + * the local expiry queue. As a matter of fact, the expired threads -
> + * those for which we consumed all the per-group budget - are still
> + * seen as runnable (i.e. not blocked/suspended) by the EVL core. This
> + * only means that the SCHED_QUOTA policy won't pick them until the
> + * corresponding budget is replenished.
>   */
>  
>  #define MAX_QUOTA_GROUPS  1024
> @@ -61,15 +51,19 @@ static DECLARE_BITMAP(group_map, MAX_QUOTA_GROUPS);
>  
>  static LIST_HEAD(group_list);
>  
> -static inline bool thread_on_quota(struct evl_thread *thread,
> -				struct evl_quota_group *tg)
> +static inline bool current_on_quota(struct evl_quota_group *tg)
>  {
> -	/*
> -	 * Check whether @thread is running on some CPU, and belongs
> -	 * to quota group @tg.
> -	 */
> -	return thread->quota == tg &&
> -		!(thread->state & (EVL_T_READY|EVL_THREAD_BLOCK_MASK));
> +	struct evl_rq *rq = tg->rq;
> +	struct evl_thread *curr = rq->curr;
> +	struct evl_sched_quota *qs = &rq->quota;
> +
> +	if (curr->quota != tg)
> +		return false;
> +
> +	if (curr->state & (EVL_T_READY|EVL_T_KICKED|EVL_THREAD_BLOCK_MASK))
> +		return false;
> +
> +	return evl_timer_is_running(&qs->limit_timer);
>  }
>  
>  static inline bool group_is_active(struct evl_quota_group *tg)
> @@ -82,7 +76,7 @@ static inline bool group_is_active(struct evl_quota_group *tg)
>  	 * runqueue, in which case tg->nr_active already accounted for
>  	 * it.
>  	 */
> -	return thread_on_quota(tg->rq->curr, tg);
> +	return current_on_quota(tg);
>  }
>  
>  static inline void replenish_budget(struct evl_sched_quota *qs,
> @@ -134,10 +128,10 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>  	} else if (tg->run_credit) {
>  		credit = ktime_sub(tg->quota_peak, budget);
>  		/* Consume the accumulated credit. */
> -		if (tg->run_credit >= credit)
> +		if (tg->run_credit >= credit) {
>  			tg->run_credit =
>  				ktime_sub(tg->run_credit, credit);
> -		else {
> +		} else {
>  			credit = tg->run_credit;
>  			tg->run_credit = 0;
>  		}
> @@ -150,8 +144,8 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>  
>  static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled */
>  {
> -	struct evl_quota_group *tg;
>  	struct evl_thread *thread, *tmp;
> +	struct evl_quota_group *tg;
>  	struct evl_sched_quota *qs;
>  	struct evl_rq *rq;
>  
> @@ -167,7 +161,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>  		if (tg->run_budget == 0 || list_empty(&tg->expired))
>  			continue;
>  		/*
> -		 * For each group living on this CPU, move all expired
> +		 * For each group pinned on this CPU, move all expired
>  		 * threads back to the runqueue. Since those threads
>  		 * were moved out of the runqueue as we were
>  		 * considering them for execution, we push them back
> @@ -178,7 +172,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>  		list_for_each_entry_safe_reverse(thread, tmp,
>  						&tg->expired, quota_expired) {
>  			list_del_init(&thread->quota_expired);
> -			evl_add_schedq(&rq->fifo.runnable, thread);
> +			evl_add_schedq(&qs->runnable, thread);
>  		}
>  	}
>  
> @@ -195,7 +189,7 @@ static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
>  	/*
>  	 * Force a rescheduling on the return path of the current
>  	 * interrupt, so that the budget is re-evaluated for the
> -	 * current group in evl_quota_pick().
> +	 * current group in quota_pick().
>  	 */
>  	raw_spin_lock(&rq->lock);
>  	evl_set_self_resched(rq);
> @@ -221,6 +215,7 @@ static void quota_init(struct evl_rq *rq)
>  {
>  	struct evl_sched_quota *qs = &rq->quota;
>  
> +	evl_init_schedq(&qs->runnable);
>  	qs->period = quota_period;
>  	INIT_LIST_HEAD(&qs->groups);
>  
> @@ -337,8 +332,8 @@ static void quota_forget(struct evl_thread *thread)
>  
>  static void quota_kick(struct evl_thread *thread)
>  {
> +	struct evl_sched_quota *qs = &thread->rq->quota;
>  	struct evl_quota_group *tg = thread->quota;
> -	struct evl_rq *rq = thread->rq;
>  
>  	/*
>  	 * Allow a kicked thread to be elected for running until it
> @@ -347,7 +342,7 @@ static void quota_kick(struct evl_thread *thread)
>  	 */
>  	if (tg->run_budget == 0 && !list_empty(&thread->quota_expired)) {
>  		list_del_init(&thread->quota_expired);
> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
> +		evl_add_schedq_tail(&qs->runnable, thread);
>  	}
>  }
>  
> @@ -358,79 +353,82 @@ static inline int thread_is_runnable(struct evl_thread *thread)
>  
>  static void quota_enqueue(struct evl_thread *thread)
>  {
> +	struct evl_sched_quota *qs = &thread->rq->quota;
>  	struct evl_quota_group *tg = thread->quota;
> -	struct evl_rq *rq = thread->rq;
>  
>  	if (!thread_is_runnable(thread))
>  		list_add_tail(&thread->quota_expired, &tg->expired);
>  	else
> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
> +		evl_add_schedq_tail(&qs->runnable, thread);
>  
>  	tg->nr_active++;
>  }
>  
>  static void quota_dequeue(struct evl_thread *thread)
>  {
> +	struct evl_sched_quota *qs = &thread->rq->quota;
>  	struct evl_quota_group *tg = thread->quota;
> -	struct evl_rq *rq = thread->rq;
>  
>  	if (!list_empty(&thread->quota_expired))
>  		list_del_init(&thread->quota_expired);
>  	else
> -		evl_del_schedq(&rq->fifo.runnable, thread);
> +		evl_del_schedq(&qs->runnable, thread);
>  
>  	tg->nr_active--;
>  }
>  
>  static void quota_requeue(struct evl_thread *thread)
>  {
> +	struct evl_sched_quota *qs = &thread->rq->quota;
>  	struct evl_quota_group *tg = thread->quota;
> -	struct evl_rq *rq = thread->rq;
>  
>  	if (!thread_is_runnable(thread))
>  		list_add(&thread->quota_expired, &tg->expired);
>  	else
> -		evl_add_schedq(&rq->fifo.runnable, thread);
> +		evl_add_schedq(&qs->runnable, thread);
>  
>  	tg->nr_active++;
>  }
>  
> -static struct evl_thread *quota_pick(struct evl_rq *rq)
> +static void quota_out(struct evl_thread *thread)
>  {
> -	struct evl_thread *next, *curr = rq->curr;
> -	struct evl_sched_quota *qs = &rq->quota;
> -	struct evl_quota_group *otg, *tg;
> -	ktime_t now, elapsed;
> +	struct evl_sched_quota *qs = &thread->rq->quota;
> +	struct evl_quota_group *tg = thread->quota;
> +	ktime_t now, consumed;
> +
> +	/* Timer off means that we are not tracking quota. */
> +	if (!evl_timer_is_running(&qs->limit_timer))
> +		return;
>  
> -	now = evl_ktime_monotonic();
> -	otg = curr->quota;
> -	if (otg == NULL)
> -		goto pick;
>  	/*
>  	 * Charge the time consumed by the outgoing thread to the
>  	 * group it belongs to.
>  	 */
> -	elapsed = ktime_sub(now, otg->run_start);
> -	if (elapsed < otg->run_budget)
> -		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
> -	else
> -		otg->run_budget = 0;
> +	now = evl_ktime_monotonic();
> +	consumed = ktime_sub(now, tg->run_start);
> +	if (consumed < tg->run_budget) {
> +		tg->run_budget = ktime_sub(tg->run_budget, consumed);
> +	} else {
> +		tg->run_budget = 0;
> +		evl_stop_timer(&qs->limit_timer);
> +	}
> +}
> +
> +static struct evl_thread *quota_pick(struct evl_rq *rq)
> +{
> +	struct evl_thread *next, *curr = rq->curr;
> +	struct evl_sched_quota *qs = &rq->quota;
> +	struct evl_quota_group *tg;
> +
>  pick:
> -	next = evl_get_schedq(&rq->fifo.runnable);
> +	next = evl_get_schedq(&qs->runnable);
>  	if (next == NULL) {
>  		evl_stop_timer(&qs->limit_timer);
>  		return NULL;
>  	}
>  
> -	/*
> -	 * As we basically piggyback on the SCHED_FIFO runqueue, make
> -	 * sure to detect non-quota threads.
> -	 */
>  	tg = next->quota;
> -	if (tg == NULL)
> -		return next;
> -
> -	tg->run_start = now;
> +	tg->nr_active--;
>  
>  	/*
>  	 * Don't consider budget if kicked, we have to allow this
> @@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>  	 */
>  	if (next->info & EVL_T_KICKED) {
>  		evl_stop_timer(&qs->limit_timer);
> -		goto out;
> +		return next;
>  	}
>  
> +	/*
> +	 * __pick_next_thread() might have requeued the current
> +	 * thread which is still leading the pack.
> +	 */
> +	if (curr == next)
> +		return next;
> +
>  	if (ktime_to_ns(tg->run_budget) == 0) {
> -		/* Flush expired group members as we go. */
> +		/* Park expired group members as we go. */
>  		list_add_tail(&next->quota_expired, &tg->expired);
>  		goto pick;
>  	}
>  
> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
> -		/* Same group, leave the running timer untouched. */
> -		goto out;
> -
> -	/* Arm limit timer for the new running group. */
> -	evl_start_timer(&qs->limit_timer,
> -			ktime_add(now, tg->run_budget),
> -			EVL_INFINITE);
> -out:
> -	tg->nr_active--;
> +	/* Arm new limit timer on change of running group. */
> +	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
> +		tg->run_start = evl_ktime_monotonic();
> +		evl_start_timer(&qs->limit_timer,
> +				ktime_add(tg->run_start, tg->run_budget),
> +				EVL_INFINITE);
> +	}
>  
>  	return next;
>  }
> @@ -542,7 +544,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>  	 * Unregister the group before we drop rq->lock. As a result,
>  	 * it won't accept threads anymore while we are busy moving
>  	 * the current members to the fifo class, and concurrent
> -	 * evl_quota_remove requests would receive -EINVAL.
> +	 * quota_remove requests would receive -EINVAL.
>  	 */
>  	__clear_bit(tg->tgid, group_map);
>  	list_del(&tg->next);
> @@ -556,7 +558,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>  	 * hold rq->lock on entry, we do a trylock dance to prevent an
>  	 * ABBA issue. No livelock is possible since we unregistered
>  	 * that group already, so &tg->members can only be depleted
> -	 * (by this loop specifically).
> +	 * (by this loop exclusively).
>  	 */
>  
>  	while (!list_empty(&tg->members)) {
> @@ -583,10 +585,10 @@ static void quota_set_limit(struct evl_quota_group *tg,
>  			int *quota_sum_r)
>  {
>  	struct evl_rq *rq = tg->rq;
> -	struct evl_thread *thread, *tmp, *curr = rq->curr;
> +	struct evl_thread *thread, *tmp;
>  	struct evl_sched_quota *qs = &rq->quota;
> -	ktime_t now, elapsed, consumed;
>  	ktime_t old_quota = tg->quota;
> +	ktime_t consumed;
>  	u64 n;
>  
>  	assert_hard_lock(&rq->lock);
> @@ -615,16 +617,8 @@ static void quota_set_limit(struct evl_quota_group *tg,
>  	tg->quota_percent = quota_percent;
>  	tg->quota_peak_percent = quota_peak_percent;
>  
> -	if (thread_on_quota(curr, tg)) {
> -		now = evl_ktime_monotonic();
> -
> -		elapsed = now - tg->run_start;
> -		if (elapsed < tg->run_budget)
> -			tg->run_budget -= elapsed;
> -		else
> -			tg->run_budget = 0;
> -
> -		tg->run_start = now;
> +	if (current_on_quota(tg)) {
> +		quota_out(rq->curr);
>  		evl_stop_timer(&qs->limit_timer);
>  	}
>  
> @@ -646,7 +640,7 @@ static void quota_set_limit(struct evl_quota_group *tg,
>  		list_for_each_entry_safe_reverse(thread, tmp, &tg->expired,
>  						quota_expired) {
>  			list_del_init(&thread->quota_expired);
> -			evl_add_schedq(&rq->fifo.runnable, thread);
> +			evl_add_schedq(&qs->runnable, thread);
>  		}
>  	}
>  
> @@ -786,6 +780,7 @@ struct evl_sched_class evl_sched_quota = {
>  	.sched_dequeue		=	quota_dequeue,
>  	.sched_requeue		=	quota_requeue,
>  	.sched_pick		=	quota_pick,
> +	.sched_out		=	quota_out,
>  	.sched_migrate		=	quota_migrate,
>  	.sched_chkparam		=	quota_chkparam,
>  	.sched_setparam		=	quota_setparam,
> 

Could you summarize the key difference to my patch 2? Which preemption
or accounting cases did you additionally identify?

Jan

-- 
Siemens AG, Foundational Technologies
Linux Expert Center

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  6:42   ` Philippe Gerum
  2026-06-15  6:44     ` Jan Kiszka
@ 2026-06-15  6:46     ` Jan Kiszka
  2026-06-15  7:14       ` Philippe Gerum
  2026-06-15  6:58     ` Jan Kiszka
  2026-06-15  9:04     ` Jan Kiszka
  3 siblings, 1 reply; 24+ messages in thread
From: Jan Kiszka @ 2026-06-15  6:46 UTC (permalink / raw)
  To: Philippe Gerum; +Cc: Xenomai

On 15.06.26 08:42, Philippe Gerum wrote:
> Philippe Gerum <rpm@xenomai.org> writes:
> 
>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>
>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>
>>> This shall be invoked before a thread switch, providing both the current
>>> and the next thread as arguments. Some scheduling classes may need it to
>>> correctly handle their state as the sched_pick may not be invoked when a
>>> higher-weighted class is providing the next thread.
>>>
>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>> ---
>>>  include/evl/sched.h     | 2 ++
>>>  kernel/evl/sched/core.c | 5 +++++
>>>  2 files changed, 7 insertions(+)
>>>
>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>> index ae9690860146c..0b16f1b1cf626 100644
>>> --- a/include/evl/sched.h
>>> +++ b/include/evl/sched.h
>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>> +	void (*sched_out)(struct evl_thread *thread,
>>> +			  struct evl_thread *next);
>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>  			      struct evl_rq *rq);
>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>> index eb133e334d30f..0d49fc16bd67e 100644
>>> --- a/kernel/evl/sched/core.c
>>> +++ b/kernel/evl/sched/core.c
>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>   */
>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>  {
>>> +	struct evl_sched_class *prev_schedclass;
>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>  	struct evl_thread *prev, *next, *curr;
>>>  	bool leaving_inband, inband_tail;
>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>  	this_rq->curr = next;
>>>  	leaving_inband = false;
>>>  
>>> +	prev_schedclass = prev->sched_class;
>>> +	if (prev_schedclass->sched_out)
>>> +		prev_schedclass->sched_out(prev, next);
>>> +
>>>  	/*
>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>  	 * performing the rest of the context switch code
>>
>> I've been working on this lately too. It turns out that we need more
>> than this, although this is definitely part of the solution. I'll follow
>> up on this issue.
> 
> This is still wip, I'm sharing this early to discuss details for
> reconciling both proposals (the hunk in __evl_schedule() is merely
> cosmetic, no functional change).
> 
> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
> Author: Philippe Gerum <rpm@xenomai.org>
> Date:   Sun Jun 14 12:08:59 2026 +0200
> 
>     evl: sched/quota: fix budget tracking on preemption
>     
>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>     runtime budget of the former is inaccurately tracked. This is due to
>     the fifo->pick() handler returning a valid thread, which prevents the
>     quota->pick() handler from being called. As a result, the last runtime
>     period of the outgoing thread is not accounted for.
>     
>     To fix this, we introduce a new sched_out() handler which is called
>     for the outgoing thread, which the quota policy uses to update the
>     remaining budget of preempted threads appropriately. In addition, the
>     implementation no longer shares the runnable thread queue with
>     SCHED_FIFO.
>     
>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
> 
> diff --git a/include/evl/sched.h b/include/evl/sched.h
> index ae9690860146..cc824c28004b 100644
> --- a/include/evl/sched.h
> +++ b/include/evl/sched.h
> @@ -120,6 +120,7 @@ struct evl_sched_class {
>  	void (*sched_dequeue)(struct evl_thread *thread);
>  	void (*sched_requeue)(struct evl_thread *thread);
>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
> +	void (*sched_out)(struct evl_thread *thread);

The out handler also needs the target thread in order to identify if
there is a tg change or not.

Jan

-- 
Siemens AG, Foundational Technologies
Linux Expert Center

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  6:46     ` Jan Kiszka
@ 2026-06-15  7:14       ` Philippe Gerum
  2026-06-15  7:21         ` Jan Kiszka
  0 siblings, 1 reply; 24+ messages in thread
From: Philippe Gerum @ 2026-06-15  7:14 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Xenomai

Jan Kiszka <jan.kiszka@siemens.com> writes:

> On 15.06.26 08:42, Philippe Gerum wrote:
>> Philippe Gerum <rpm@xenomai.org> writes:
>> 
>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>
>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>
>>>> This shall be invoked before a thread switch, providing both the current
>>>> and the next thread as arguments. Some scheduling classes may need it to
>>>> correctly handle their state as the sched_pick may not be invoked when a
>>>> higher-weighted class is providing the next thread.
>>>>
>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>> ---
>>>>  include/evl/sched.h     | 2 ++
>>>>  kernel/evl/sched/core.c | 5 +++++
>>>>  2 files changed, 7 insertions(+)
>>>>
>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>> index ae9690860146c..0b16f1b1cf626 100644
>>>> --- a/include/evl/sched.h
>>>> +++ b/include/evl/sched.h
>>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>> +	void (*sched_out)(struct evl_thread *thread,
>>>> +			  struct evl_thread *next);
>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>  			      struct evl_rq *rq);
>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>> index eb133e334d30f..0d49fc16bd67e 100644
>>>> --- a/kernel/evl/sched/core.c
>>>> +++ b/kernel/evl/sched/core.c
>>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>>   */
>>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>  {
>>>> +	struct evl_sched_class *prev_schedclass;
>>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>>  	struct evl_thread *prev, *next, *curr;
>>>>  	bool leaving_inband, inband_tail;
>>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>  	this_rq->curr = next;
>>>>  	leaving_inband = false;
>>>>  
>>>> +	prev_schedclass = prev->sched_class;
>>>> +	if (prev_schedclass->sched_out)
>>>> +		prev_schedclass->sched_out(prev, next);
>>>> +
>>>>  	/*
>>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>>  	 * performing the rest of the context switch code
>>>
>>> I've been working on this lately too. It turns out that we need more
>>> than this, although this is definitely part of the solution. I'll follow
>>> up on this issue.
>> 
>> This is still wip, I'm sharing this early to discuss details for
>> reconciling both proposals (the hunk in __evl_schedule() is merely
>> cosmetic, no functional change).
>> 
>> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
>> Author: Philippe Gerum <rpm@xenomai.org>
>> Date:   Sun Jun 14 12:08:59 2026 +0200
>> 
>>     evl: sched/quota: fix budget tracking on preemption
>>     
>>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>>     runtime budget of the former is inaccurately tracked. This is due to
>>     the fifo->pick() handler returning a valid thread, which prevents the
>>     quota->pick() handler from being called. As a result, the last runtime
>>     period of the outgoing thread is not accounted for.
>>     
>>     To fix this, we introduce a new sched_out() handler which is called
>>     for the outgoing thread, which the quota policy uses to update the
>>     remaining budget of preempted threads appropriately. In addition, the
>>     implementation no longer shares the runnable thread queue with
>>     SCHED_FIFO.
>>     
>>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
>> 
>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>> index ae9690860146..cc824c28004b 100644
>> --- a/include/evl/sched.h
>> +++ b/include/evl/sched.h
>> @@ -120,6 +120,7 @@ struct evl_sched_class {
>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>  	void (*sched_requeue)(struct evl_thread *thread);
>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>> +	void (*sched_out)(struct evl_thread *thread);
>
> The out handler also needs the target thread in order to identify if
> there is a tg change or not.
>

Your implementation needs this because the sequence is pick() -> out(),
mine is out() -> pick(), with the latter using the budget updated by the
former to figure out what to do next.

-- 
Philippe.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  7:14       ` Philippe Gerum
@ 2026-06-15  7:21         ` Jan Kiszka
  2026-06-15  7:29           ` Philippe Gerum
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Kiszka @ 2026-06-15  7:21 UTC (permalink / raw)
  To: Philippe Gerum; +Cc: Xenomai

On 15.06.26 09:14, Philippe Gerum wrote:
> Jan Kiszka <jan.kiszka@siemens.com> writes:
> 
>> On 15.06.26 08:42, Philippe Gerum wrote:
>>> Philippe Gerum <rpm@xenomai.org> writes:
>>>
>>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>>
>>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>
>>>>> This shall be invoked before a thread switch, providing both the current
>>>>> and the next thread as arguments. Some scheduling classes may need it to
>>>>> correctly handle their state as the sched_pick may not be invoked when a
>>>>> higher-weighted class is providing the next thread.
>>>>>
>>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>>> ---
>>>>>  include/evl/sched.h     | 2 ++
>>>>>  kernel/evl/sched/core.c | 5 +++++
>>>>>  2 files changed, 7 insertions(+)
>>>>>
>>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>>> index ae9690860146c..0b16f1b1cf626 100644
>>>>> --- a/include/evl/sched.h
>>>>> +++ b/include/evl/sched.h
>>>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>>> +	void (*sched_out)(struct evl_thread *thread,
>>>>> +			  struct evl_thread *next);
>>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>>  			      struct evl_rq *rq);
>>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>>> index eb133e334d30f..0d49fc16bd67e 100644
>>>>> --- a/kernel/evl/sched/core.c
>>>>> +++ b/kernel/evl/sched/core.c
>>>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>>>   */
>>>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>  {
>>>>> +	struct evl_sched_class *prev_schedclass;
>>>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>>>  	struct evl_thread *prev, *next, *curr;
>>>>>  	bool leaving_inband, inband_tail;
>>>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>  	this_rq->curr = next;
>>>>>  	leaving_inband = false;
>>>>>  
>>>>> +	prev_schedclass = prev->sched_class;
>>>>> +	if (prev_schedclass->sched_out)
>>>>> +		prev_schedclass->sched_out(prev, next);
>>>>> +
>>>>>  	/*
>>>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>>>  	 * performing the rest of the context switch code
>>>>
>>>> I've been working on this lately too. It turns out that we need more
>>>> than this, although this is definitely part of the solution. I'll follow
>>>> up on this issue.
>>>
>>> This is still wip, I'm sharing this early to discuss details for
>>> reconciling both proposals (the hunk in __evl_schedule() is merely
>>> cosmetic, no functional change).
>>>
>>> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
>>> Author: Philippe Gerum <rpm@xenomai.org>
>>> Date:   Sun Jun 14 12:08:59 2026 +0200
>>>
>>>     evl: sched/quota: fix budget tracking on preemption
>>>     
>>>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>>>     runtime budget of the former is inaccurately tracked. This is due to
>>>     the fifo->pick() handler returning a valid thread, which prevents the
>>>     quota->pick() handler from being called. As a result, the last runtime
>>>     period of the outgoing thread is not accounted for.
>>>     
>>>     To fix this, we introduce a new sched_out() handler which is called
>>>     for the outgoing thread, which the quota policy uses to update the
>>>     remaining budget of preempted threads appropriately. In addition, the
>>>     implementation no longer shares the runnable thread queue with
>>>     SCHED_FIFO.
>>>     
>>>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
>>>
>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>> index ae9690860146..cc824c28004b 100644
>>> --- a/include/evl/sched.h
>>> +++ b/include/evl/sched.h
>>> @@ -120,6 +120,7 @@ struct evl_sched_class {
>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>> +	void (*sched_out)(struct evl_thread *thread);
>>
>> The out handler also needs the target thread in order to identify if
>> there is a tg change or not.
>>
> 
> Your implementation needs this because the sequence is pick() -> out(),
> mine is out() -> pick(), with the latter using the budget updated by the
> former to figure out what to do next.

But that will call out even on old == new - does not sound right.

Jan

-- 
Siemens AG, Foundational Technologies
Linux Expert Center

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  7:21         ` Jan Kiszka
@ 2026-06-15  7:29           ` Philippe Gerum
  0 siblings, 0 replies; 24+ messages in thread
From: Philippe Gerum @ 2026-06-15  7:29 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Xenomai

Jan Kiszka <jan.kiszka@siemens.com> writes:

> On 15.06.26 09:14, Philippe Gerum wrote:
>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>> 
>>> On 15.06.26 08:42, Philippe Gerum wrote:
>>>> Philippe Gerum <rpm@xenomai.org> writes:
>>>>
>>>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>>>
>>>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>>
>>>>>> This shall be invoked before a thread switch, providing both the current
>>>>>> and the next thread as arguments. Some scheduling classes may need it to
>>>>>> correctly handle their state as the sched_pick may not be invoked when a
>>>>>> higher-weighted class is providing the next thread.
>>>>>>
>>>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>> ---
>>>>>>  include/evl/sched.h     | 2 ++
>>>>>>  kernel/evl/sched/core.c | 5 +++++
>>>>>>  2 files changed, 7 insertions(+)
>>>>>>
>>>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>>>> index ae9690860146c..0b16f1b1cf626 100644
>>>>>> --- a/include/evl/sched.h
>>>>>> +++ b/include/evl/sched.h
>>>>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>>>> +	void (*sched_out)(struct evl_thread *thread,
>>>>>> +			  struct evl_thread *next);
>>>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>>>  			      struct evl_rq *rq);
>>>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>>>> index eb133e334d30f..0d49fc16bd67e 100644
>>>>>> --- a/kernel/evl/sched/core.c
>>>>>> +++ b/kernel/evl/sched/core.c
>>>>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>>>>   */
>>>>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>  {
>>>>>> +	struct evl_sched_class *prev_schedclass;
>>>>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>>>>  	struct evl_thread *prev, *next, *curr;
>>>>>>  	bool leaving_inband, inband_tail;
>>>>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>  	this_rq->curr = next;
>>>>>>  	leaving_inband = false;
>>>>>>  
>>>>>> +	prev_schedclass = prev->sched_class;
>>>>>> +	if (prev_schedclass->sched_out)
>>>>>> +		prev_schedclass->sched_out(prev, next);
>>>>>> +
>>>>>>  	/*
>>>>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>>>>  	 * performing the rest of the context switch code
>>>>>
>>>>> I've been working on this lately too. It turns out that we need more
>>>>> than this, although this is definitely part of the solution. I'll follow
>>>>> up on this issue.
>>>>
>>>> This is still wip, I'm sharing this early to discuss details for
>>>> reconciling both proposals (the hunk in __evl_schedule() is merely
>>>> cosmetic, no functional change).
>>>>
>>>> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
>>>> Author: Philippe Gerum <rpm@xenomai.org>
>>>> Date:   Sun Jun 14 12:08:59 2026 +0200
>>>>
>>>>     evl: sched/quota: fix budget tracking on preemption
>>>>     
>>>>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>>>>     runtime budget of the former is inaccurately tracked. This is due to
>>>>     the fifo->pick() handler returning a valid thread, which prevents the
>>>>     quota->pick() handler from being called. As a result, the last runtime
>>>>     period of the outgoing thread is not accounted for.
>>>>     
>>>>     To fix this, we introduce a new sched_out() handler which is called
>>>>     for the outgoing thread, which the quota policy uses to update the
>>>>     remaining budget of preempted threads appropriately. In addition, the
>>>>     implementation no longer shares the runnable thread queue with
>>>>     SCHED_FIFO.
>>>>     
>>>>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
>>>>
>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>> index ae9690860146..cc824c28004b 100644
>>>> --- a/include/evl/sched.h
>>>> +++ b/include/evl/sched.h
>>>> @@ -120,6 +120,7 @@ struct evl_sched_class {
>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>> +	void (*sched_out)(struct evl_thread *thread);
>>>
>>> The out handler also needs the target thread in order to identify if
>>> there is a tg change or not.
>>>
>> 
>> Your implementation needs this because the sequence is pick() -> out(),
>> mine is out() -> pick(), with the latter using the budget updated by the
>> former to figure out what to do next.
>
> But that will call out even on old == new - does not sound right.
>

Call to determine that next ends up being prev, then filtered out by
quota_pick() as a consequence. This is strictly identical
performance-wise.

-- 
Philippe.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  6:42   ` Philippe Gerum
  2026-06-15  6:44     ` Jan Kiszka
  2026-06-15  6:46     ` Jan Kiszka
@ 2026-06-15  6:58     ` Jan Kiszka
  2026-06-15  7:17       ` Philippe Gerum
  2026-06-15  8:09       ` Philippe Gerum
  2026-06-15  9:04     ` Jan Kiszka
  3 siblings, 2 replies; 24+ messages in thread
From: Jan Kiszka @ 2026-06-15  6:58 UTC (permalink / raw)
  To: Philippe Gerum; +Cc: Xenomai

On 15.06.26 08:42, Philippe Gerum wrote:
> Philippe Gerum <rpm@xenomai.org> writes:
> 
>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>
>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>
>>> This shall be invoked before a thread switch, providing both the current
>>> and the next thread as arguments. Some scheduling classes may need it to
>>> correctly handle their state as the sched_pick may not be invoked when a
>>> higher-weighted class is providing the next thread.
>>>
>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>> ---
>>>  include/evl/sched.h     | 2 ++
>>>  kernel/evl/sched/core.c | 5 +++++
>>>  2 files changed, 7 insertions(+)
>>>
>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>> index ae9690860146c..0b16f1b1cf626 100644
>>> --- a/include/evl/sched.h
>>> +++ b/include/evl/sched.h
>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>> +	void (*sched_out)(struct evl_thread *thread,
>>> +			  struct evl_thread *next);
>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>  			      struct evl_rq *rq);
>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>> index eb133e334d30f..0d49fc16bd67e 100644
>>> --- a/kernel/evl/sched/core.c
>>> +++ b/kernel/evl/sched/core.c
>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>   */
>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>  {
>>> +	struct evl_sched_class *prev_schedclass;
>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>  	struct evl_thread *prev, *next, *curr;
>>>  	bool leaving_inband, inband_tail;
>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>  	this_rq->curr = next;
>>>  	leaving_inband = false;
>>>  
>>> +	prev_schedclass = prev->sched_class;
>>> +	if (prev_schedclass->sched_out)
>>> +		prev_schedclass->sched_out(prev, next);
>>> +
>>>  	/*
>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>  	 * performing the rest of the context switch code
>>
>> I've been working on this lately too. It turns out that we need more
>> than this, although this is definitely part of the solution. I'll follow
>> up on this issue.
> 
> This is still wip, I'm sharing this early to discuss details for
> reconciling both proposals (the hunk in __evl_schedule() is merely
> cosmetic, no functional change).
> 

Please avoid that for the final patch. I already had troubles telling
substantial from cosmetical changes apart in your lazy-blocking commit
(>50% unrelated changes in there).

> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
> Author: Philippe Gerum <rpm@xenomai.org>
> Date:   Sun Jun 14 12:08:59 2026 +0200
> 
>     evl: sched/quota: fix budget tracking on preemption
>     
>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>     runtime budget of the former is inaccurately tracked. This is due to
>     the fifo->pick() handler returning a valid thread, which prevents the
>     quota->pick() handler from being called. As a result, the last runtime
>     period of the outgoing thread is not accounted for.
>     
>     To fix this, we introduce a new sched_out() handler which is called
>     for the outgoing thread, which the quota policy uses to update the
>     remaining budget of preempted threads appropriately. In addition, the
>     implementation no longer shares the runnable thread queue with
>     SCHED_FIFO.

...and why we need a separate queue now? Please provide reasoning for
changes.

>     
>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
> 
> diff --git a/include/evl/sched.h b/include/evl/sched.h
> index ae9690860146..cc824c28004b 100644
> --- a/include/evl/sched.h
> +++ b/include/evl/sched.h
> @@ -120,6 +120,7 @@ struct evl_sched_class {
>  	void (*sched_dequeue)(struct evl_thread *thread);
>  	void (*sched_requeue)(struct evl_thread *thread);
>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
> +	void (*sched_out)(struct evl_thread *thread);
>  	void (*sched_yield)(struct evl_thread *thread);
>  	void (*sched_migrate)(struct evl_thread *thread,
>  			      struct evl_rq *rq);
> diff --git a/include/evl/sched/quota.h b/include/evl/sched/quota.h
> index dfe3b7390958..dc8416645da8 100644
> --- a/include/evl/sched/quota.h
> +++ b/include/evl/sched/quota.h
> @@ -40,6 +40,7 @@ struct evl_quota_group {
>  
>  struct evl_sched_quota {
>  	ktime_t period;
> +	struct evl_sched_queue runnable;
>  	struct evl_timer refill_timer;
>  	struct evl_timer limit_timer;
>  	struct list_head groups;
> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
> index d1d025e06a5d..7127d4e52df9 100644
> --- a/kernel/evl/sched/core.c
> +++ b/kernel/evl/sched/core.c
> @@ -773,7 +773,8 @@ static inline void set_next_running(struct evl_rq *rq,
>  		evl_stop_timer(&rq->rrbtimer);
>  }
>  
> -static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
> +static __always_inline struct evl_thread *
> +__pick_next_thread(struct evl_rq *rq)
>  {
>  	struct evl_sched_class *sched_class;
>  	struct evl_thread *curr = rq->curr;
> @@ -821,8 +822,14 @@ static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>  /* rq->curr->lock + rq->lock held, hard irqs off. */
>  static struct evl_thread *pick_next_thread(struct evl_rq *rq)
>  {
> -	struct evl_thread *next = __pick_next_thread(rq);
> +	struct evl_thread *next, *prev = rq->curr;
> +	struct evl_sched_class *prev_class = prev->sched_class;
>  
> +	if (prev_class->sched_out)
> +		prev_class->sched_out(prev);
> +
> +	next = __pick_next_thread(rq);
> +	trace_evl_pick_thread(next);
>  	set_next_running(rq, next);
>  
>  	return next;
> @@ -972,21 +979,20 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>  		return;
>  	}
>  
> +	prev = curr;
>  	next = pick_next_thread(this_rq);
> -	trace_evl_pick_thread(next);
> -	if (next == curr) {
> -		if (unlikely(next->state & EVL_T_ROOT)) {
> +	if (next == prev) {
> +		if (unlikely(prev->state & EVL_T_ROOT)) {
>  			if (this_rq->local_flags & RQ_TPROXY)
>  				evl_notify_proxy_tick(this_rq);
>  			if (this_rq->local_flags & RQ_TDEFER)
>  				evl_program_local_tick(&evl_mono_clock);
>  		}
>  		raw_spin_unlock(&this_rq->lock);
> -		raw_spin_unlock_irqrestore(&curr->lock, flags);
> +		raw_spin_unlock_irqrestore(&prev->lock, flags);
>  		return;
>  	}
>  
> -	prev = curr;
>  	this_rq->curr = next;
>  	leaving_inband = false;
>  
> diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
> index 0829da711a66..14748e3a843e 100644
> --- a/kernel/evl/sched/quota.c
> +++ b/kernel/evl/sched/quota.c
> @@ -12,45 +12,35 @@
>  #include <uapi/evl/sched-abi.h>
>  
>  /*
> - * With this policy, each per-CPU runqueue maintains a list of active
> - * thread groups for the sched_fifo class.
> - *
> - * Each time a thread is picked from the runqueue, we check whether we
> - * still have budget for running it, looking at the group it belongs
> - * to. If so, a timer is armed to elapse when that group has no more
> - * budget, would the incoming thread run unpreempted until then
> - * (i.e. evl_quota->limit_timer).
> + * Each time a thread is picked from the ->runnable queue, we check
> + * whether the group it belongs to still has runtime budget.  If so, a
> + * timer is armed to fire when that group has no more budget, would
> + * the incoming thread run unpreempted until then
> + * (i.e. quota->limit_timer).
>   *
>   * Otherwise, if no budget remains in the group for running the
>   * candidate thread, we move the latter to a local expiry queue
>   * maintained by the group. This process is done on the fly as we pull
> - * from the runqueue.
> + * from the ->runnable queue.
>   *
> - * Updating the remaining budget is done each time the EVL core asks
> - * for replacing the current thread with the next runnable one,
> - * i.e. evl_quota_pick(). There we charge the elapsed run time of the
> - * outgoing thread to the relevant group, and conversely, we check
> - * whether the incoming thread has budget.
> + * Updating the remaining budget is done each time the EVL core
> + * schedules out a thread undergoing the quota scheduling policy,
>   *
> - * Finally, a per-CPU timer (evl_quota->refill_timer) periodically
> - * ticks in the background, in accordance to the defined quota
> - * interval. Thread group budgets get replenished by its handler in
> - * accordance to their respective share, pushing all expired threads
> - * back to the run queue in the same move.
> + * Finally, a per-CPU timer (quota->refill_timer) periodically ticks
> + * in the background, in accordance to the defined quota interval,
> + * replenishing per-group budgets, pushing all expired threads back to
> + * the quota ->runqueue too.
>   *
> - * NOTE: since the core logic enforcing the budget entirely happens in
> - * evl_quota_pick(), applying a budget change can be done as simply as
> - * forcing the rescheduling procedure to be invoked asap. As a result
> - * of this, the EVL core will ask for the next thread to run, which
> - * means calling evl_quota_pick() eventually.
> + * NOTE: forcing a call to the rescheduling procedure is enoiugh to
> + * apply a budget change.
>   *
> - * CAUTION: evl_quota_group->nr_active does count both the threads
> - * from that group linked to the sched_fifo runqueue, _and_ the
> - * threads moved to the local expiry queue. As a matter of fact, the
> - * expired threads - those for which we consumed all the per-group
> - * budget - are still seen as runnable (i.e. not blocked/suspended) by
> - * the EVL core. This only means that the SCHED_QUOTA policy won't
> - * pick them until the corresponding budget is replenished.
> + * CAUTION: quota_group->nr_active does count both the threads from
> + * that group linked to the runnable queue, _and_ the threads moved to
> + * the local expiry queue. As a matter of fact, the expired threads -
> + * those for which we consumed all the per-group budget - are still
> + * seen as runnable (i.e. not blocked/suspended) by the EVL core. This
> + * only means that the SCHED_QUOTA policy won't pick them until the
> + * corresponding budget is replenished.
>   */
>  
>  #define MAX_QUOTA_GROUPS  1024
> @@ -61,15 +51,19 @@ static DECLARE_BITMAP(group_map, MAX_QUOTA_GROUPS);
>  
>  static LIST_HEAD(group_list);
>  
> -static inline bool thread_on_quota(struct evl_thread *thread,
> -				struct evl_quota_group *tg)
> +static inline bool current_on_quota(struct evl_quota_group *tg)
>  {
> -	/*
> -	 * Check whether @thread is running on some CPU, and belongs
> -	 * to quota group @tg.
> -	 */
> -	return thread->quota == tg &&
> -		!(thread->state & (EVL_T_READY|EVL_THREAD_BLOCK_MASK));
> +	struct evl_rq *rq = tg->rq;
> +	struct evl_thread *curr = rq->curr;
> +	struct evl_sched_quota *qs = &rq->quota;
> +
> +	if (curr->quota != tg)
> +		return false;
> +
> +	if (curr->state & (EVL_T_READY|EVL_T_KICKED|EVL_THREAD_BLOCK_MASK))
> +		return false;
> +
> +	return evl_timer_is_running(&qs->limit_timer);
>  }
>  
>  static inline bool group_is_active(struct evl_quota_group *tg)
> @@ -82,7 +76,7 @@ static inline bool group_is_active(struct evl_quota_group *tg)
>  	 * runqueue, in which case tg->nr_active already accounted for
>  	 * it.
>  	 */
> -	return thread_on_quota(tg->rq->curr, tg);
> +	return current_on_quota(tg);
>  }
>  
>  static inline void replenish_budget(struct evl_sched_quota *qs,
> @@ -134,10 +128,10 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>  	} else if (tg->run_credit) {
>  		credit = ktime_sub(tg->quota_peak, budget);
>  		/* Consume the accumulated credit. */
> -		if (tg->run_credit >= credit)
> +		if (tg->run_credit >= credit) {
>  			tg->run_credit =
>  				ktime_sub(tg->run_credit, credit);
> -		else {
> +		} else {
>  			credit = tg->run_credit;
>  			tg->run_credit = 0;
>  		}
> @@ -150,8 +144,8 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>  
>  static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled */
>  {
> -	struct evl_quota_group *tg;
>  	struct evl_thread *thread, *tmp;
> +	struct evl_quota_group *tg;
>  	struct evl_sched_quota *qs;
>  	struct evl_rq *rq;
>  
> @@ -167,7 +161,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>  		if (tg->run_budget == 0 || list_empty(&tg->expired))
>  			continue;
>  		/*
> -		 * For each group living on this CPU, move all expired
> +		 * For each group pinned on this CPU, move all expired
>  		 * threads back to the runqueue. Since those threads
>  		 * were moved out of the runqueue as we were
>  		 * considering them for execution, we push them back
> @@ -178,7 +172,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>  		list_for_each_entry_safe_reverse(thread, tmp,
>  						&tg->expired, quota_expired) {
>  			list_del_init(&thread->quota_expired);
> -			evl_add_schedq(&rq->fifo.runnable, thread);
> +			evl_add_schedq(&qs->runnable, thread);
>  		}
>  	}
>  
> @@ -195,7 +189,7 @@ static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
>  	/*
>  	 * Force a rescheduling on the return path of the current
>  	 * interrupt, so that the budget is re-evaluated for the
> -	 * current group in evl_quota_pick().
> +	 * current group in quota_pick().
>  	 */
>  	raw_spin_lock(&rq->lock);
>  	evl_set_self_resched(rq);
> @@ -221,6 +215,7 @@ static void quota_init(struct evl_rq *rq)
>  {
>  	struct evl_sched_quota *qs = &rq->quota;
>  
> +	evl_init_schedq(&qs->runnable);
>  	qs->period = quota_period;
>  	INIT_LIST_HEAD(&qs->groups);
>  
> @@ -337,8 +332,8 @@ static void quota_forget(struct evl_thread *thread)
>  
>  static void quota_kick(struct evl_thread *thread)
>  {
> +	struct evl_sched_quota *qs = &thread->rq->quota;
>  	struct evl_quota_group *tg = thread->quota;
> -	struct evl_rq *rq = thread->rq;
>  
>  	/*
>  	 * Allow a kicked thread to be elected for running until it
> @@ -347,7 +342,7 @@ static void quota_kick(struct evl_thread *thread)
>  	 */
>  	if (tg->run_budget == 0 && !list_empty(&thread->quota_expired)) {
>  		list_del_init(&thread->quota_expired);
> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
> +		evl_add_schedq_tail(&qs->runnable, thread);
>  	}
>  }
>  
> @@ -358,79 +353,82 @@ static inline int thread_is_runnable(struct evl_thread *thread)
>  
>  static void quota_enqueue(struct evl_thread *thread)
>  {
> +	struct evl_sched_quota *qs = &thread->rq->quota;
>  	struct evl_quota_group *tg = thread->quota;
> -	struct evl_rq *rq = thread->rq;
>  
>  	if (!thread_is_runnable(thread))
>  		list_add_tail(&thread->quota_expired, &tg->expired);
>  	else
> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
> +		evl_add_schedq_tail(&qs->runnable, thread);
>  
>  	tg->nr_active++;
>  }
>  
>  static void quota_dequeue(struct evl_thread *thread)
>  {
> +	struct evl_sched_quota *qs = &thread->rq->quota;
>  	struct evl_quota_group *tg = thread->quota;
> -	struct evl_rq *rq = thread->rq;
>  
>  	if (!list_empty(&thread->quota_expired))
>  		list_del_init(&thread->quota_expired);
>  	else
> -		evl_del_schedq(&rq->fifo.runnable, thread);
> +		evl_del_schedq(&qs->runnable, thread);
>  
>  	tg->nr_active--;
>  }
>  
>  static void quota_requeue(struct evl_thread *thread)
>  {
> +	struct evl_sched_quota *qs = &thread->rq->quota;
>  	struct evl_quota_group *tg = thread->quota;
> -	struct evl_rq *rq = thread->rq;
>  
>  	if (!thread_is_runnable(thread))
>  		list_add(&thread->quota_expired, &tg->expired);
>  	else
> -		evl_add_schedq(&rq->fifo.runnable, thread);
> +		evl_add_schedq(&qs->runnable, thread);
>  
>  	tg->nr_active++;
>  }
>  
> -static struct evl_thread *quota_pick(struct evl_rq *rq)
> +static void quota_out(struct evl_thread *thread)
>  {
> -	struct evl_thread *next, *curr = rq->curr;
> -	struct evl_sched_quota *qs = &rq->quota;
> -	struct evl_quota_group *otg, *tg;
> -	ktime_t now, elapsed;
> +	struct evl_sched_quota *qs = &thread->rq->quota;
> +	struct evl_quota_group *tg = thread->quota;
> +	ktime_t now, consumed;
> +
> +	/* Timer off means that we are not tracking quota. */
> +	if (!evl_timer_is_running(&qs->limit_timer))
> +		return;
>  
> -	now = evl_ktime_monotonic();
> -	otg = curr->quota;
> -	if (otg == NULL)
> -		goto pick;
>  	/*
>  	 * Charge the time consumed by the outgoing thread to the
>  	 * group it belongs to.
>  	 */
> -	elapsed = ktime_sub(now, otg->run_start);
> -	if (elapsed < otg->run_budget)
> -		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
> -	else
> -		otg->run_budget = 0;
> +	now = evl_ktime_monotonic();

Thing brins some, though minor, glitch when doing the run_start update
for the next group already in pick. That's why I'm trying to reuse that
stamp when available.

> +	consumed = ktime_sub(now, tg->run_start);
> +	if (consumed < tg->run_budget) {
> +		tg->run_budget = ktime_sub(tg->run_budget, consumed);
> +	} else {
> +		tg->run_budget = 0;
> +		evl_stop_timer(&qs->limit_timer);
> +	}
> +}
> +
> +static struct evl_thread *quota_pick(struct evl_rq *rq)
> +{
> +	struct evl_thread *next, *curr = rq->curr;
> +	struct evl_sched_quota *qs = &rq->quota;
> +	struct evl_quota_group *tg;
> +
>  pick:
> -	next = evl_get_schedq(&rq->fifo.runnable);
> +	next = evl_get_schedq(&qs->runnable);
>  	if (next == NULL) {
>  		evl_stop_timer(&qs->limit_timer);
>  		return NULL;
>  	}
>  
> -	/*
> -	 * As we basically piggyback on the SCHED_FIFO runqueue, make
> -	 * sure to detect non-quota threads.
> -	 */
>  	tg = next->quota;
> -	if (tg == NULL)
> -		return next;
> -
> -	tg->run_start = now;
> +	tg->nr_active--;
>  
>  	/*
>  	 * Don't consider budget if kicked, we have to allow this
> @@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>  	 */
>  	if (next->info & EVL_T_KICKED) {
>  		evl_stop_timer(&qs->limit_timer);
> -		goto out;
> +		return next;
>  	}
>  
> +	/*
> +	 * __pick_next_thread() might have requeued the current
> +	 * thread which is still leading the pack.
> +	 */
> +	if (curr == next)
> +		return next;
> +
>  	if (ktime_to_ns(tg->run_budget) == 0) {
> -		/* Flush expired group members as we go. */
> +		/* Park expired group members as we go. */
>  		list_add_tail(&next->quota_expired, &tg->expired);
>  		goto pick;
>  	}
>  
> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
> -		/* Same group, leave the running timer untouched. */
> -		goto out;
> -
> -	/* Arm limit timer for the new running group. */
> -	evl_start_timer(&qs->limit_timer,
> -			ktime_add(now, tg->run_budget),
> -			EVL_INFINITE);
> -out:
> -	tg->nr_active--;
> +	/* Arm new limit timer on change of running group. */
> +	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
> +		tg->run_start = evl_ktime_monotonic();
> +		evl_start_timer(&qs->limit_timer,
> +				ktime_add(tg->run_start, tg->run_budget),
> +				EVL_INFINITE);
> +	}
>  
>  	return next;
>  }
> @@ -542,7 +544,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>  	 * Unregister the group before we drop rq->lock. As a result,
>  	 * it won't accept threads anymore while we are busy moving
>  	 * the current members to the fifo class, and concurrent
> -	 * evl_quota_remove requests would receive -EINVAL.
> +	 * quota_remove requests would receive -EINVAL.
>  	 */
>  	__clear_bit(tg->tgid, group_map);
>  	list_del(&tg->next);
> @@ -556,7 +558,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>  	 * hold rq->lock on entry, we do a trylock dance to prevent an
>  	 * ABBA issue. No livelock is possible since we unregistered
>  	 * that group already, so &tg->members can only be depleted
> -	 * (by this loop specifically).
> +	 * (by this loop exclusively).
>  	 */
>  
>  	while (!list_empty(&tg->members)) {
> @@ -583,10 +585,10 @@ static void quota_set_limit(struct evl_quota_group *tg,
>  			int *quota_sum_r)
>  {
>  	struct evl_rq *rq = tg->rq;
> -	struct evl_thread *thread, *tmp, *curr = rq->curr;
> +	struct evl_thread *thread, *tmp;
>  	struct evl_sched_quota *qs = &rq->quota;
> -	ktime_t now, elapsed, consumed;
>  	ktime_t old_quota = tg->quota;
> +	ktime_t consumed;
>  	u64 n;
>  
>  	assert_hard_lock(&rq->lock);
> @@ -615,16 +617,8 @@ static void quota_set_limit(struct evl_quota_group *tg,
>  	tg->quota_percent = quota_percent;
>  	tg->quota_peak_percent = quota_peak_percent;
>  
> -	if (thread_on_quota(curr, tg)) {
> -		now = evl_ktime_monotonic();
> -
> -		elapsed = now - tg->run_start;
> -		if (elapsed < tg->run_budget)
> -			tg->run_budget -= elapsed;
> -		else
> -			tg->run_budget = 0;
> -
> -		tg->run_start = now;
> +	if (current_on_quota(tg)) {
> +		quota_out(rq->curr);
>  		evl_stop_timer(&qs->limit_timer);
>  	}
>  
> @@ -646,7 +640,7 @@ static void quota_set_limit(struct evl_quota_group *tg,
>  		list_for_each_entry_safe_reverse(thread, tmp, &tg->expired,
>  						quota_expired) {
>  			list_del_init(&thread->quota_expired);
> -			evl_add_schedq(&rq->fifo.runnable, thread);
> +			evl_add_schedq(&qs->runnable, thread);
>  		}
>  	}
>  
> @@ -786,6 +780,7 @@ struct evl_sched_class evl_sched_quota = {
>  	.sched_dequeue		=	quota_dequeue,
>  	.sched_requeue		=	quota_requeue,
>  	.sched_pick		=	quota_pick,
> +	.sched_out		=	quota_out,
>  	.sched_migrate		=	quota_migrate,
>  	.sched_chkparam		=	quota_chkparam,
>  	.sched_setparam		=	quota_setparam,
> 

You also seem to miss the case I discovered only yesterday as well: When
starting the current thread on a quota via setparam, we also need to set
its run_start stamp at that point.

Please have a closer look at both my test case (libevl) as well as my
patch 2. Currently, I don't see remaining issues with both, but I'm all
ears to learn what I missed.

Jan

-- 
Siemens AG, Foundational Technologies
Linux Expert Center

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  6:58     ` Jan Kiszka
@ 2026-06-15  7:17       ` Philippe Gerum
  2026-06-15  7:22         ` Jan Kiszka
  2026-06-15  8:09       ` Philippe Gerum
  1 sibling, 1 reply; 24+ messages in thread
From: Philippe Gerum @ 2026-06-15  7:17 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Xenomai

Jan Kiszka <jan.kiszka@siemens.com> writes:

> On 15.06.26 08:42, Philippe Gerum wrote:
>> Philippe Gerum <rpm@xenomai.org> writes:
>> 
>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>
>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>
>>>> This shall be invoked before a thread switch, providing both the current
>>>> and the next thread as arguments. Some scheduling classes may need it to
>>>> correctly handle their state as the sched_pick may not be invoked when a
>>>> higher-weighted class is providing the next thread.
>>>>
>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>> ---
>>>>  include/evl/sched.h     | 2 ++
>>>>  kernel/evl/sched/core.c | 5 +++++
>>>>  2 files changed, 7 insertions(+)
>>>>
>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>> index ae9690860146c..0b16f1b1cf626 100644
>>>> --- a/include/evl/sched.h
>>>> +++ b/include/evl/sched.h
>>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>> +	void (*sched_out)(struct evl_thread *thread,
>>>> +			  struct evl_thread *next);
>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>  			      struct evl_rq *rq);
>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>> index eb133e334d30f..0d49fc16bd67e 100644
>>>> --- a/kernel/evl/sched/core.c
>>>> +++ b/kernel/evl/sched/core.c
>>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>>   */
>>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>  {
>>>> +	struct evl_sched_class *prev_schedclass;
>>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>>  	struct evl_thread *prev, *next, *curr;
>>>>  	bool leaving_inband, inband_tail;
>>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>  	this_rq->curr = next;
>>>>  	leaving_inband = false;
>>>>  
>>>> +	prev_schedclass = prev->sched_class;
>>>> +	if (prev_schedclass->sched_out)
>>>> +		prev_schedclass->sched_out(prev, next);
>>>> +
>>>>  	/*
>>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>>  	 * performing the rest of the context switch code
>>>
>>> I've been working on this lately too. It turns out that we need more
>>> than this, although this is definitely part of the solution. I'll follow
>>> up on this issue.
>> 
>> This is still wip, I'm sharing this early to discuss details for
>> reconciling both proposals (the hunk in __evl_schedule() is merely
>> cosmetic, no functional change).
>> 
>
> Please avoid that for the final patch. I already had troubles telling
> substantial from cosmetical changes apart in your lazy-blocking commit
> (>50% unrelated changes in there).
>
>> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
>> Author: Philippe Gerum <rpm@xenomai.org>
>> Date:   Sun Jun 14 12:08:59 2026 +0200
>> 
>>     evl: sched/quota: fix budget tracking on preemption
>>     
>>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>>     runtime budget of the former is inaccurately tracked. This is due to
>>     the fifo->pick() handler returning a valid thread, which prevents the
>>     quota->pick() handler from being called. As a result, the last runtime
>>     period of the outgoing thread is not accounted for.
>>     
>>     To fix this, we introduce a new sched_out() handler which is called
>>     for the outgoing thread, which the quota policy uses to update the
>>     remaining budget of preempted threads appropriately. In addition, the
>>     implementation no longer shares the runnable thread queue with
>>     SCHED_FIFO.
>
> ...and why we need a separate queue now? Please provide reasoning for
> changes.
>
>>     
>>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
>> 
>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>> index ae9690860146..cc824c28004b 100644
>> --- a/include/evl/sched.h
>> +++ b/include/evl/sched.h
>> @@ -120,6 +120,7 @@ struct evl_sched_class {
>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>  	void (*sched_requeue)(struct evl_thread *thread);
>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>> +	void (*sched_out)(struct evl_thread *thread);
>>  	void (*sched_yield)(struct evl_thread *thread);
>>  	void (*sched_migrate)(struct evl_thread *thread,
>>  			      struct evl_rq *rq);
>> diff --git a/include/evl/sched/quota.h b/include/evl/sched/quota.h
>> index dfe3b7390958..dc8416645da8 100644
>> --- a/include/evl/sched/quota.h
>> +++ b/include/evl/sched/quota.h
>> @@ -40,6 +40,7 @@ struct evl_quota_group {
>>  
>>  struct evl_sched_quota {
>>  	ktime_t period;
>> +	struct evl_sched_queue runnable;
>>  	struct evl_timer refill_timer;
>>  	struct evl_timer limit_timer;
>>  	struct list_head groups;
>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>> index d1d025e06a5d..7127d4e52df9 100644
>> --- a/kernel/evl/sched/core.c
>> +++ b/kernel/evl/sched/core.c
>> @@ -773,7 +773,8 @@ static inline void set_next_running(struct evl_rq *rq,
>>  		evl_stop_timer(&rq->rrbtimer);
>>  }
>>  
>> -static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>> +static __always_inline struct evl_thread *
>> +__pick_next_thread(struct evl_rq *rq)
>>  {
>>  	struct evl_sched_class *sched_class;
>>  	struct evl_thread *curr = rq->curr;
>> @@ -821,8 +822,14 @@ static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>  /* rq->curr->lock + rq->lock held, hard irqs off. */
>>  static struct evl_thread *pick_next_thread(struct evl_rq *rq)
>>  {
>> -	struct evl_thread *next = __pick_next_thread(rq);
>> +	struct evl_thread *next, *prev = rq->curr;
>> +	struct evl_sched_class *prev_class = prev->sched_class;
>>  
>> +	if (prev_class->sched_out)
>> +		prev_class->sched_out(prev);
>> +
>> +	next = __pick_next_thread(rq);
>> +	trace_evl_pick_thread(next);
>>  	set_next_running(rq, next);
>>  
>>  	return next;
>> @@ -972,21 +979,20 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>  		return;
>>  	}
>>  
>> +	prev = curr;
>>  	next = pick_next_thread(this_rq);
>> -	trace_evl_pick_thread(next);
>> -	if (next == curr) {
>> -		if (unlikely(next->state & EVL_T_ROOT)) {
>> +	if (next == prev) {
>> +		if (unlikely(prev->state & EVL_T_ROOT)) {
>>  			if (this_rq->local_flags & RQ_TPROXY)
>>  				evl_notify_proxy_tick(this_rq);
>>  			if (this_rq->local_flags & RQ_TDEFER)
>>  				evl_program_local_tick(&evl_mono_clock);
>>  		}
>>  		raw_spin_unlock(&this_rq->lock);
>> -		raw_spin_unlock_irqrestore(&curr->lock, flags);
>> +		raw_spin_unlock_irqrestore(&prev->lock, flags);
>>  		return;
>>  	}
>>  
>> -	prev = curr;
>>  	this_rq->curr = next;
>>  	leaving_inband = false;
>>  
>> diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
>> index 0829da711a66..14748e3a843e 100644
>> --- a/kernel/evl/sched/quota.c
>> +++ b/kernel/evl/sched/quota.c
>> @@ -12,45 +12,35 @@
>>  #include <uapi/evl/sched-abi.h>
>>  
>>  /*
>> - * With this policy, each per-CPU runqueue maintains a list of active
>> - * thread groups for the sched_fifo class.
>> - *
>> - * Each time a thread is picked from the runqueue, we check whether we
>> - * still have budget for running it, looking at the group it belongs
>> - * to. If so, a timer is armed to elapse when that group has no more
>> - * budget, would the incoming thread run unpreempted until then
>> - * (i.e. evl_quota->limit_timer).
>> + * Each time a thread is picked from the ->runnable queue, we check
>> + * whether the group it belongs to still has runtime budget.  If so, a
>> + * timer is armed to fire when that group has no more budget, would
>> + * the incoming thread run unpreempted until then
>> + * (i.e. quota->limit_timer).
>>   *
>>   * Otherwise, if no budget remains in the group for running the
>>   * candidate thread, we move the latter to a local expiry queue
>>   * maintained by the group. This process is done on the fly as we pull
>> - * from the runqueue.
>> + * from the ->runnable queue.
>>   *
>> - * Updating the remaining budget is done each time the EVL core asks
>> - * for replacing the current thread with the next runnable one,
>> - * i.e. evl_quota_pick(). There we charge the elapsed run time of the
>> - * outgoing thread to the relevant group, and conversely, we check
>> - * whether the incoming thread has budget.
>> + * Updating the remaining budget is done each time the EVL core
>> + * schedules out a thread undergoing the quota scheduling policy,
>>   *
>> - * Finally, a per-CPU timer (evl_quota->refill_timer) periodically
>> - * ticks in the background, in accordance to the defined quota
>> - * interval. Thread group budgets get replenished by its handler in
>> - * accordance to their respective share, pushing all expired threads
>> - * back to the run queue in the same move.
>> + * Finally, a per-CPU timer (quota->refill_timer) periodically ticks
>> + * in the background, in accordance to the defined quota interval,
>> + * replenishing per-group budgets, pushing all expired threads back to
>> + * the quota ->runqueue too.
>>   *
>> - * NOTE: since the core logic enforcing the budget entirely happens in
>> - * evl_quota_pick(), applying a budget change can be done as simply as
>> - * forcing the rescheduling procedure to be invoked asap. As a result
>> - * of this, the EVL core will ask for the next thread to run, which
>> - * means calling evl_quota_pick() eventually.
>> + * NOTE: forcing a call to the rescheduling procedure is enoiugh to
>> + * apply a budget change.
>>   *
>> - * CAUTION: evl_quota_group->nr_active does count both the threads
>> - * from that group linked to the sched_fifo runqueue, _and_ the
>> - * threads moved to the local expiry queue. As a matter of fact, the
>> - * expired threads - those for which we consumed all the per-group
>> - * budget - are still seen as runnable (i.e. not blocked/suspended) by
>> - * the EVL core. This only means that the SCHED_QUOTA policy won't
>> - * pick them until the corresponding budget is replenished.
>> + * CAUTION: quota_group->nr_active does count both the threads from
>> + * that group linked to the runnable queue, _and_ the threads moved to
>> + * the local expiry queue. As a matter of fact, the expired threads -
>> + * those for which we consumed all the per-group budget - are still
>> + * seen as runnable (i.e. not blocked/suspended) by the EVL core. This
>> + * only means that the SCHED_QUOTA policy won't pick them until the
>> + * corresponding budget is replenished.
>>   */
>>  
>>  #define MAX_QUOTA_GROUPS  1024
>> @@ -61,15 +51,19 @@ static DECLARE_BITMAP(group_map, MAX_QUOTA_GROUPS);
>>  
>>  static LIST_HEAD(group_list);
>>  
>> -static inline bool thread_on_quota(struct evl_thread *thread,
>> -				struct evl_quota_group *tg)
>> +static inline bool current_on_quota(struct evl_quota_group *tg)
>>  {
>> -	/*
>> -	 * Check whether @thread is running on some CPU, and belongs
>> -	 * to quota group @tg.
>> -	 */
>> -	return thread->quota == tg &&
>> -		!(thread->state & (EVL_T_READY|EVL_THREAD_BLOCK_MASK));
>> +	struct evl_rq *rq = tg->rq;
>> +	struct evl_thread *curr = rq->curr;
>> +	struct evl_sched_quota *qs = &rq->quota;
>> +
>> +	if (curr->quota != tg)
>> +		return false;
>> +
>> +	if (curr->state & (EVL_T_READY|EVL_T_KICKED|EVL_THREAD_BLOCK_MASK))
>> +		return false;
>> +
>> +	return evl_timer_is_running(&qs->limit_timer);
>>  }
>>  
>>  static inline bool group_is_active(struct evl_quota_group *tg)
>> @@ -82,7 +76,7 @@ static inline bool group_is_active(struct evl_quota_group *tg)
>>  	 * runqueue, in which case tg->nr_active already accounted for
>>  	 * it.
>>  	 */
>> -	return thread_on_quota(tg->rq->curr, tg);
>> +	return current_on_quota(tg);
>>  }
>>  
>>  static inline void replenish_budget(struct evl_sched_quota *qs,
>> @@ -134,10 +128,10 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>  	} else if (tg->run_credit) {
>>  		credit = ktime_sub(tg->quota_peak, budget);
>>  		/* Consume the accumulated credit. */
>> -		if (tg->run_credit >= credit)
>> +		if (tg->run_credit >= credit) {
>>  			tg->run_credit =
>>  				ktime_sub(tg->run_credit, credit);
>> -		else {
>> +		} else {
>>  			credit = tg->run_credit;
>>  			tg->run_credit = 0;
>>  		}
>> @@ -150,8 +144,8 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>  
>>  static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled */
>>  {
>> -	struct evl_quota_group *tg;
>>  	struct evl_thread *thread, *tmp;
>> +	struct evl_quota_group *tg;
>>  	struct evl_sched_quota *qs;
>>  	struct evl_rq *rq;
>>  
>> @@ -167,7 +161,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>  		if (tg->run_budget == 0 || list_empty(&tg->expired))
>>  			continue;
>>  		/*
>> -		 * For each group living on this CPU, move all expired
>> +		 * For each group pinned on this CPU, move all expired
>>  		 * threads back to the runqueue. Since those threads
>>  		 * were moved out of the runqueue as we were
>>  		 * considering them for execution, we push them back
>> @@ -178,7 +172,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>  		list_for_each_entry_safe_reverse(thread, tmp,
>>  						&tg->expired, quota_expired) {
>>  			list_del_init(&thread->quota_expired);
>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>> +			evl_add_schedq(&qs->runnable, thread);
>>  		}
>>  	}
>>  
>> @@ -195,7 +189,7 @@ static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
>>  	/*
>>  	 * Force a rescheduling on the return path of the current
>>  	 * interrupt, so that the budget is re-evaluated for the
>> -	 * current group in evl_quota_pick().
>> +	 * current group in quota_pick().
>>  	 */
>>  	raw_spin_lock(&rq->lock);
>>  	evl_set_self_resched(rq);
>> @@ -221,6 +215,7 @@ static void quota_init(struct evl_rq *rq)
>>  {
>>  	struct evl_sched_quota *qs = &rq->quota;
>>  
>> +	evl_init_schedq(&qs->runnable);
>>  	qs->period = quota_period;
>>  	INIT_LIST_HEAD(&qs->groups);
>>  
>> @@ -337,8 +332,8 @@ static void quota_forget(struct evl_thread *thread)
>>  
>>  static void quota_kick(struct evl_thread *thread)
>>  {
>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>  	struct evl_quota_group *tg = thread->quota;
>> -	struct evl_rq *rq = thread->rq;
>>  
>>  	/*
>>  	 * Allow a kicked thread to be elected for running until it
>> @@ -347,7 +342,7 @@ static void quota_kick(struct evl_thread *thread)
>>  	 */
>>  	if (tg->run_budget == 0 && !list_empty(&thread->quota_expired)) {
>>  		list_del_init(&thread->quota_expired);
>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>  	}
>>  }
>>  
>> @@ -358,79 +353,82 @@ static inline int thread_is_runnable(struct evl_thread *thread)
>>  
>>  static void quota_enqueue(struct evl_thread *thread)
>>  {
>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>  	struct evl_quota_group *tg = thread->quota;
>> -	struct evl_rq *rq = thread->rq;
>>  
>>  	if (!thread_is_runnable(thread))
>>  		list_add_tail(&thread->quota_expired, &tg->expired);
>>  	else
>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>  
>>  	tg->nr_active++;
>>  }
>>  
>>  static void quota_dequeue(struct evl_thread *thread)
>>  {
>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>  	struct evl_quota_group *tg = thread->quota;
>> -	struct evl_rq *rq = thread->rq;
>>  
>>  	if (!list_empty(&thread->quota_expired))
>>  		list_del_init(&thread->quota_expired);
>>  	else
>> -		evl_del_schedq(&rq->fifo.runnable, thread);
>> +		evl_del_schedq(&qs->runnable, thread);
>>  
>>  	tg->nr_active--;
>>  }
>>  
>>  static void quota_requeue(struct evl_thread *thread)
>>  {
>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>  	struct evl_quota_group *tg = thread->quota;
>> -	struct evl_rq *rq = thread->rq;
>>  
>>  	if (!thread_is_runnable(thread))
>>  		list_add(&thread->quota_expired, &tg->expired);
>>  	else
>> -		evl_add_schedq(&rq->fifo.runnable, thread);
>> +		evl_add_schedq(&qs->runnable, thread);
>>  
>>  	tg->nr_active++;
>>  }
>>  
>> -static struct evl_thread *quota_pick(struct evl_rq *rq)
>> +static void quota_out(struct evl_thread *thread)
>>  {
>> -	struct evl_thread *next, *curr = rq->curr;
>> -	struct evl_sched_quota *qs = &rq->quota;
>> -	struct evl_quota_group *otg, *tg;
>> -	ktime_t now, elapsed;
>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>> +	struct evl_quota_group *tg = thread->quota;
>> +	ktime_t now, consumed;
>> +
>> +	/* Timer off means that we are not tracking quota. */
>> +	if (!evl_timer_is_running(&qs->limit_timer))
>> +		return;
>>  
>> -	now = evl_ktime_monotonic();
>> -	otg = curr->quota;
>> -	if (otg == NULL)
>> -		goto pick;
>>  	/*
>>  	 * Charge the time consumed by the outgoing thread to the
>>  	 * group it belongs to.
>>  	 */
>> -	elapsed = ktime_sub(now, otg->run_start);
>> -	if (elapsed < otg->run_budget)
>> -		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
>> -	else
>> -		otg->run_budget = 0;
>> +	now = evl_ktime_monotonic();
>
> Thing brins some, though minor, glitch when doing the run_start update
> for the next group already in pick. That's why I'm trying to reuse that
> stamp when available.
>
>> +	consumed = ktime_sub(now, tg->run_start);
>> +	if (consumed < tg->run_budget) {
>> +		tg->run_budget = ktime_sub(tg->run_budget, consumed);
>> +	} else {
>> +		tg->run_budget = 0;
>> +		evl_stop_timer(&qs->limit_timer);
>> +	}
>> +}
>> +
>> +static struct evl_thread *quota_pick(struct evl_rq *rq)
>> +{
>> +	struct evl_thread *next, *curr = rq->curr;
>> +	struct evl_sched_quota *qs = &rq->quota;
>> +	struct evl_quota_group *tg;
>> +
>>  pick:
>> -	next = evl_get_schedq(&rq->fifo.runnable);
>> +	next = evl_get_schedq(&qs->runnable);
>>  	if (next == NULL) {
>>  		evl_stop_timer(&qs->limit_timer);
>>  		return NULL;
>>  	}
>>  
>> -	/*
>> -	 * As we basically piggyback on the SCHED_FIFO runqueue, make
>> -	 * sure to detect non-quota threads.
>> -	 */
>>  	tg = next->quota;
>> -	if (tg == NULL)
>> -		return next;
>> -
>> -	tg->run_start = now;
>> +	tg->nr_active--;
>>  
>>  	/*
>>  	 * Don't consider budget if kicked, we have to allow this
>> @@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>>  	 */
>>  	if (next->info & EVL_T_KICKED) {
>>  		evl_stop_timer(&qs->limit_timer);
>> -		goto out;
>> +		return next;
>>  	}
>>  
>> +	/*
>> +	 * __pick_next_thread() might have requeued the current
>> +	 * thread which is still leading the pack.
>> +	 */
>> +	if (curr == next)
>> +		return next;
>> +
>>  	if (ktime_to_ns(tg->run_budget) == 0) {
>> -		/* Flush expired group members as we go. */
>> +		/* Park expired group members as we go. */
>>  		list_add_tail(&next->quota_expired, &tg->expired);
>>  		goto pick;
>>  	}
>>  
>> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
>> -		/* Same group, leave the running timer untouched. */
>> -		goto out;
>> -
>> -	/* Arm limit timer for the new running group. */
>> -	evl_start_timer(&qs->limit_timer,
>> -			ktime_add(now, tg->run_budget),
>> -			EVL_INFINITE);
>> -out:
>> -	tg->nr_active--;
>> +	/* Arm new limit timer on change of running group. */
>> +	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
>> +		tg->run_start = evl_ktime_monotonic();
>> +		evl_start_timer(&qs->limit_timer,
>> +				ktime_add(tg->run_start, tg->run_budget),
>> +				EVL_INFINITE);
>> +	}
>>  
>>  	return next;
>>  }
>> @@ -542,7 +544,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>  	 * Unregister the group before we drop rq->lock. As a result,
>>  	 * it won't accept threads anymore while we are busy moving
>>  	 * the current members to the fifo class, and concurrent
>> -	 * evl_quota_remove requests would receive -EINVAL.
>> +	 * quota_remove requests would receive -EINVAL.
>>  	 */
>>  	__clear_bit(tg->tgid, group_map);
>>  	list_del(&tg->next);
>> @@ -556,7 +558,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>  	 * hold rq->lock on entry, we do a trylock dance to prevent an
>>  	 * ABBA issue. No livelock is possible since we unregistered
>>  	 * that group already, so &tg->members can only be depleted
>> -	 * (by this loop specifically).
>> +	 * (by this loop exclusively).
>>  	 */
>>  
>>  	while (!list_empty(&tg->members)) {
>> @@ -583,10 +585,10 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>  			int *quota_sum_r)
>>  {
>>  	struct evl_rq *rq = tg->rq;
>> -	struct evl_thread *thread, *tmp, *curr = rq->curr;
>> +	struct evl_thread *thread, *tmp;
>>  	struct evl_sched_quota *qs = &rq->quota;
>> -	ktime_t now, elapsed, consumed;
>>  	ktime_t old_quota = tg->quota;
>> +	ktime_t consumed;
>>  	u64 n;
>>  
>>  	assert_hard_lock(&rq->lock);
>> @@ -615,16 +617,8 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>  	tg->quota_percent = quota_percent;
>>  	tg->quota_peak_percent = quota_peak_percent;
>>  
>> -	if (thread_on_quota(curr, tg)) {
>> -		now = evl_ktime_monotonic();
>> -
>> -		elapsed = now - tg->run_start;
>> -		if (elapsed < tg->run_budget)
>> -			tg->run_budget -= elapsed;
>> -		else
>> -			tg->run_budget = 0;
>> -
>> -		tg->run_start = now;
>> +	if (current_on_quota(tg)) {
>> +		quota_out(rq->curr);
>>  		evl_stop_timer(&qs->limit_timer);
>>  	}
>>  
>> @@ -646,7 +640,7 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>  		list_for_each_entry_safe_reverse(thread, tmp, &tg->expired,
>>  						quota_expired) {
>>  			list_del_init(&thread->quota_expired);
>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>> +			evl_add_schedq(&qs->runnable, thread);
>>  		}
>>  	}
>>  
>> @@ -786,6 +780,7 @@ struct evl_sched_class evl_sched_quota = {
>>  	.sched_dequeue		=	quota_dequeue,
>>  	.sched_requeue		=	quota_requeue,
>>  	.sched_pick		=	quota_pick,
>> +	.sched_out		=	quota_out,
>>  	.sched_migrate		=	quota_migrate,
>>  	.sched_chkparam		=	quota_chkparam,
>>  	.sched_setparam		=	quota_setparam,
>> 
>
> You also seem to miss the case I discovered only yesterday as well: When
> starting the current thread on a quota via setparam, we also need to set
> its run_start stamp at that point.

run_start should be meaningful only when a limit timer runs. 

>
> Please have a closer look at both my test case (libevl) as well as my
> patch 2. Currently, I don't see remaining issues with both, but I'm all
> ears to learn what I missed.
>

-- 
Philippe.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  7:17       ` Philippe Gerum
@ 2026-06-15  7:22         ` Jan Kiszka
  2026-06-15  7:30           ` Philippe Gerum
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Kiszka @ 2026-06-15  7:22 UTC (permalink / raw)
  To: Philippe Gerum; +Cc: Xenomai

On 15.06.26 09:17, Philippe Gerum wrote:
> Jan Kiszka <jan.kiszka@siemens.com> writes:
> 
>> On 15.06.26 08:42, Philippe Gerum wrote:
>>> Philippe Gerum <rpm@xenomai.org> writes:
>>>
>>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>>
>>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>
>>>>> This shall be invoked before a thread switch, providing both the current
>>>>> and the next thread as arguments. Some scheduling classes may need it to
>>>>> correctly handle their state as the sched_pick may not be invoked when a
>>>>> higher-weighted class is providing the next thread.
>>>>>
>>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>>> ---
>>>>>  include/evl/sched.h     | 2 ++
>>>>>  kernel/evl/sched/core.c | 5 +++++
>>>>>  2 files changed, 7 insertions(+)
>>>>>
>>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>>> index ae9690860146c..0b16f1b1cf626 100644
>>>>> --- a/include/evl/sched.h
>>>>> +++ b/include/evl/sched.h
>>>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>>> +	void (*sched_out)(struct evl_thread *thread,
>>>>> +			  struct evl_thread *next);
>>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>>  			      struct evl_rq *rq);
>>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>>> index eb133e334d30f..0d49fc16bd67e 100644
>>>>> --- a/kernel/evl/sched/core.c
>>>>> +++ b/kernel/evl/sched/core.c
>>>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>>>   */
>>>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>  {
>>>>> +	struct evl_sched_class *prev_schedclass;
>>>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>>>  	struct evl_thread *prev, *next, *curr;
>>>>>  	bool leaving_inband, inband_tail;
>>>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>  	this_rq->curr = next;
>>>>>  	leaving_inband = false;
>>>>>  
>>>>> +	prev_schedclass = prev->sched_class;
>>>>> +	if (prev_schedclass->sched_out)
>>>>> +		prev_schedclass->sched_out(prev, next);
>>>>> +
>>>>>  	/*
>>>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>>>  	 * performing the rest of the context switch code
>>>>
>>>> I've been working on this lately too. It turns out that we need more
>>>> than this, although this is definitely part of the solution. I'll follow
>>>> up on this issue.
>>>
>>> This is still wip, I'm sharing this early to discuss details for
>>> reconciling both proposals (the hunk in __evl_schedule() is merely
>>> cosmetic, no functional change).
>>>
>>
>> Please avoid that for the final patch. I already had troubles telling
>> substantial from cosmetical changes apart in your lazy-blocking commit
>> (>50% unrelated changes in there).
>>
>>> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
>>> Author: Philippe Gerum <rpm@xenomai.org>
>>> Date:   Sun Jun 14 12:08:59 2026 +0200
>>>
>>>     evl: sched/quota: fix budget tracking on preemption
>>>     
>>>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>>>     runtime budget of the former is inaccurately tracked. This is due to
>>>     the fifo->pick() handler returning a valid thread, which prevents the
>>>     quota->pick() handler from being called. As a result, the last runtime
>>>     period of the outgoing thread is not accounted for.
>>>     
>>>     To fix this, we introduce a new sched_out() handler which is called
>>>     for the outgoing thread, which the quota policy uses to update the
>>>     remaining budget of preempted threads appropriately. In addition, the
>>>     implementation no longer shares the runnable thread queue with
>>>     SCHED_FIFO.
>>
>> ...and why we need a separate queue now? Please provide reasoning for
>> changes.
>>
>>>     
>>>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
>>>
>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>> index ae9690860146..cc824c28004b 100644
>>> --- a/include/evl/sched.h
>>> +++ b/include/evl/sched.h
>>> @@ -120,6 +120,7 @@ struct evl_sched_class {
>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>> +	void (*sched_out)(struct evl_thread *thread);
>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>  			      struct evl_rq *rq);
>>> diff --git a/include/evl/sched/quota.h b/include/evl/sched/quota.h
>>> index dfe3b7390958..dc8416645da8 100644
>>> --- a/include/evl/sched/quota.h
>>> +++ b/include/evl/sched/quota.h
>>> @@ -40,6 +40,7 @@ struct evl_quota_group {
>>>  
>>>  struct evl_sched_quota {
>>>  	ktime_t period;
>>> +	struct evl_sched_queue runnable;
>>>  	struct evl_timer refill_timer;
>>>  	struct evl_timer limit_timer;
>>>  	struct list_head groups;
>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>> index d1d025e06a5d..7127d4e52df9 100644
>>> --- a/kernel/evl/sched/core.c
>>> +++ b/kernel/evl/sched/core.c
>>> @@ -773,7 +773,8 @@ static inline void set_next_running(struct evl_rq *rq,
>>>  		evl_stop_timer(&rq->rrbtimer);
>>>  }
>>>  
>>> -static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>> +static __always_inline struct evl_thread *
>>> +__pick_next_thread(struct evl_rq *rq)
>>>  {
>>>  	struct evl_sched_class *sched_class;
>>>  	struct evl_thread *curr = rq->curr;
>>> @@ -821,8 +822,14 @@ static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>>  /* rq->curr->lock + rq->lock held, hard irqs off. */
>>>  static struct evl_thread *pick_next_thread(struct evl_rq *rq)
>>>  {
>>> -	struct evl_thread *next = __pick_next_thread(rq);
>>> +	struct evl_thread *next, *prev = rq->curr;
>>> +	struct evl_sched_class *prev_class = prev->sched_class;
>>>  
>>> +	if (prev_class->sched_out)
>>> +		prev_class->sched_out(prev);
>>> +
>>> +	next = __pick_next_thread(rq);
>>> +	trace_evl_pick_thread(next);
>>>  	set_next_running(rq, next);
>>>  
>>>  	return next;
>>> @@ -972,21 +979,20 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>  		return;
>>>  	}
>>>  
>>> +	prev = curr;
>>>  	next = pick_next_thread(this_rq);
>>> -	trace_evl_pick_thread(next);
>>> -	if (next == curr) {
>>> -		if (unlikely(next->state & EVL_T_ROOT)) {
>>> +	if (next == prev) {
>>> +		if (unlikely(prev->state & EVL_T_ROOT)) {
>>>  			if (this_rq->local_flags & RQ_TPROXY)
>>>  				evl_notify_proxy_tick(this_rq);
>>>  			if (this_rq->local_flags & RQ_TDEFER)
>>>  				evl_program_local_tick(&evl_mono_clock);
>>>  		}
>>>  		raw_spin_unlock(&this_rq->lock);
>>> -		raw_spin_unlock_irqrestore(&curr->lock, flags);
>>> +		raw_spin_unlock_irqrestore(&prev->lock, flags);
>>>  		return;
>>>  	}
>>>  
>>> -	prev = curr;
>>>  	this_rq->curr = next;
>>>  	leaving_inband = false;
>>>  
>>> diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
>>> index 0829da711a66..14748e3a843e 100644
>>> --- a/kernel/evl/sched/quota.c
>>> +++ b/kernel/evl/sched/quota.c
>>> @@ -12,45 +12,35 @@
>>>  #include <uapi/evl/sched-abi.h>
>>>  
>>>  /*
>>> - * With this policy, each per-CPU runqueue maintains a list of active
>>> - * thread groups for the sched_fifo class.
>>> - *
>>> - * Each time a thread is picked from the runqueue, we check whether we
>>> - * still have budget for running it, looking at the group it belongs
>>> - * to. If so, a timer is armed to elapse when that group has no more
>>> - * budget, would the incoming thread run unpreempted until then
>>> - * (i.e. evl_quota->limit_timer).
>>> + * Each time a thread is picked from the ->runnable queue, we check
>>> + * whether the group it belongs to still has runtime budget.  If so, a
>>> + * timer is armed to fire when that group has no more budget, would
>>> + * the incoming thread run unpreempted until then
>>> + * (i.e. quota->limit_timer).
>>>   *
>>>   * Otherwise, if no budget remains in the group for running the
>>>   * candidate thread, we move the latter to a local expiry queue
>>>   * maintained by the group. This process is done on the fly as we pull
>>> - * from the runqueue.
>>> + * from the ->runnable queue.
>>>   *
>>> - * Updating the remaining budget is done each time the EVL core asks
>>> - * for replacing the current thread with the next runnable one,
>>> - * i.e. evl_quota_pick(). There we charge the elapsed run time of the
>>> - * outgoing thread to the relevant group, and conversely, we check
>>> - * whether the incoming thread has budget.
>>> + * Updating the remaining budget is done each time the EVL core
>>> + * schedules out a thread undergoing the quota scheduling policy,
>>>   *
>>> - * Finally, a per-CPU timer (evl_quota->refill_timer) periodically
>>> - * ticks in the background, in accordance to the defined quota
>>> - * interval. Thread group budgets get replenished by its handler in
>>> - * accordance to their respective share, pushing all expired threads
>>> - * back to the run queue in the same move.
>>> + * Finally, a per-CPU timer (quota->refill_timer) periodically ticks
>>> + * in the background, in accordance to the defined quota interval,
>>> + * replenishing per-group budgets, pushing all expired threads back to
>>> + * the quota ->runqueue too.
>>>   *
>>> - * NOTE: since the core logic enforcing the budget entirely happens in
>>> - * evl_quota_pick(), applying a budget change can be done as simply as
>>> - * forcing the rescheduling procedure to be invoked asap. As a result
>>> - * of this, the EVL core will ask for the next thread to run, which
>>> - * means calling evl_quota_pick() eventually.
>>> + * NOTE: forcing a call to the rescheduling procedure is enoiugh to
>>> + * apply a budget change.
>>>   *
>>> - * CAUTION: evl_quota_group->nr_active does count both the threads
>>> - * from that group linked to the sched_fifo runqueue, _and_ the
>>> - * threads moved to the local expiry queue. As a matter of fact, the
>>> - * expired threads - those for which we consumed all the per-group
>>> - * budget - are still seen as runnable (i.e. not blocked/suspended) by
>>> - * the EVL core. This only means that the SCHED_QUOTA policy won't
>>> - * pick them until the corresponding budget is replenished.
>>> + * CAUTION: quota_group->nr_active does count both the threads from
>>> + * that group linked to the runnable queue, _and_ the threads moved to
>>> + * the local expiry queue. As a matter of fact, the expired threads -
>>> + * those for which we consumed all the per-group budget - are still
>>> + * seen as runnable (i.e. not blocked/suspended) by the EVL core. This
>>> + * only means that the SCHED_QUOTA policy won't pick them until the
>>> + * corresponding budget is replenished.
>>>   */
>>>  
>>>  #define MAX_QUOTA_GROUPS  1024
>>> @@ -61,15 +51,19 @@ static DECLARE_BITMAP(group_map, MAX_QUOTA_GROUPS);
>>>  
>>>  static LIST_HEAD(group_list);
>>>  
>>> -static inline bool thread_on_quota(struct evl_thread *thread,
>>> -				struct evl_quota_group *tg)
>>> +static inline bool current_on_quota(struct evl_quota_group *tg)
>>>  {
>>> -	/*
>>> -	 * Check whether @thread is running on some CPU, and belongs
>>> -	 * to quota group @tg.
>>> -	 */
>>> -	return thread->quota == tg &&
>>> -		!(thread->state & (EVL_T_READY|EVL_THREAD_BLOCK_MASK));
>>> +	struct evl_rq *rq = tg->rq;
>>> +	struct evl_thread *curr = rq->curr;
>>> +	struct evl_sched_quota *qs = &rq->quota;
>>> +
>>> +	if (curr->quota != tg)
>>> +		return false;
>>> +
>>> +	if (curr->state & (EVL_T_READY|EVL_T_KICKED|EVL_THREAD_BLOCK_MASK))
>>> +		return false;
>>> +
>>> +	return evl_timer_is_running(&qs->limit_timer);
>>>  }
>>>  
>>>  static inline bool group_is_active(struct evl_quota_group *tg)
>>> @@ -82,7 +76,7 @@ static inline bool group_is_active(struct evl_quota_group *tg)
>>>  	 * runqueue, in which case tg->nr_active already accounted for
>>>  	 * it.
>>>  	 */
>>> -	return thread_on_quota(tg->rq->curr, tg);
>>> +	return current_on_quota(tg);
>>>  }
>>>  
>>>  static inline void replenish_budget(struct evl_sched_quota *qs,
>>> @@ -134,10 +128,10 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>  	} else if (tg->run_credit) {
>>>  		credit = ktime_sub(tg->quota_peak, budget);
>>>  		/* Consume the accumulated credit. */
>>> -		if (tg->run_credit >= credit)
>>> +		if (tg->run_credit >= credit) {
>>>  			tg->run_credit =
>>>  				ktime_sub(tg->run_credit, credit);
>>> -		else {
>>> +		} else {
>>>  			credit = tg->run_credit;
>>>  			tg->run_credit = 0;
>>>  		}
>>> @@ -150,8 +144,8 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>  
>>>  static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled */
>>>  {
>>> -	struct evl_quota_group *tg;
>>>  	struct evl_thread *thread, *tmp;
>>> +	struct evl_quota_group *tg;
>>>  	struct evl_sched_quota *qs;
>>>  	struct evl_rq *rq;
>>>  
>>> @@ -167,7 +161,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>  		if (tg->run_budget == 0 || list_empty(&tg->expired))
>>>  			continue;
>>>  		/*
>>> -		 * For each group living on this CPU, move all expired
>>> +		 * For each group pinned on this CPU, move all expired
>>>  		 * threads back to the runqueue. Since those threads
>>>  		 * were moved out of the runqueue as we were
>>>  		 * considering them for execution, we push them back
>>> @@ -178,7 +172,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>  		list_for_each_entry_safe_reverse(thread, tmp,
>>>  						&tg->expired, quota_expired) {
>>>  			list_del_init(&thread->quota_expired);
>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>> +			evl_add_schedq(&qs->runnable, thread);
>>>  		}
>>>  	}
>>>  
>>> @@ -195,7 +189,7 @@ static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
>>>  	/*
>>>  	 * Force a rescheduling on the return path of the current
>>>  	 * interrupt, so that the budget is re-evaluated for the
>>> -	 * current group in evl_quota_pick().
>>> +	 * current group in quota_pick().
>>>  	 */
>>>  	raw_spin_lock(&rq->lock);
>>>  	evl_set_self_resched(rq);
>>> @@ -221,6 +215,7 @@ static void quota_init(struct evl_rq *rq)
>>>  {
>>>  	struct evl_sched_quota *qs = &rq->quota;
>>>  
>>> +	evl_init_schedq(&qs->runnable);
>>>  	qs->period = quota_period;
>>>  	INIT_LIST_HEAD(&qs->groups);
>>>  
>>> @@ -337,8 +332,8 @@ static void quota_forget(struct evl_thread *thread)
>>>  
>>>  static void quota_kick(struct evl_thread *thread)
>>>  {
>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>  	struct evl_quota_group *tg = thread->quota;
>>> -	struct evl_rq *rq = thread->rq;
>>>  
>>>  	/*
>>>  	 * Allow a kicked thread to be elected for running until it
>>> @@ -347,7 +342,7 @@ static void quota_kick(struct evl_thread *thread)
>>>  	 */
>>>  	if (tg->run_budget == 0 && !list_empty(&thread->quota_expired)) {
>>>  		list_del_init(&thread->quota_expired);
>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>  	}
>>>  }
>>>  
>>> @@ -358,79 +353,82 @@ static inline int thread_is_runnable(struct evl_thread *thread)
>>>  
>>>  static void quota_enqueue(struct evl_thread *thread)
>>>  {
>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>  	struct evl_quota_group *tg = thread->quota;
>>> -	struct evl_rq *rq = thread->rq;
>>>  
>>>  	if (!thread_is_runnable(thread))
>>>  		list_add_tail(&thread->quota_expired, &tg->expired);
>>>  	else
>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>  
>>>  	tg->nr_active++;
>>>  }
>>>  
>>>  static void quota_dequeue(struct evl_thread *thread)
>>>  {
>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>  	struct evl_quota_group *tg = thread->quota;
>>> -	struct evl_rq *rq = thread->rq;
>>>  
>>>  	if (!list_empty(&thread->quota_expired))
>>>  		list_del_init(&thread->quota_expired);
>>>  	else
>>> -		evl_del_schedq(&rq->fifo.runnable, thread);
>>> +		evl_del_schedq(&qs->runnable, thread);
>>>  
>>>  	tg->nr_active--;
>>>  }
>>>  
>>>  static void quota_requeue(struct evl_thread *thread)
>>>  {
>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>  	struct evl_quota_group *tg = thread->quota;
>>> -	struct evl_rq *rq = thread->rq;
>>>  
>>>  	if (!thread_is_runnable(thread))
>>>  		list_add(&thread->quota_expired, &tg->expired);
>>>  	else
>>> -		evl_add_schedq(&rq->fifo.runnable, thread);
>>> +		evl_add_schedq(&qs->runnable, thread);
>>>  
>>>  	tg->nr_active++;
>>>  }
>>>  
>>> -static struct evl_thread *quota_pick(struct evl_rq *rq)
>>> +static void quota_out(struct evl_thread *thread)
>>>  {
>>> -	struct evl_thread *next, *curr = rq->curr;
>>> -	struct evl_sched_quota *qs = &rq->quota;
>>> -	struct evl_quota_group *otg, *tg;
>>> -	ktime_t now, elapsed;
>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>> +	struct evl_quota_group *tg = thread->quota;
>>> +	ktime_t now, consumed;
>>> +
>>> +	/* Timer off means that we are not tracking quota. */
>>> +	if (!evl_timer_is_running(&qs->limit_timer))
>>> +		return;
>>>  
>>> -	now = evl_ktime_monotonic();
>>> -	otg = curr->quota;
>>> -	if (otg == NULL)
>>> -		goto pick;
>>>  	/*
>>>  	 * Charge the time consumed by the outgoing thread to the
>>>  	 * group it belongs to.
>>>  	 */
>>> -	elapsed = ktime_sub(now, otg->run_start);
>>> -	if (elapsed < otg->run_budget)
>>> -		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
>>> -	else
>>> -		otg->run_budget = 0;
>>> +	now = evl_ktime_monotonic();
>>
>> Thing brins some, though minor, glitch when doing the run_start update
>> for the next group already in pick. That's why I'm trying to reuse that
>> stamp when available.
>>
>>> +	consumed = ktime_sub(now, tg->run_start);
>>> +	if (consumed < tg->run_budget) {
>>> +		tg->run_budget = ktime_sub(tg->run_budget, consumed);
>>> +	} else {
>>> +		tg->run_budget = 0;
>>> +		evl_stop_timer(&qs->limit_timer);
>>> +	}
>>> +}
>>> +
>>> +static struct evl_thread *quota_pick(struct evl_rq *rq)
>>> +{
>>> +	struct evl_thread *next, *curr = rq->curr;
>>> +	struct evl_sched_quota *qs = &rq->quota;
>>> +	struct evl_quota_group *tg;
>>> +
>>>  pick:
>>> -	next = evl_get_schedq(&rq->fifo.runnable);
>>> +	next = evl_get_schedq(&qs->runnable);
>>>  	if (next == NULL) {
>>>  		evl_stop_timer(&qs->limit_timer);
>>>  		return NULL;
>>>  	}
>>>  
>>> -	/*
>>> -	 * As we basically piggyback on the SCHED_FIFO runqueue, make
>>> -	 * sure to detect non-quota threads.
>>> -	 */
>>>  	tg = next->quota;
>>> -	if (tg == NULL)
>>> -		return next;
>>> -
>>> -	tg->run_start = now;
>>> +	tg->nr_active--;
>>>  
>>>  	/*
>>>  	 * Don't consider budget if kicked, we have to allow this
>>> @@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>  	 */
>>>  	if (next->info & EVL_T_KICKED) {
>>>  		evl_stop_timer(&qs->limit_timer);
>>> -		goto out;
>>> +		return next;
>>>  	}
>>>  
>>> +	/*
>>> +	 * __pick_next_thread() might have requeued the current
>>> +	 * thread which is still leading the pack.
>>> +	 */
>>> +	if (curr == next)
>>> +		return next;
>>> +
>>>  	if (ktime_to_ns(tg->run_budget) == 0) {
>>> -		/* Flush expired group members as we go. */
>>> +		/* Park expired group members as we go. */
>>>  		list_add_tail(&next->quota_expired, &tg->expired);
>>>  		goto pick;
>>>  	}
>>>  
>>> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
>>> -		/* Same group, leave the running timer untouched. */
>>> -		goto out;
>>> -
>>> -	/* Arm limit timer for the new running group. */
>>> -	evl_start_timer(&qs->limit_timer,
>>> -			ktime_add(now, tg->run_budget),
>>> -			EVL_INFINITE);
>>> -out:
>>> -	tg->nr_active--;
>>> +	/* Arm new limit timer on change of running group. */
>>> +	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
>>> +		tg->run_start = evl_ktime_monotonic();
>>> +		evl_start_timer(&qs->limit_timer,
>>> +				ktime_add(tg->run_start, tg->run_budget),
>>> +				EVL_INFINITE);
>>> +	}
>>>  
>>>  	return next;
>>>  }
>>> @@ -542,7 +544,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>  	 * Unregister the group before we drop rq->lock. As a result,
>>>  	 * it won't accept threads anymore while we are busy moving
>>>  	 * the current members to the fifo class, and concurrent
>>> -	 * evl_quota_remove requests would receive -EINVAL.
>>> +	 * quota_remove requests would receive -EINVAL.
>>>  	 */
>>>  	__clear_bit(tg->tgid, group_map);
>>>  	list_del(&tg->next);
>>> @@ -556,7 +558,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>  	 * hold rq->lock on entry, we do a trylock dance to prevent an
>>>  	 * ABBA issue. No livelock is possible since we unregistered
>>>  	 * that group already, so &tg->members can only be depleted
>>> -	 * (by this loop specifically).
>>> +	 * (by this loop exclusively).
>>>  	 */
>>>  
>>>  	while (!list_empty(&tg->members)) {
>>> @@ -583,10 +585,10 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>  			int *quota_sum_r)
>>>  {
>>>  	struct evl_rq *rq = tg->rq;
>>> -	struct evl_thread *thread, *tmp, *curr = rq->curr;
>>> +	struct evl_thread *thread, *tmp;
>>>  	struct evl_sched_quota *qs = &rq->quota;
>>> -	ktime_t now, elapsed, consumed;
>>>  	ktime_t old_quota = tg->quota;
>>> +	ktime_t consumed;
>>>  	u64 n;
>>>  
>>>  	assert_hard_lock(&rq->lock);
>>> @@ -615,16 +617,8 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>  	tg->quota_percent = quota_percent;
>>>  	tg->quota_peak_percent = quota_peak_percent;
>>>  
>>> -	if (thread_on_quota(curr, tg)) {
>>> -		now = evl_ktime_monotonic();
>>> -
>>> -		elapsed = now - tg->run_start;
>>> -		if (elapsed < tg->run_budget)
>>> -			tg->run_budget -= elapsed;
>>> -		else
>>> -			tg->run_budget = 0;
>>> -
>>> -		tg->run_start = now;
>>> +	if (current_on_quota(tg)) {
>>> +		quota_out(rq->curr);
>>>  		evl_stop_timer(&qs->limit_timer);
>>>  	}
>>>  
>>> @@ -646,7 +640,7 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>  		list_for_each_entry_safe_reverse(thread, tmp, &tg->expired,
>>>  						quota_expired) {
>>>  			list_del_init(&thread->quota_expired);
>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>> +			evl_add_schedq(&qs->runnable, thread);
>>>  		}
>>>  	}
>>>  
>>> @@ -786,6 +780,7 @@ struct evl_sched_class evl_sched_quota = {
>>>  	.sched_dequeue		=	quota_dequeue,
>>>  	.sched_requeue		=	quota_requeue,
>>>  	.sched_pick		=	quota_pick,
>>> +	.sched_out		=	quota_out,
>>>  	.sched_migrate		=	quota_migrate,
>>>  	.sched_chkparam		=	quota_chkparam,
>>>  	.sched_setparam		=	quota_setparam,
>>>
>>
>> You also seem to miss the case I discovered only yesterday as well: When
>> starting the current thread on a quota via setparam, we also need to set
>> its run_start stamp at that point.
> 
> run_start should be meaningful only when a limit timer runs. 
> 

Right, but are you sure it will be started in the case I described...?

Jan

>>
>> Please have a closer look at both my test case (libevl) as well as my
>> patch 2. Currently, I don't see remaining issues with both, but I'm all
>> ears to learn what I missed.
>>
> 


-- 
Siemens AG, Foundational Technologies
Linux Expert Center

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  7:22         ` Jan Kiszka
@ 2026-06-15  7:30           ` Philippe Gerum
  2026-06-15  7:34             ` Philippe Gerum
  2026-06-15  8:34             ` Jan Kiszka
  0 siblings, 2 replies; 24+ messages in thread
From: Philippe Gerum @ 2026-06-15  7:30 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Xenomai

Jan Kiszka <jan.kiszka@siemens.com> writes:

> On 15.06.26 09:17, Philippe Gerum wrote:
>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>> 
>>> On 15.06.26 08:42, Philippe Gerum wrote:
>>>> Philippe Gerum <rpm@xenomai.org> writes:
>>>>
>>>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>>>
>>>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>>
>>>>>> This shall be invoked before a thread switch, providing both the current
>>>>>> and the next thread as arguments. Some scheduling classes may need it to
>>>>>> correctly handle their state as the sched_pick may not be invoked when a
>>>>>> higher-weighted class is providing the next thread.
>>>>>>
>>>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>> ---
>>>>>>  include/evl/sched.h     | 2 ++
>>>>>>  kernel/evl/sched/core.c | 5 +++++
>>>>>>  2 files changed, 7 insertions(+)
>>>>>>
>>>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>>>> index ae9690860146c..0b16f1b1cf626 100644
>>>>>> --- a/include/evl/sched.h
>>>>>> +++ b/include/evl/sched.h
>>>>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>>>> +	void (*sched_out)(struct evl_thread *thread,
>>>>>> +			  struct evl_thread *next);
>>>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>>>  			      struct evl_rq *rq);
>>>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>>>> index eb133e334d30f..0d49fc16bd67e 100644
>>>>>> --- a/kernel/evl/sched/core.c
>>>>>> +++ b/kernel/evl/sched/core.c
>>>>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>>>>   */
>>>>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>  {
>>>>>> +	struct evl_sched_class *prev_schedclass;
>>>>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>>>>  	struct evl_thread *prev, *next, *curr;
>>>>>>  	bool leaving_inband, inband_tail;
>>>>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>  	this_rq->curr = next;
>>>>>>  	leaving_inband = false;
>>>>>>  
>>>>>> +	prev_schedclass = prev->sched_class;
>>>>>> +	if (prev_schedclass->sched_out)
>>>>>> +		prev_schedclass->sched_out(prev, next);
>>>>>> +
>>>>>>  	/*
>>>>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>>>>  	 * performing the rest of the context switch code
>>>>>
>>>>> I've been working on this lately too. It turns out that we need more
>>>>> than this, although this is definitely part of the solution. I'll follow
>>>>> up on this issue.
>>>>
>>>> This is still wip, I'm sharing this early to discuss details for
>>>> reconciling both proposals (the hunk in __evl_schedule() is merely
>>>> cosmetic, no functional change).
>>>>
>>>
>>> Please avoid that for the final patch. I already had troubles telling
>>> substantial from cosmetical changes apart in your lazy-blocking commit
>>> (>50% unrelated changes in there).
>>>
>>>> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
>>>> Author: Philippe Gerum <rpm@xenomai.org>
>>>> Date:   Sun Jun 14 12:08:59 2026 +0200
>>>>
>>>>     evl: sched/quota: fix budget tracking on preemption
>>>>     
>>>>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>>>>     runtime budget of the former is inaccurately tracked. This is due to
>>>>     the fifo->pick() handler returning a valid thread, which prevents the
>>>>     quota->pick() handler from being called. As a result, the last runtime
>>>>     period of the outgoing thread is not accounted for.
>>>>     
>>>>     To fix this, we introduce a new sched_out() handler which is called
>>>>     for the outgoing thread, which the quota policy uses to update the
>>>>     remaining budget of preempted threads appropriately. In addition, the
>>>>     implementation no longer shares the runnable thread queue with
>>>>     SCHED_FIFO.
>>>
>>> ...and why we need a separate queue now? Please provide reasoning for
>>> changes.
>>>
>>>>     
>>>>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
>>>>
>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>> index ae9690860146..cc824c28004b 100644
>>>> --- a/include/evl/sched.h
>>>> +++ b/include/evl/sched.h
>>>> @@ -120,6 +120,7 @@ struct evl_sched_class {
>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>> +	void (*sched_out)(struct evl_thread *thread);
>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>  			      struct evl_rq *rq);
>>>> diff --git a/include/evl/sched/quota.h b/include/evl/sched/quota.h
>>>> index dfe3b7390958..dc8416645da8 100644
>>>> --- a/include/evl/sched/quota.h
>>>> +++ b/include/evl/sched/quota.h
>>>> @@ -40,6 +40,7 @@ struct evl_quota_group {
>>>>  
>>>>  struct evl_sched_quota {
>>>>  	ktime_t period;
>>>> +	struct evl_sched_queue runnable;
>>>>  	struct evl_timer refill_timer;
>>>>  	struct evl_timer limit_timer;
>>>>  	struct list_head groups;
>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>> index d1d025e06a5d..7127d4e52df9 100644
>>>> --- a/kernel/evl/sched/core.c
>>>> +++ b/kernel/evl/sched/core.c
>>>> @@ -773,7 +773,8 @@ static inline void set_next_running(struct evl_rq *rq,
>>>>  		evl_stop_timer(&rq->rrbtimer);
>>>>  }
>>>>  
>>>> -static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>>> +static __always_inline struct evl_thread *
>>>> +__pick_next_thread(struct evl_rq *rq)
>>>>  {
>>>>  	struct evl_sched_class *sched_class;
>>>>  	struct evl_thread *curr = rq->curr;
>>>> @@ -821,8 +822,14 @@ static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>>>  /* rq->curr->lock + rq->lock held, hard irqs off. */
>>>>  static struct evl_thread *pick_next_thread(struct evl_rq *rq)
>>>>  {
>>>> -	struct evl_thread *next = __pick_next_thread(rq);
>>>> +	struct evl_thread *next, *prev = rq->curr;
>>>> +	struct evl_sched_class *prev_class = prev->sched_class;
>>>>  
>>>> +	if (prev_class->sched_out)
>>>> +		prev_class->sched_out(prev);
>>>> +
>>>> +	next = __pick_next_thread(rq);
>>>> +	trace_evl_pick_thread(next);
>>>>  	set_next_running(rq, next);
>>>>  
>>>>  	return next;
>>>> @@ -972,21 +979,20 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>  		return;
>>>>  	}
>>>>  
>>>> +	prev = curr;
>>>>  	next = pick_next_thread(this_rq);
>>>> -	trace_evl_pick_thread(next);
>>>> -	if (next == curr) {
>>>> -		if (unlikely(next->state & EVL_T_ROOT)) {
>>>> +	if (next == prev) {
>>>> +		if (unlikely(prev->state & EVL_T_ROOT)) {
>>>>  			if (this_rq->local_flags & RQ_TPROXY)
>>>>  				evl_notify_proxy_tick(this_rq);
>>>>  			if (this_rq->local_flags & RQ_TDEFER)
>>>>  				evl_program_local_tick(&evl_mono_clock);
>>>>  		}
>>>>  		raw_spin_unlock(&this_rq->lock);
>>>> -		raw_spin_unlock_irqrestore(&curr->lock, flags);
>>>> +		raw_spin_unlock_irqrestore(&prev->lock, flags);
>>>>  		return;
>>>>  	}
>>>>  
>>>> -	prev = curr;
>>>>  	this_rq->curr = next;
>>>>  	leaving_inband = false;
>>>>  
>>>> diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
>>>> index 0829da711a66..14748e3a843e 100644
>>>> --- a/kernel/evl/sched/quota.c
>>>> +++ b/kernel/evl/sched/quota.c
>>>> @@ -12,45 +12,35 @@
>>>>  #include <uapi/evl/sched-abi.h>
>>>>  
>>>>  /*
>>>> - * With this policy, each per-CPU runqueue maintains a list of active
>>>> - * thread groups for the sched_fifo class.
>>>> - *
>>>> - * Each time a thread is picked from the runqueue, we check whether we
>>>> - * still have budget for running it, looking at the group it belongs
>>>> - * to. If so, a timer is armed to elapse when that group has no more
>>>> - * budget, would the incoming thread run unpreempted until then
>>>> - * (i.e. evl_quota->limit_timer).
>>>> + * Each time a thread is picked from the ->runnable queue, we check
>>>> + * whether the group it belongs to still has runtime budget.  If so, a
>>>> + * timer is armed to fire when that group has no more budget, would
>>>> + * the incoming thread run unpreempted until then
>>>> + * (i.e. quota->limit_timer).
>>>>   *
>>>>   * Otherwise, if no budget remains in the group for running the
>>>>   * candidate thread, we move the latter to a local expiry queue
>>>>   * maintained by the group. This process is done on the fly as we pull
>>>> - * from the runqueue.
>>>> + * from the ->runnable queue.
>>>>   *
>>>> - * Updating the remaining budget is done each time the EVL core asks
>>>> - * for replacing the current thread with the next runnable one,
>>>> - * i.e. evl_quota_pick(). There we charge the elapsed run time of the
>>>> - * outgoing thread to the relevant group, and conversely, we check
>>>> - * whether the incoming thread has budget.
>>>> + * Updating the remaining budget is done each time the EVL core
>>>> + * schedules out a thread undergoing the quota scheduling policy,
>>>>   *
>>>> - * Finally, a per-CPU timer (evl_quota->refill_timer) periodically
>>>> - * ticks in the background, in accordance to the defined quota
>>>> - * interval. Thread group budgets get replenished by its handler in
>>>> - * accordance to their respective share, pushing all expired threads
>>>> - * back to the run queue in the same move.
>>>> + * Finally, a per-CPU timer (quota->refill_timer) periodically ticks
>>>> + * in the background, in accordance to the defined quota interval,
>>>> + * replenishing per-group budgets, pushing all expired threads back to
>>>> + * the quota ->runqueue too.
>>>>   *
>>>> - * NOTE: since the core logic enforcing the budget entirely happens in
>>>> - * evl_quota_pick(), applying a budget change can be done as simply as
>>>> - * forcing the rescheduling procedure to be invoked asap. As a result
>>>> - * of this, the EVL core will ask for the next thread to run, which
>>>> - * means calling evl_quota_pick() eventually.
>>>> + * NOTE: forcing a call to the rescheduling procedure is enoiugh to
>>>> + * apply a budget change.
>>>>   *
>>>> - * CAUTION: evl_quota_group->nr_active does count both the threads
>>>> - * from that group linked to the sched_fifo runqueue, _and_ the
>>>> - * threads moved to the local expiry queue. As a matter of fact, the
>>>> - * expired threads - those for which we consumed all the per-group
>>>> - * budget - are still seen as runnable (i.e. not blocked/suspended) by
>>>> - * the EVL core. This only means that the SCHED_QUOTA policy won't
>>>> - * pick them until the corresponding budget is replenished.
>>>> + * CAUTION: quota_group->nr_active does count both the threads from
>>>> + * that group linked to the runnable queue, _and_ the threads moved to
>>>> + * the local expiry queue. As a matter of fact, the expired threads -
>>>> + * those for which we consumed all the per-group budget - are still
>>>> + * seen as runnable (i.e. not blocked/suspended) by the EVL core. This
>>>> + * only means that the SCHED_QUOTA policy won't pick them until the
>>>> + * corresponding budget is replenished.
>>>>   */
>>>>  
>>>>  #define MAX_QUOTA_GROUPS  1024
>>>> @@ -61,15 +51,19 @@ static DECLARE_BITMAP(group_map, MAX_QUOTA_GROUPS);
>>>>  
>>>>  static LIST_HEAD(group_list);
>>>>  
>>>> -static inline bool thread_on_quota(struct evl_thread *thread,
>>>> -				struct evl_quota_group *tg)
>>>> +static inline bool current_on_quota(struct evl_quota_group *tg)
>>>>  {
>>>> -	/*
>>>> -	 * Check whether @thread is running on some CPU, and belongs
>>>> -	 * to quota group @tg.
>>>> -	 */
>>>> -	return thread->quota == tg &&
>>>> -		!(thread->state & (EVL_T_READY|EVL_THREAD_BLOCK_MASK));
>>>> +	struct evl_rq *rq = tg->rq;
>>>> +	struct evl_thread *curr = rq->curr;
>>>> +	struct evl_sched_quota *qs = &rq->quota;
>>>> +
>>>> +	if (curr->quota != tg)
>>>> +		return false;
>>>> +
>>>> +	if (curr->state & (EVL_T_READY|EVL_T_KICKED|EVL_THREAD_BLOCK_MASK))
>>>> +		return false;
>>>> +
>>>> +	return evl_timer_is_running(&qs->limit_timer);
>>>>  }
>>>>  
>>>>  static inline bool group_is_active(struct evl_quota_group *tg)
>>>> @@ -82,7 +76,7 @@ static inline bool group_is_active(struct evl_quota_group *tg)
>>>>  	 * runqueue, in which case tg->nr_active already accounted for
>>>>  	 * it.
>>>>  	 */
>>>> -	return thread_on_quota(tg->rq->curr, tg);
>>>> +	return current_on_quota(tg);
>>>>  }
>>>>  
>>>>  static inline void replenish_budget(struct evl_sched_quota *qs,
>>>> @@ -134,10 +128,10 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>  	} else if (tg->run_credit) {
>>>>  		credit = ktime_sub(tg->quota_peak, budget);
>>>>  		/* Consume the accumulated credit. */
>>>> -		if (tg->run_credit >= credit)
>>>> +		if (tg->run_credit >= credit) {
>>>>  			tg->run_credit =
>>>>  				ktime_sub(tg->run_credit, credit);
>>>> -		else {
>>>> +		} else {
>>>>  			credit = tg->run_credit;
>>>>  			tg->run_credit = 0;
>>>>  		}
>>>> @@ -150,8 +144,8 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>  
>>>>  static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled */
>>>>  {
>>>> -	struct evl_quota_group *tg;
>>>>  	struct evl_thread *thread, *tmp;
>>>> +	struct evl_quota_group *tg;
>>>>  	struct evl_sched_quota *qs;
>>>>  	struct evl_rq *rq;
>>>>  
>>>> @@ -167,7 +161,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>>  		if (tg->run_budget == 0 || list_empty(&tg->expired))
>>>>  			continue;
>>>>  		/*
>>>> -		 * For each group living on this CPU, move all expired
>>>> +		 * For each group pinned on this CPU, move all expired
>>>>  		 * threads back to the runqueue. Since those threads
>>>>  		 * were moved out of the runqueue as we were
>>>>  		 * considering them for execution, we push them back
>>>> @@ -178,7 +172,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>>  		list_for_each_entry_safe_reverse(thread, tmp,
>>>>  						&tg->expired, quota_expired) {
>>>>  			list_del_init(&thread->quota_expired);
>>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>>> +			evl_add_schedq(&qs->runnable, thread);
>>>>  		}
>>>>  	}
>>>>  
>>>> @@ -195,7 +189,7 @@ static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
>>>>  	/*
>>>>  	 * Force a rescheduling on the return path of the current
>>>>  	 * interrupt, so that the budget is re-evaluated for the
>>>> -	 * current group in evl_quota_pick().
>>>> +	 * current group in quota_pick().
>>>>  	 */
>>>>  	raw_spin_lock(&rq->lock);
>>>>  	evl_set_self_resched(rq);
>>>> @@ -221,6 +215,7 @@ static void quota_init(struct evl_rq *rq)
>>>>  {
>>>>  	struct evl_sched_quota *qs = &rq->quota;
>>>>  
>>>> +	evl_init_schedq(&qs->runnable);
>>>>  	qs->period = quota_period;
>>>>  	INIT_LIST_HEAD(&qs->groups);
>>>>  
>>>> @@ -337,8 +332,8 @@ static void quota_forget(struct evl_thread *thread)
>>>>  
>>>>  static void quota_kick(struct evl_thread *thread)
>>>>  {
>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>  	struct evl_quota_group *tg = thread->quota;
>>>> -	struct evl_rq *rq = thread->rq;
>>>>  
>>>>  	/*
>>>>  	 * Allow a kicked thread to be elected for running until it
>>>> @@ -347,7 +342,7 @@ static void quota_kick(struct evl_thread *thread)
>>>>  	 */
>>>>  	if (tg->run_budget == 0 && !list_empty(&thread->quota_expired)) {
>>>>  		list_del_init(&thread->quota_expired);
>>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>>  	}
>>>>  }
>>>>  
>>>> @@ -358,79 +353,82 @@ static inline int thread_is_runnable(struct evl_thread *thread)
>>>>  
>>>>  static void quota_enqueue(struct evl_thread *thread)
>>>>  {
>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>  	struct evl_quota_group *tg = thread->quota;
>>>> -	struct evl_rq *rq = thread->rq;
>>>>  
>>>>  	if (!thread_is_runnable(thread))
>>>>  		list_add_tail(&thread->quota_expired, &tg->expired);
>>>>  	else
>>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>>  
>>>>  	tg->nr_active++;
>>>>  }
>>>>  
>>>>  static void quota_dequeue(struct evl_thread *thread)
>>>>  {
>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>  	struct evl_quota_group *tg = thread->quota;
>>>> -	struct evl_rq *rq = thread->rq;
>>>>  
>>>>  	if (!list_empty(&thread->quota_expired))
>>>>  		list_del_init(&thread->quota_expired);
>>>>  	else
>>>> -		evl_del_schedq(&rq->fifo.runnable, thread);
>>>> +		evl_del_schedq(&qs->runnable, thread);
>>>>  
>>>>  	tg->nr_active--;
>>>>  }
>>>>  
>>>>  static void quota_requeue(struct evl_thread *thread)
>>>>  {
>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>  	struct evl_quota_group *tg = thread->quota;
>>>> -	struct evl_rq *rq = thread->rq;
>>>>  
>>>>  	if (!thread_is_runnable(thread))
>>>>  		list_add(&thread->quota_expired, &tg->expired);
>>>>  	else
>>>> -		evl_add_schedq(&rq->fifo.runnable, thread);
>>>> +		evl_add_schedq(&qs->runnable, thread);
>>>>  
>>>>  	tg->nr_active++;
>>>>  }
>>>>  
>>>> -static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>> +static void quota_out(struct evl_thread *thread)
>>>>  {
>>>> -	struct evl_thread *next, *curr = rq->curr;
>>>> -	struct evl_sched_quota *qs = &rq->quota;
>>>> -	struct evl_quota_group *otg, *tg;
>>>> -	ktime_t now, elapsed;
>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>> +	struct evl_quota_group *tg = thread->quota;
>>>> +	ktime_t now, consumed;
>>>> +
>>>> +	/* Timer off means that we are not tracking quota. */
>>>> +	if (!evl_timer_is_running(&qs->limit_timer))
>>>> +		return;
>>>>  
>>>> -	now = evl_ktime_monotonic();
>>>> -	otg = curr->quota;
>>>> -	if (otg == NULL)
>>>> -		goto pick;
>>>>  	/*
>>>>  	 * Charge the time consumed by the outgoing thread to the
>>>>  	 * group it belongs to.
>>>>  	 */
>>>> -	elapsed = ktime_sub(now, otg->run_start);
>>>> -	if (elapsed < otg->run_budget)
>>>> -		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
>>>> -	else
>>>> -		otg->run_budget = 0;
>>>> +	now = evl_ktime_monotonic();
>>>
>>> Thing brins some, though minor, glitch when doing the run_start update
>>> for the next group already in pick. That's why I'm trying to reuse that
>>> stamp when available.
>>>
>>>> +	consumed = ktime_sub(now, tg->run_start);
>>>> +	if (consumed < tg->run_budget) {
>>>> +		tg->run_budget = ktime_sub(tg->run_budget, consumed);
>>>> +	} else {
>>>> +		tg->run_budget = 0;
>>>> +		evl_stop_timer(&qs->limit_timer);
>>>> +	}
>>>> +}
>>>> +
>>>> +static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>> +{
>>>> +	struct evl_thread *next, *curr = rq->curr;
>>>> +	struct evl_sched_quota *qs = &rq->quota;
>>>> +	struct evl_quota_group *tg;
>>>> +
>>>>  pick:
>>>> -	next = evl_get_schedq(&rq->fifo.runnable);
>>>> +	next = evl_get_schedq(&qs->runnable);
>>>>  	if (next == NULL) {
>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>  		return NULL;
>>>>  	}
>>>>  
>>>> -	/*
>>>> -	 * As we basically piggyback on the SCHED_FIFO runqueue, make
>>>> -	 * sure to detect non-quota threads.
>>>> -	 */
>>>>  	tg = next->quota;
>>>> -	if (tg == NULL)
>>>> -		return next;
>>>> -
>>>> -	tg->run_start = now;
>>>> +	tg->nr_active--;
>>>>  
>>>>  	/*
>>>>  	 * Don't consider budget if kicked, we have to allow this
>>>> @@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>>  	 */
>>>>  	if (next->info & EVL_T_KICKED) {
>>>>  		evl_stop_timer(&qs->limit_timer);
>>>> -		goto out;
>>>> +		return next;
>>>>  	}
>>>>  
>>>> +	/*
>>>> +	 * __pick_next_thread() might have requeued the current
>>>> +	 * thread which is still leading the pack.
>>>> +	 */
>>>> +	if (curr == next)
>>>> +		return next;
>>>> +
>>>>  	if (ktime_to_ns(tg->run_budget) == 0) {
>>>> -		/* Flush expired group members as we go. */
>>>> +		/* Park expired group members as we go. */
>>>>  		list_add_tail(&next->quota_expired, &tg->expired);
>>>>  		goto pick;
>>>>  	}
>>>>  
>>>> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
>>>> -		/* Same group, leave the running timer untouched. */
>>>> -		goto out;
>>>> -
>>>> -	/* Arm limit timer for the new running group. */
>>>> -	evl_start_timer(&qs->limit_timer,
>>>> -			ktime_add(now, tg->run_budget),
>>>> -			EVL_INFINITE);
>>>> -out:
>>>> -	tg->nr_active--;
>>>> +	/* Arm new limit timer on change of running group. */
>>>> +	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
>>>> +		tg->run_start = evl_ktime_monotonic();
>>>> +		evl_start_timer(&qs->limit_timer,
>>>> +				ktime_add(tg->run_start, tg->run_budget),
>>>> +				EVL_INFINITE);
>>>> +	}
>>>>  
>>>>  	return next;
>>>>  }
>>>> @@ -542,7 +544,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>>  	 * Unregister the group before we drop rq->lock. As a result,
>>>>  	 * it won't accept threads anymore while we are busy moving
>>>>  	 * the current members to the fifo class, and concurrent
>>>> -	 * evl_quota_remove requests would receive -EINVAL.
>>>> +	 * quota_remove requests would receive -EINVAL.
>>>>  	 */
>>>>  	__clear_bit(tg->tgid, group_map);
>>>>  	list_del(&tg->next);
>>>> @@ -556,7 +558,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>>  	 * hold rq->lock on entry, we do a trylock dance to prevent an
>>>>  	 * ABBA issue. No livelock is possible since we unregistered
>>>>  	 * that group already, so &tg->members can only be depleted
>>>> -	 * (by this loop specifically).
>>>> +	 * (by this loop exclusively).
>>>>  	 */
>>>>  
>>>>  	while (!list_empty(&tg->members)) {
>>>> @@ -583,10 +585,10 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>  			int *quota_sum_r)
>>>>  {
>>>>  	struct evl_rq *rq = tg->rq;
>>>> -	struct evl_thread *thread, *tmp, *curr = rq->curr;
>>>> +	struct evl_thread *thread, *tmp;
>>>>  	struct evl_sched_quota *qs = &rq->quota;
>>>> -	ktime_t now, elapsed, consumed;
>>>>  	ktime_t old_quota = tg->quota;
>>>> +	ktime_t consumed;
>>>>  	u64 n;
>>>>  
>>>>  	assert_hard_lock(&rq->lock);
>>>> @@ -615,16 +617,8 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>  	tg->quota_percent = quota_percent;
>>>>  	tg->quota_peak_percent = quota_peak_percent;
>>>>  
>>>> -	if (thread_on_quota(curr, tg)) {
>>>> -		now = evl_ktime_monotonic();
>>>> -
>>>> -		elapsed = now - tg->run_start;
>>>> -		if (elapsed < tg->run_budget)
>>>> -			tg->run_budget -= elapsed;
>>>> -		else
>>>> -			tg->run_budget = 0;
>>>> -
>>>> -		tg->run_start = now;
>>>> +	if (current_on_quota(tg)) {
>>>> +		quota_out(rq->curr);
>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>  	}
>>>>  
>>>> @@ -646,7 +640,7 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>  		list_for_each_entry_safe_reverse(thread, tmp, &tg->expired,
>>>>  						quota_expired) {
>>>>  			list_del_init(&thread->quota_expired);
>>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>>> +			evl_add_schedq(&qs->runnable, thread);
>>>>  		}
>>>>  	}
>>>>  
>>>> @@ -786,6 +780,7 @@ struct evl_sched_class evl_sched_quota = {
>>>>  	.sched_dequeue		=	quota_dequeue,
>>>>  	.sched_requeue		=	quota_requeue,
>>>>  	.sched_pick		=	quota_pick,
>>>> +	.sched_out		=	quota_out,
>>>>  	.sched_migrate		=	quota_migrate,
>>>>  	.sched_chkparam		=	quota_chkparam,
>>>>  	.sched_setparam		=	quota_setparam,
>>>>
>>>
>>> You also seem to miss the case I discovered only yesterday as well: When
>>> starting the current thread on a quota via setparam, we also need to set
>>> its run_start stamp at that point.
>> 
>> run_start should be meaningful only when a limit timer runs. 
>> 
>
> Right, but are you sure it will be started in the case I described...?
>

To be started, it must be picked, so yes.

-- 
Philippe.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  7:30           ` Philippe Gerum
@ 2026-06-15  7:34             ` Philippe Gerum
  2026-06-15  8:34             ` Jan Kiszka
  1 sibling, 0 replies; 24+ messages in thread
From: Philippe Gerum @ 2026-06-15  7:34 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Xenomai

Philippe Gerum <rpm@xenomai.org> writes:

> Jan Kiszka <jan.kiszka@siemens.com> writes:
>
>> On 15.06.26 09:17, Philippe Gerum wrote:
>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>> 
>>>> On 15.06.26 08:42, Philippe Gerum wrote:
>>>>> Philippe Gerum <rpm@xenomai.org> writes:
>>>>>
>>>>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>>>>
>>>>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>>>
>>>>>>> This shall be invoked before a thread switch, providing both the current
>>>>>>> and the next thread as arguments. Some scheduling classes may need it to
>>>>>>> correctly handle their state as the sched_pick may not be invoked when a
>>>>>>> higher-weighted class is providing the next thread.
>>>>>>>
>>>>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>>> ---
>>>>>>>  include/evl/sched.h     | 2 ++
>>>>>>>  kernel/evl/sched/core.c | 5 +++++
>>>>>>>  2 files changed, 7 insertions(+)
>>>>>>>
>>>>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>>>>> index ae9690860146c..0b16f1b1cf626 100644
>>>>>>> --- a/include/evl/sched.h
>>>>>>> +++ b/include/evl/sched.h
>>>>>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>>>>> +	void (*sched_out)(struct evl_thread *thread,
>>>>>>> +			  struct evl_thread *next);
>>>>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>>>>  			      struct evl_rq *rq);
>>>>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>>>>> index eb133e334d30f..0d49fc16bd67e 100644
>>>>>>> --- a/kernel/evl/sched/core.c
>>>>>>> +++ b/kernel/evl/sched/core.c
>>>>>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>>>>>   */
>>>>>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>>  {
>>>>>>> +	struct evl_sched_class *prev_schedclass;
>>>>>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>>>>>  	struct evl_thread *prev, *next, *curr;
>>>>>>>  	bool leaving_inband, inband_tail;
>>>>>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>>  	this_rq->curr = next;
>>>>>>>  	leaving_inband = false;
>>>>>>>  
>>>>>>> +	prev_schedclass = prev->sched_class;
>>>>>>> +	if (prev_schedclass->sched_out)
>>>>>>> +		prev_schedclass->sched_out(prev, next);
>>>>>>> +
>>>>>>>  	/*
>>>>>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>>>>>  	 * performing the rest of the context switch code
>>>>>>
>>>>>> I've been working on this lately too. It turns out that we need more
>>>>>> than this, although this is definitely part of the solution. I'll follow
>>>>>> up on this issue.
>>>>>
>>>>> This is still wip, I'm sharing this early to discuss details for
>>>>> reconciling both proposals (the hunk in __evl_schedule() is merely
>>>>> cosmetic, no functional change).
>>>>>
>>>>
>>>> Please avoid that for the final patch. I already had troubles telling
>>>> substantial from cosmetical changes apart in your lazy-blocking commit
>>>> (>50% unrelated changes in there).
>>>>
>>>>> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
>>>>> Author: Philippe Gerum <rpm@xenomai.org>
>>>>> Date:   Sun Jun 14 12:08:59 2026 +0200
>>>>>
>>>>>     evl: sched/quota: fix budget tracking on preemption
>>>>>     
>>>>>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>>>>>     runtime budget of the former is inaccurately tracked. This is due to
>>>>>     the fifo->pick() handler returning a valid thread, which prevents the
>>>>>     quota->pick() handler from being called. As a result, the last runtime
>>>>>     period of the outgoing thread is not accounted for.
>>>>>     
>>>>>     To fix this, we introduce a new sched_out() handler which is called
>>>>>     for the outgoing thread, which the quota policy uses to update the
>>>>>     remaining budget of preempted threads appropriately. In addition, the
>>>>>     implementation no longer shares the runnable thread queue with
>>>>>     SCHED_FIFO.
>>>>
>>>> ...and why we need a separate queue now? Please provide reasoning for
>>>> changes.
>>>>
>>>>>     
>>>>>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
>>>>>
>>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>>> index ae9690860146..cc824c28004b 100644
>>>>> --- a/include/evl/sched.h
>>>>> +++ b/include/evl/sched.h
>>>>> @@ -120,6 +120,7 @@ struct evl_sched_class {
>>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>>> +	void (*sched_out)(struct evl_thread *thread);
>>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>>  			      struct evl_rq *rq);
>>>>> diff --git a/include/evl/sched/quota.h b/include/evl/sched/quota.h
>>>>> index dfe3b7390958..dc8416645da8 100644
>>>>> --- a/include/evl/sched/quota.h
>>>>> +++ b/include/evl/sched/quota.h
>>>>> @@ -40,6 +40,7 @@ struct evl_quota_group {
>>>>>  
>>>>>  struct evl_sched_quota {
>>>>>  	ktime_t period;
>>>>> +	struct evl_sched_queue runnable;
>>>>>  	struct evl_timer refill_timer;
>>>>>  	struct evl_timer limit_timer;
>>>>>  	struct list_head groups;
>>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>>> index d1d025e06a5d..7127d4e52df9 100644
>>>>> --- a/kernel/evl/sched/core.c
>>>>> +++ b/kernel/evl/sched/core.c
>>>>> @@ -773,7 +773,8 @@ static inline void set_next_running(struct evl_rq *rq,
>>>>>  		evl_stop_timer(&rq->rrbtimer);
>>>>>  }
>>>>>  
>>>>> -static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>>>> +static __always_inline struct evl_thread *
>>>>> +__pick_next_thread(struct evl_rq *rq)
>>>>>  {
>>>>>  	struct evl_sched_class *sched_class;
>>>>>  	struct evl_thread *curr = rq->curr;
>>>>> @@ -821,8 +822,14 @@ static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>>>>  /* rq->curr->lock + rq->lock held, hard irqs off. */
>>>>>  static struct evl_thread *pick_next_thread(struct evl_rq *rq)
>>>>>  {
>>>>> -	struct evl_thread *next = __pick_next_thread(rq);
>>>>> +	struct evl_thread *next, *prev = rq->curr;
>>>>> +	struct evl_sched_class *prev_class = prev->sched_class;
>>>>>  
>>>>> +	if (prev_class->sched_out)
>>>>> +		prev_class->sched_out(prev);
>>>>> +
>>>>> +	next = __pick_next_thread(rq);
>>>>> +	trace_evl_pick_thread(next);
>>>>>  	set_next_running(rq, next);
>>>>>  
>>>>>  	return next;
>>>>> @@ -972,21 +979,20 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>  		return;
>>>>>  	}
>>>>>  
>>>>> +	prev = curr;
>>>>>  	next = pick_next_thread(this_rq);
>>>>> -	trace_evl_pick_thread(next);
>>>>> -	if (next == curr) {
>>>>> -		if (unlikely(next->state & EVL_T_ROOT)) {
>>>>> +	if (next == prev) {
>>>>> +		if (unlikely(prev->state & EVL_T_ROOT)) {
>>>>>  			if (this_rq->local_flags & RQ_TPROXY)
>>>>>  				evl_notify_proxy_tick(this_rq);
>>>>>  			if (this_rq->local_flags & RQ_TDEFER)
>>>>>  				evl_program_local_tick(&evl_mono_clock);
>>>>>  		}
>>>>>  		raw_spin_unlock(&this_rq->lock);
>>>>> -		raw_spin_unlock_irqrestore(&curr->lock, flags);
>>>>> +		raw_spin_unlock_irqrestore(&prev->lock, flags);
>>>>>  		return;
>>>>>  	}
>>>>>  
>>>>> -	prev = curr;
>>>>>  	this_rq->curr = next;
>>>>>  	leaving_inband = false;
>>>>>  
>>>>> diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
>>>>> index 0829da711a66..14748e3a843e 100644
>>>>> --- a/kernel/evl/sched/quota.c
>>>>> +++ b/kernel/evl/sched/quota.c
>>>>> @@ -12,45 +12,35 @@
>>>>>  #include <uapi/evl/sched-abi.h>
>>>>>  
>>>>>  /*
>>>>> - * With this policy, each per-CPU runqueue maintains a list of active
>>>>> - * thread groups for the sched_fifo class.
>>>>> - *
>>>>> - * Each time a thread is picked from the runqueue, we check whether we
>>>>> - * still have budget for running it, looking at the group it belongs
>>>>> - * to. If so, a timer is armed to elapse when that group has no more
>>>>> - * budget, would the incoming thread run unpreempted until then
>>>>> - * (i.e. evl_quota->limit_timer).
>>>>> + * Each time a thread is picked from the ->runnable queue, we check
>>>>> + * whether the group it belongs to still has runtime budget.  If so, a
>>>>> + * timer is armed to fire when that group has no more budget, would
>>>>> + * the incoming thread run unpreempted until then
>>>>> + * (i.e. quota->limit_timer).
>>>>>   *
>>>>>   * Otherwise, if no budget remains in the group for running the
>>>>>   * candidate thread, we move the latter to a local expiry queue
>>>>>   * maintained by the group. This process is done on the fly as we pull
>>>>> - * from the runqueue.
>>>>> + * from the ->runnable queue.
>>>>>   *
>>>>> - * Updating the remaining budget is done each time the EVL core asks
>>>>> - * for replacing the current thread with the next runnable one,
>>>>> - * i.e. evl_quota_pick(). There we charge the elapsed run time of the
>>>>> - * outgoing thread to the relevant group, and conversely, we check
>>>>> - * whether the incoming thread has budget.
>>>>> + * Updating the remaining budget is done each time the EVL core
>>>>> + * schedules out a thread undergoing the quota scheduling policy,
>>>>>   *
>>>>> - * Finally, a per-CPU timer (evl_quota->refill_timer) periodically
>>>>> - * ticks in the background, in accordance to the defined quota
>>>>> - * interval. Thread group budgets get replenished by its handler in
>>>>> - * accordance to their respective share, pushing all expired threads
>>>>> - * back to the run queue in the same move.
>>>>> + * Finally, a per-CPU timer (quota->refill_timer) periodically ticks
>>>>> + * in the background, in accordance to the defined quota interval,
>>>>> + * replenishing per-group budgets, pushing all expired threads back to
>>>>> + * the quota ->runqueue too.
>>>>>   *
>>>>> - * NOTE: since the core logic enforcing the budget entirely happens in
>>>>> - * evl_quota_pick(), applying a budget change can be done as simply as
>>>>> - * forcing the rescheduling procedure to be invoked asap. As a result
>>>>> - * of this, the EVL core will ask for the next thread to run, which
>>>>> - * means calling evl_quota_pick() eventually.
>>>>> + * NOTE: forcing a call to the rescheduling procedure is enoiugh to
>>>>> + * apply a budget change.
>>>>>   *
>>>>> - * CAUTION: evl_quota_group->nr_active does count both the threads
>>>>> - * from that group linked to the sched_fifo runqueue, _and_ the
>>>>> - * threads moved to the local expiry queue. As a matter of fact, the
>>>>> - * expired threads - those for which we consumed all the per-group
>>>>> - * budget - are still seen as runnable (i.e. not blocked/suspended) by
>>>>> - * the EVL core. This only means that the SCHED_QUOTA policy won't
>>>>> - * pick them until the corresponding budget is replenished.
>>>>> + * CAUTION: quota_group->nr_active does count both the threads from
>>>>> + * that group linked to the runnable queue, _and_ the threads moved to
>>>>> + * the local expiry queue. As a matter of fact, the expired threads -
>>>>> + * those for which we consumed all the per-group budget - are still
>>>>> + * seen as runnable (i.e. not blocked/suspended) by the EVL core. This
>>>>> + * only means that the SCHED_QUOTA policy won't pick them until the
>>>>> + * corresponding budget is replenished.
>>>>>   */
>>>>>  
>>>>>  #define MAX_QUOTA_GROUPS  1024
>>>>> @@ -61,15 +51,19 @@ static DECLARE_BITMAP(group_map, MAX_QUOTA_GROUPS);
>>>>>  
>>>>>  static LIST_HEAD(group_list);
>>>>>  
>>>>> -static inline bool thread_on_quota(struct evl_thread *thread,
>>>>> -				struct evl_quota_group *tg)
>>>>> +static inline bool current_on_quota(struct evl_quota_group *tg)
>>>>>  {
>>>>> -	/*
>>>>> -	 * Check whether @thread is running on some CPU, and belongs
>>>>> -	 * to quota group @tg.
>>>>> -	 */
>>>>> -	return thread->quota == tg &&
>>>>> -		!(thread->state & (EVL_T_READY|EVL_THREAD_BLOCK_MASK));
>>>>> +	struct evl_rq *rq = tg->rq;
>>>>> +	struct evl_thread *curr = rq->curr;
>>>>> +	struct evl_sched_quota *qs = &rq->quota;
>>>>> +
>>>>> +	if (curr->quota != tg)
>>>>> +		return false;
>>>>> +
>>>>> +	if (curr->state & (EVL_T_READY|EVL_T_KICKED|EVL_THREAD_BLOCK_MASK))
>>>>> +		return false;
>>>>> +
>>>>> +	return evl_timer_is_running(&qs->limit_timer);
>>>>>  }
>>>>>  
>>>>>  static inline bool group_is_active(struct evl_quota_group *tg)
>>>>> @@ -82,7 +76,7 @@ static inline bool group_is_active(struct evl_quota_group *tg)
>>>>>  	 * runqueue, in which case tg->nr_active already accounted for
>>>>>  	 * it.
>>>>>  	 */
>>>>> -	return thread_on_quota(tg->rq->curr, tg);
>>>>> +	return current_on_quota(tg);
>>>>>  }
>>>>>  
>>>>>  static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>> @@ -134,10 +128,10 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>>  	} else if (tg->run_credit) {
>>>>>  		credit = ktime_sub(tg->quota_peak, budget);
>>>>>  		/* Consume the accumulated credit. */
>>>>> -		if (tg->run_credit >= credit)
>>>>> +		if (tg->run_credit >= credit) {
>>>>>  			tg->run_credit =
>>>>>  				ktime_sub(tg->run_credit, credit);
>>>>> -		else {
>>>>> +		} else {
>>>>>  			credit = tg->run_credit;
>>>>>  			tg->run_credit = 0;
>>>>>  		}
>>>>> @@ -150,8 +144,8 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>>  
>>>>>  static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled */
>>>>>  {
>>>>> -	struct evl_quota_group *tg;
>>>>>  	struct evl_thread *thread, *tmp;
>>>>> +	struct evl_quota_group *tg;
>>>>>  	struct evl_sched_quota *qs;
>>>>>  	struct evl_rq *rq;
>>>>>  
>>>>> @@ -167,7 +161,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>>>  		if (tg->run_budget == 0 || list_empty(&tg->expired))
>>>>>  			continue;
>>>>>  		/*
>>>>> -		 * For each group living on this CPU, move all expired
>>>>> +		 * For each group pinned on this CPU, move all expired
>>>>>  		 * threads back to the runqueue. Since those threads
>>>>>  		 * were moved out of the runqueue as we were
>>>>>  		 * considering them for execution, we push them back
>>>>> @@ -178,7 +172,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>>>  		list_for_each_entry_safe_reverse(thread, tmp,
>>>>>  						&tg->expired, quota_expired) {
>>>>>  			list_del_init(&thread->quota_expired);
>>>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>>>> +			evl_add_schedq(&qs->runnable, thread);
>>>>>  		}
>>>>>  	}
>>>>>  
>>>>> @@ -195,7 +189,7 @@ static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
>>>>>  	/*
>>>>>  	 * Force a rescheduling on the return path of the current
>>>>>  	 * interrupt, so that the budget is re-evaluated for the
>>>>> -	 * current group in evl_quota_pick().
>>>>> +	 * current group in quota_pick().
>>>>>  	 */
>>>>>  	raw_spin_lock(&rq->lock);
>>>>>  	evl_set_self_resched(rq);
>>>>> @@ -221,6 +215,7 @@ static void quota_init(struct evl_rq *rq)
>>>>>  {
>>>>>  	struct evl_sched_quota *qs = &rq->quota;
>>>>>  
>>>>> +	evl_init_schedq(&qs->runnable);
>>>>>  	qs->period = quota_period;
>>>>>  	INIT_LIST_HEAD(&qs->groups);
>>>>>  
>>>>> @@ -337,8 +332,8 @@ static void quota_forget(struct evl_thread *thread)
>>>>>  
>>>>>  static void quota_kick(struct evl_thread *thread)
>>>>>  {
>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>  
>>>>>  	/*
>>>>>  	 * Allow a kicked thread to be elected for running until it
>>>>> @@ -347,7 +342,7 @@ static void quota_kick(struct evl_thread *thread)
>>>>>  	 */
>>>>>  	if (tg->run_budget == 0 && !list_empty(&thread->quota_expired)) {
>>>>>  		list_del_init(&thread->quota_expired);
>>>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>>>  	}
>>>>>  }
>>>>>  
>>>>> @@ -358,79 +353,82 @@ static inline int thread_is_runnable(struct evl_thread *thread)
>>>>>  
>>>>>  static void quota_enqueue(struct evl_thread *thread)
>>>>>  {
>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>  
>>>>>  	if (!thread_is_runnable(thread))
>>>>>  		list_add_tail(&thread->quota_expired, &tg->expired);
>>>>>  	else
>>>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>>>  
>>>>>  	tg->nr_active++;
>>>>>  }
>>>>>  
>>>>>  static void quota_dequeue(struct evl_thread *thread)
>>>>>  {
>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>  
>>>>>  	if (!list_empty(&thread->quota_expired))
>>>>>  		list_del_init(&thread->quota_expired);
>>>>>  	else
>>>>> -		evl_del_schedq(&rq->fifo.runnable, thread);
>>>>> +		evl_del_schedq(&qs->runnable, thread);
>>>>>  
>>>>>  	tg->nr_active--;
>>>>>  }
>>>>>  
>>>>>  static void quota_requeue(struct evl_thread *thread)
>>>>>  {
>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>  
>>>>>  	if (!thread_is_runnable(thread))
>>>>>  		list_add(&thread->quota_expired, &tg->expired);
>>>>>  	else
>>>>> -		evl_add_schedq(&rq->fifo.runnable, thread);
>>>>> +		evl_add_schedq(&qs->runnable, thread);
>>>>>  
>>>>>  	tg->nr_active++;
>>>>>  }
>>>>>  
>>>>> -static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>>> +static void quota_out(struct evl_thread *thread)
>>>>>  {
>>>>> -	struct evl_thread *next, *curr = rq->curr;
>>>>> -	struct evl_sched_quota *qs = &rq->quota;
>>>>> -	struct evl_quota_group *otg, *tg;
>>>>> -	ktime_t now, elapsed;
>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>> +	struct evl_quota_group *tg = thread->quota;
>>>>> +	ktime_t now, consumed;
>>>>> +
>>>>> +	/* Timer off means that we are not tracking quota. */
>>>>> +	if (!evl_timer_is_running(&qs->limit_timer))
>>>>> +		return;
>>>>>  
>>>>> -	now = evl_ktime_monotonic();
>>>>> -	otg = curr->quota;
>>>>> -	if (otg == NULL)
>>>>> -		goto pick;
>>>>>  	/*
>>>>>  	 * Charge the time consumed by the outgoing thread to the
>>>>>  	 * group it belongs to.
>>>>>  	 */
>>>>> -	elapsed = ktime_sub(now, otg->run_start);
>>>>> -	if (elapsed < otg->run_budget)
>>>>> -		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
>>>>> -	else
>>>>> -		otg->run_budget = 0;
>>>>> +	now = evl_ktime_monotonic();
>>>>
>>>> Thing brins some, though minor, glitch when doing the run_start update
>>>> for the next group already in pick. That's why I'm trying to reuse that
>>>> stamp when available.
>>>>
>>>>> +	consumed = ktime_sub(now, tg->run_start);
>>>>> +	if (consumed < tg->run_budget) {
>>>>> +		tg->run_budget = ktime_sub(tg->run_budget, consumed);
>>>>> +	} else {
>>>>> +		tg->run_budget = 0;
>>>>> +		evl_stop_timer(&qs->limit_timer);
>>>>> +	}
>>>>> +}
>>>>> +
>>>>> +static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>>> +{
>>>>> +	struct evl_thread *next, *curr = rq->curr;
>>>>> +	struct evl_sched_quota *qs = &rq->quota;
>>>>> +	struct evl_quota_group *tg;
>>>>> +
>>>>>  pick:
>>>>> -	next = evl_get_schedq(&rq->fifo.runnable);
>>>>> +	next = evl_get_schedq(&qs->runnable);
>>>>>  	if (next == NULL) {
>>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>>  		return NULL;
>>>>>  	}
>>>>>  
>>>>> -	/*
>>>>> -	 * As we basically piggyback on the SCHED_FIFO runqueue, make
>>>>> -	 * sure to detect non-quota threads.
>>>>> -	 */
>>>>>  	tg = next->quota;
>>>>> -	if (tg == NULL)
>>>>> -		return next;
>>>>> -
>>>>> -	tg->run_start = now;
>>>>> +	tg->nr_active--;
>>>>>  
>>>>>  	/*
>>>>>  	 * Don't consider budget if kicked, we have to allow this
>>>>> @@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>>>  	 */
>>>>>  	if (next->info & EVL_T_KICKED) {
>>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>> -		goto out;
>>>>> +		return next;
>>>>>  	}
>>>>>  
>>>>> +	/*
>>>>> +	 * __pick_next_thread() might have requeued the current
>>>>> +	 * thread which is still leading the pack.
>>>>> +	 */
>>>>> +	if (curr == next)
>>>>> +		return next;
>>>>> +
>>>>>  	if (ktime_to_ns(tg->run_budget) == 0) {
>>>>> -		/* Flush expired group members as we go. */
>>>>> +		/* Park expired group members as we go. */
>>>>>  		list_add_tail(&next->quota_expired, &tg->expired);
>>>>>  		goto pick;
>>>>>  	}
>>>>>  
>>>>> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
>>>>> -		/* Same group, leave the running timer untouched. */
>>>>> -		goto out;
>>>>> -
>>>>> -	/* Arm limit timer for the new running group. */
>>>>> -	evl_start_timer(&qs->limit_timer,
>>>>> -			ktime_add(now, tg->run_budget),
>>>>> -			EVL_INFINITE);
>>>>> -out:
>>>>> -	tg->nr_active--;
>>>>> +	/* Arm new limit timer on change of running group. */
>>>>> +	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
>>>>> +		tg->run_start = evl_ktime_monotonic();
>>>>> +		evl_start_timer(&qs->limit_timer,
>>>>> +				ktime_add(tg->run_start, tg->run_budget),
>>>>> +				EVL_INFINITE);
>>>>> +	}
>>>>>  
>>>>>  	return next;
>>>>>  }
>>>>> @@ -542,7 +544,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>>>  	 * Unregister the group before we drop rq->lock. As a result,
>>>>>  	 * it won't accept threads anymore while we are busy moving
>>>>>  	 * the current members to the fifo class, and concurrent
>>>>> -	 * evl_quota_remove requests would receive -EINVAL.
>>>>> +	 * quota_remove requests would receive -EINVAL.
>>>>>  	 */
>>>>>  	__clear_bit(tg->tgid, group_map);
>>>>>  	list_del(&tg->next);
>>>>> @@ -556,7 +558,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>>>  	 * hold rq->lock on entry, we do a trylock dance to prevent an
>>>>>  	 * ABBA issue. No livelock is possible since we unregistered
>>>>>  	 * that group already, so &tg->members can only be depleted
>>>>> -	 * (by this loop specifically).
>>>>> +	 * (by this loop exclusively).
>>>>>  	 */
>>>>>  
>>>>>  	while (!list_empty(&tg->members)) {
>>>>> @@ -583,10 +585,10 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>>  			int *quota_sum_r)
>>>>>  {
>>>>>  	struct evl_rq *rq = tg->rq;
>>>>> -	struct evl_thread *thread, *tmp, *curr = rq->curr;
>>>>> +	struct evl_thread *thread, *tmp;
>>>>>  	struct evl_sched_quota *qs = &rq->quota;
>>>>> -	ktime_t now, elapsed, consumed;
>>>>>  	ktime_t old_quota = tg->quota;
>>>>> +	ktime_t consumed;
>>>>>  	u64 n;
>>>>>  
>>>>>  	assert_hard_lock(&rq->lock);
>>>>> @@ -615,16 +617,8 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>>  	tg->quota_percent = quota_percent;
>>>>>  	tg->quota_peak_percent = quota_peak_percent;
>>>>>  
>>>>> -	if (thread_on_quota(curr, tg)) {
>>>>> -		now = evl_ktime_monotonic();
>>>>> -
>>>>> -		elapsed = now - tg->run_start;
>>>>> -		if (elapsed < tg->run_budget)
>>>>> -			tg->run_budget -= elapsed;
>>>>> -		else
>>>>> -			tg->run_budget = 0;
>>>>> -
>>>>> -		tg->run_start = now;
>>>>> +	if (current_on_quota(tg)) {
>>>>> +		quota_out(rq->curr);
>>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>>  	}
>>>>>  
>>>>> @@ -646,7 +640,7 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>>  		list_for_each_entry_safe_reverse(thread, tmp, &tg->expired,
>>>>>  						quota_expired) {
>>>>>  			list_del_init(&thread->quota_expired);
>>>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>>>> +			evl_add_schedq(&qs->runnable, thread);
>>>>>  		}
>>>>>  	}
>>>>>  
>>>>> @@ -786,6 +780,7 @@ struct evl_sched_class evl_sched_quota = {
>>>>>  	.sched_dequeue		=	quota_dequeue,
>>>>>  	.sched_requeue		=	quota_requeue,
>>>>>  	.sched_pick		=	quota_pick,
>>>>> +	.sched_out		=	quota_out,
>>>>>  	.sched_migrate		=	quota_migrate,
>>>>>  	.sched_chkparam		=	quota_chkparam,
>>>>>  	.sched_setparam		=	quota_setparam,
>>>>>
>>>>
>>>> You also seem to miss the case I discovered only yesterday as well: When
>>>> starting the current thread on a quota via setparam, we also need to set
>>>> its run_start stamp at that point.
>>> 
>>> run_start should be meaningful only when a limit timer runs. 
>>> 
>>
>> Right, but are you sure it will be started in the case I described...?
>>
>
> To be started, it must be picked, so yes.

The way the original implementation dealt with ->run_start was a mess
(proudly brought to you by /me). The basic idea for fixing this is to
acknowledge that we have no reason to have a limit timer running unless
we have a group to charge with the budget consumed by the current
thread. Changing the code to reflect that basic point makes it much
clearer I believe.

-- 
Philippe.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  7:30           ` Philippe Gerum
  2026-06-15  7:34             ` Philippe Gerum
@ 2026-06-15  8:34             ` Jan Kiszka
  2026-06-15  8:53               ` Philippe Gerum
  2026-06-15  9:02               ` Philippe Gerum
  1 sibling, 2 replies; 24+ messages in thread
From: Jan Kiszka @ 2026-06-15  8:34 UTC (permalink / raw)
  To: Philippe Gerum; +Cc: Xenomai

On 15.06.26 09:30, Philippe Gerum wrote:
> Jan Kiszka <jan.kiszka@siemens.com> writes:
> 
>> On 15.06.26 09:17, Philippe Gerum wrote:
>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>
>>>> On 15.06.26 08:42, Philippe Gerum wrote:
>>>>> Philippe Gerum <rpm@xenomai.org> writes:
>>>>>
>>>>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>>>>
>>>>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>>>
>>>>>>> This shall be invoked before a thread switch, providing both the current
>>>>>>> and the next thread as arguments. Some scheduling classes may need it to
>>>>>>> correctly handle their state as the sched_pick may not be invoked when a
>>>>>>> higher-weighted class is providing the next thread.
>>>>>>>
>>>>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>>> ---
>>>>>>>  include/evl/sched.h     | 2 ++
>>>>>>>  kernel/evl/sched/core.c | 5 +++++
>>>>>>>  2 files changed, 7 insertions(+)
>>>>>>>
>>>>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>>>>> index ae9690860146c..0b16f1b1cf626 100644
>>>>>>> --- a/include/evl/sched.h
>>>>>>> +++ b/include/evl/sched.h
>>>>>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>>>>> +	void (*sched_out)(struct evl_thread *thread,
>>>>>>> +			  struct evl_thread *next);
>>>>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>>>>  			      struct evl_rq *rq);
>>>>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>>>>> index eb133e334d30f..0d49fc16bd67e 100644
>>>>>>> --- a/kernel/evl/sched/core.c
>>>>>>> +++ b/kernel/evl/sched/core.c
>>>>>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>>>>>   */
>>>>>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>>  {
>>>>>>> +	struct evl_sched_class *prev_schedclass;
>>>>>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>>>>>  	struct evl_thread *prev, *next, *curr;
>>>>>>>  	bool leaving_inband, inband_tail;
>>>>>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>>  	this_rq->curr = next;
>>>>>>>  	leaving_inband = false;
>>>>>>>  
>>>>>>> +	prev_schedclass = prev->sched_class;
>>>>>>> +	if (prev_schedclass->sched_out)
>>>>>>> +		prev_schedclass->sched_out(prev, next);
>>>>>>> +
>>>>>>>  	/*
>>>>>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>>>>>  	 * performing the rest of the context switch code
>>>>>>
>>>>>> I've been working on this lately too. It turns out that we need more
>>>>>> than this, although this is definitely part of the solution. I'll follow
>>>>>> up on this issue.
>>>>>
>>>>> This is still wip, I'm sharing this early to discuss details for
>>>>> reconciling both proposals (the hunk in __evl_schedule() is merely
>>>>> cosmetic, no functional change).
>>>>>
>>>>
>>>> Please avoid that for the final patch. I already had troubles telling
>>>> substantial from cosmetical changes apart in your lazy-blocking commit
>>>> (>50% unrelated changes in there).
>>>>
>>>>> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
>>>>> Author: Philippe Gerum <rpm@xenomai.org>
>>>>> Date:   Sun Jun 14 12:08:59 2026 +0200
>>>>>
>>>>>     evl: sched/quota: fix budget tracking on preemption
>>>>>     
>>>>>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>>>>>     runtime budget of the former is inaccurately tracked. This is due to
>>>>>     the fifo->pick() handler returning a valid thread, which prevents the
>>>>>     quota->pick() handler from being called. As a result, the last runtime
>>>>>     period of the outgoing thread is not accounted for.
>>>>>     
>>>>>     To fix this, we introduce a new sched_out() handler which is called
>>>>>     for the outgoing thread, which the quota policy uses to update the
>>>>>     remaining budget of preempted threads appropriately. In addition, the
>>>>>     implementation no longer shares the runnable thread queue with
>>>>>     SCHED_FIFO.
>>>>
>>>> ...and why we need a separate queue now? Please provide reasoning for
>>>> changes.
>>>>
>>>>>     
>>>>>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
>>>>>
>>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>>> index ae9690860146..cc824c28004b 100644
>>>>> --- a/include/evl/sched.h
>>>>> +++ b/include/evl/sched.h
>>>>> @@ -120,6 +120,7 @@ struct evl_sched_class {
>>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>>> +	void (*sched_out)(struct evl_thread *thread);
>>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>>  			      struct evl_rq *rq);
>>>>> diff --git a/include/evl/sched/quota.h b/include/evl/sched/quota.h
>>>>> index dfe3b7390958..dc8416645da8 100644
>>>>> --- a/include/evl/sched/quota.h
>>>>> +++ b/include/evl/sched/quota.h
>>>>> @@ -40,6 +40,7 @@ struct evl_quota_group {
>>>>>  
>>>>>  struct evl_sched_quota {
>>>>>  	ktime_t period;
>>>>> +	struct evl_sched_queue runnable;
>>>>>  	struct evl_timer refill_timer;
>>>>>  	struct evl_timer limit_timer;
>>>>>  	struct list_head groups;
>>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>>> index d1d025e06a5d..7127d4e52df9 100644
>>>>> --- a/kernel/evl/sched/core.c
>>>>> +++ b/kernel/evl/sched/core.c
>>>>> @@ -773,7 +773,8 @@ static inline void set_next_running(struct evl_rq *rq,
>>>>>  		evl_stop_timer(&rq->rrbtimer);
>>>>>  }
>>>>>  
>>>>> -static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>>>> +static __always_inline struct evl_thread *
>>>>> +__pick_next_thread(struct evl_rq *rq)
>>>>>  {
>>>>>  	struct evl_sched_class *sched_class;
>>>>>  	struct evl_thread *curr = rq->curr;
>>>>> @@ -821,8 +822,14 @@ static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>>>>  /* rq->curr->lock + rq->lock held, hard irqs off. */
>>>>>  static struct evl_thread *pick_next_thread(struct evl_rq *rq)
>>>>>  {
>>>>> -	struct evl_thread *next = __pick_next_thread(rq);
>>>>> +	struct evl_thread *next, *prev = rq->curr;
>>>>> +	struct evl_sched_class *prev_class = prev->sched_class;
>>>>>  
>>>>> +	if (prev_class->sched_out)
>>>>> +		prev_class->sched_out(prev);
>>>>> +
>>>>> +	next = __pick_next_thread(rq);
>>>>> +	trace_evl_pick_thread(next);
>>>>>  	set_next_running(rq, next);
>>>>>  
>>>>>  	return next;
>>>>> @@ -972,21 +979,20 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>  		return;
>>>>>  	}
>>>>>  
>>>>> +	prev = curr;
>>>>>  	next = pick_next_thread(this_rq);
>>>>> -	trace_evl_pick_thread(next);
>>>>> -	if (next == curr) {
>>>>> -		if (unlikely(next->state & EVL_T_ROOT)) {
>>>>> +	if (next == prev) {
>>>>> +		if (unlikely(prev->state & EVL_T_ROOT)) {
>>>>>  			if (this_rq->local_flags & RQ_TPROXY)
>>>>>  				evl_notify_proxy_tick(this_rq);
>>>>>  			if (this_rq->local_flags & RQ_TDEFER)
>>>>>  				evl_program_local_tick(&evl_mono_clock);
>>>>>  		}
>>>>>  		raw_spin_unlock(&this_rq->lock);
>>>>> -		raw_spin_unlock_irqrestore(&curr->lock, flags);
>>>>> +		raw_spin_unlock_irqrestore(&prev->lock, flags);
>>>>>  		return;
>>>>>  	}
>>>>>  
>>>>> -	prev = curr;
>>>>>  	this_rq->curr = next;
>>>>>  	leaving_inband = false;
>>>>>  
>>>>> diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
>>>>> index 0829da711a66..14748e3a843e 100644
>>>>> --- a/kernel/evl/sched/quota.c
>>>>> +++ b/kernel/evl/sched/quota.c
>>>>> @@ -12,45 +12,35 @@
>>>>>  #include <uapi/evl/sched-abi.h>
>>>>>  
>>>>>  /*
>>>>> - * With this policy, each per-CPU runqueue maintains a list of active
>>>>> - * thread groups for the sched_fifo class.
>>>>> - *
>>>>> - * Each time a thread is picked from the runqueue, we check whether we
>>>>> - * still have budget for running it, looking at the group it belongs
>>>>> - * to. If so, a timer is armed to elapse when that group has no more
>>>>> - * budget, would the incoming thread run unpreempted until then
>>>>> - * (i.e. evl_quota->limit_timer).
>>>>> + * Each time a thread is picked from the ->runnable queue, we check
>>>>> + * whether the group it belongs to still has runtime budget.  If so, a
>>>>> + * timer is armed to fire when that group has no more budget, would
>>>>> + * the incoming thread run unpreempted until then
>>>>> + * (i.e. quota->limit_timer).
>>>>>   *
>>>>>   * Otherwise, if no budget remains in the group for running the
>>>>>   * candidate thread, we move the latter to a local expiry queue
>>>>>   * maintained by the group. This process is done on the fly as we pull
>>>>> - * from the runqueue.
>>>>> + * from the ->runnable queue.
>>>>>   *
>>>>> - * Updating the remaining budget is done each time the EVL core asks
>>>>> - * for replacing the current thread with the next runnable one,
>>>>> - * i.e. evl_quota_pick(). There we charge the elapsed run time of the
>>>>> - * outgoing thread to the relevant group, and conversely, we check
>>>>> - * whether the incoming thread has budget.
>>>>> + * Updating the remaining budget is done each time the EVL core
>>>>> + * schedules out a thread undergoing the quota scheduling policy,
>>>>>   *
>>>>> - * Finally, a per-CPU timer (evl_quota->refill_timer) periodically
>>>>> - * ticks in the background, in accordance to the defined quota
>>>>> - * interval. Thread group budgets get replenished by its handler in
>>>>> - * accordance to their respective share, pushing all expired threads
>>>>> - * back to the run queue in the same move.
>>>>> + * Finally, a per-CPU timer (quota->refill_timer) periodically ticks
>>>>> + * in the background, in accordance to the defined quota interval,
>>>>> + * replenishing per-group budgets, pushing all expired threads back to
>>>>> + * the quota ->runqueue too.
>>>>>   *
>>>>> - * NOTE: since the core logic enforcing the budget entirely happens in
>>>>> - * evl_quota_pick(), applying a budget change can be done as simply as
>>>>> - * forcing the rescheduling procedure to be invoked asap. As a result
>>>>> - * of this, the EVL core will ask for the next thread to run, which
>>>>> - * means calling evl_quota_pick() eventually.
>>>>> + * NOTE: forcing a call to the rescheduling procedure is enoiugh to
>>>>> + * apply a budget change.
>>>>>   *
>>>>> - * CAUTION: evl_quota_group->nr_active does count both the threads
>>>>> - * from that group linked to the sched_fifo runqueue, _and_ the
>>>>> - * threads moved to the local expiry queue. As a matter of fact, the
>>>>> - * expired threads - those for which we consumed all the per-group
>>>>> - * budget - are still seen as runnable (i.e. not blocked/suspended) by
>>>>> - * the EVL core. This only means that the SCHED_QUOTA policy won't
>>>>> - * pick them until the corresponding budget is replenished.
>>>>> + * CAUTION: quota_group->nr_active does count both the threads from
>>>>> + * that group linked to the runnable queue, _and_ the threads moved to
>>>>> + * the local expiry queue. As a matter of fact, the expired threads -
>>>>> + * those for which we consumed all the per-group budget - are still
>>>>> + * seen as runnable (i.e. not blocked/suspended) by the EVL core. This
>>>>> + * only means that the SCHED_QUOTA policy won't pick them until the
>>>>> + * corresponding budget is replenished.
>>>>>   */
>>>>>  
>>>>>  #define MAX_QUOTA_GROUPS  1024
>>>>> @@ -61,15 +51,19 @@ static DECLARE_BITMAP(group_map, MAX_QUOTA_GROUPS);
>>>>>  
>>>>>  static LIST_HEAD(group_list);
>>>>>  
>>>>> -static inline bool thread_on_quota(struct evl_thread *thread,
>>>>> -				struct evl_quota_group *tg)
>>>>> +static inline bool current_on_quota(struct evl_quota_group *tg)
>>>>>  {
>>>>> -	/*
>>>>> -	 * Check whether @thread is running on some CPU, and belongs
>>>>> -	 * to quota group @tg.
>>>>> -	 */
>>>>> -	return thread->quota == tg &&
>>>>> -		!(thread->state & (EVL_T_READY|EVL_THREAD_BLOCK_MASK));
>>>>> +	struct evl_rq *rq = tg->rq;
>>>>> +	struct evl_thread *curr = rq->curr;
>>>>> +	struct evl_sched_quota *qs = &rq->quota;
>>>>> +
>>>>> +	if (curr->quota != tg)
>>>>> +		return false;
>>>>> +
>>>>> +	if (curr->state & (EVL_T_READY|EVL_T_KICKED|EVL_THREAD_BLOCK_MASK))
>>>>> +		return false;
>>>>> +
>>>>> +	return evl_timer_is_running(&qs->limit_timer);
>>>>>  }
>>>>>  
>>>>>  static inline bool group_is_active(struct evl_quota_group *tg)
>>>>> @@ -82,7 +76,7 @@ static inline bool group_is_active(struct evl_quota_group *tg)
>>>>>  	 * runqueue, in which case tg->nr_active already accounted for
>>>>>  	 * it.
>>>>>  	 */
>>>>> -	return thread_on_quota(tg->rq->curr, tg);
>>>>> +	return current_on_quota(tg);
>>>>>  }
>>>>>  
>>>>>  static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>> @@ -134,10 +128,10 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>>  	} else if (tg->run_credit) {
>>>>>  		credit = ktime_sub(tg->quota_peak, budget);
>>>>>  		/* Consume the accumulated credit. */
>>>>> -		if (tg->run_credit >= credit)
>>>>> +		if (tg->run_credit >= credit) {
>>>>>  			tg->run_credit =
>>>>>  				ktime_sub(tg->run_credit, credit);
>>>>> -		else {
>>>>> +		} else {
>>>>>  			credit = tg->run_credit;
>>>>>  			tg->run_credit = 0;
>>>>>  		}
>>>>> @@ -150,8 +144,8 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>>  
>>>>>  static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled */
>>>>>  {
>>>>> -	struct evl_quota_group *tg;
>>>>>  	struct evl_thread *thread, *tmp;
>>>>> +	struct evl_quota_group *tg;
>>>>>  	struct evl_sched_quota *qs;
>>>>>  	struct evl_rq *rq;
>>>>>  
>>>>> @@ -167,7 +161,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>>>  		if (tg->run_budget == 0 || list_empty(&tg->expired))
>>>>>  			continue;
>>>>>  		/*
>>>>> -		 * For each group living on this CPU, move all expired
>>>>> +		 * For each group pinned on this CPU, move all expired
>>>>>  		 * threads back to the runqueue. Since those threads
>>>>>  		 * were moved out of the runqueue as we were
>>>>>  		 * considering them for execution, we push them back
>>>>> @@ -178,7 +172,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>>>  		list_for_each_entry_safe_reverse(thread, tmp,
>>>>>  						&tg->expired, quota_expired) {
>>>>>  			list_del_init(&thread->quota_expired);
>>>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>>>> +			evl_add_schedq(&qs->runnable, thread);
>>>>>  		}
>>>>>  	}
>>>>>  
>>>>> @@ -195,7 +189,7 @@ static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
>>>>>  	/*
>>>>>  	 * Force a rescheduling on the return path of the current
>>>>>  	 * interrupt, so that the budget is re-evaluated for the
>>>>> -	 * current group in evl_quota_pick().
>>>>> +	 * current group in quota_pick().
>>>>>  	 */
>>>>>  	raw_spin_lock(&rq->lock);
>>>>>  	evl_set_self_resched(rq);
>>>>> @@ -221,6 +215,7 @@ static void quota_init(struct evl_rq *rq)
>>>>>  {
>>>>>  	struct evl_sched_quota *qs = &rq->quota;
>>>>>  
>>>>> +	evl_init_schedq(&qs->runnable);
>>>>>  	qs->period = quota_period;
>>>>>  	INIT_LIST_HEAD(&qs->groups);
>>>>>  
>>>>> @@ -337,8 +332,8 @@ static void quota_forget(struct evl_thread *thread)
>>>>>  
>>>>>  static void quota_kick(struct evl_thread *thread)
>>>>>  {
>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>  
>>>>>  	/*
>>>>>  	 * Allow a kicked thread to be elected for running until it
>>>>> @@ -347,7 +342,7 @@ static void quota_kick(struct evl_thread *thread)
>>>>>  	 */
>>>>>  	if (tg->run_budget == 0 && !list_empty(&thread->quota_expired)) {
>>>>>  		list_del_init(&thread->quota_expired);
>>>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>>>  	}
>>>>>  }
>>>>>  
>>>>> @@ -358,79 +353,82 @@ static inline int thread_is_runnable(struct evl_thread *thread)
>>>>>  
>>>>>  static void quota_enqueue(struct evl_thread *thread)
>>>>>  {
>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>  
>>>>>  	if (!thread_is_runnable(thread))
>>>>>  		list_add_tail(&thread->quota_expired, &tg->expired);
>>>>>  	else
>>>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>>>  
>>>>>  	tg->nr_active++;
>>>>>  }
>>>>>  
>>>>>  static void quota_dequeue(struct evl_thread *thread)
>>>>>  {
>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>  
>>>>>  	if (!list_empty(&thread->quota_expired))
>>>>>  		list_del_init(&thread->quota_expired);
>>>>>  	else
>>>>> -		evl_del_schedq(&rq->fifo.runnable, thread);
>>>>> +		evl_del_schedq(&qs->runnable, thread);
>>>>>  
>>>>>  	tg->nr_active--;
>>>>>  }
>>>>>  
>>>>>  static void quota_requeue(struct evl_thread *thread)
>>>>>  {
>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>  
>>>>>  	if (!thread_is_runnable(thread))
>>>>>  		list_add(&thread->quota_expired, &tg->expired);
>>>>>  	else
>>>>> -		evl_add_schedq(&rq->fifo.runnable, thread);
>>>>> +		evl_add_schedq(&qs->runnable, thread);
>>>>>  
>>>>>  	tg->nr_active++;
>>>>>  }
>>>>>  
>>>>> -static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>>> +static void quota_out(struct evl_thread *thread)
>>>>>  {
>>>>> -	struct evl_thread *next, *curr = rq->curr;
>>>>> -	struct evl_sched_quota *qs = &rq->quota;
>>>>> -	struct evl_quota_group *otg, *tg;
>>>>> -	ktime_t now, elapsed;
>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>> +	struct evl_quota_group *tg = thread->quota;
>>>>> +	ktime_t now, consumed;
>>>>> +
>>>>> +	/* Timer off means that we are not tracking quota. */
>>>>> +	if (!evl_timer_is_running(&qs->limit_timer))
>>>>> +		return;
>>>>>  
>>>>> -	now = evl_ktime_monotonic();
>>>>> -	otg = curr->quota;
>>>>> -	if (otg == NULL)
>>>>> -		goto pick;
>>>>>  	/*
>>>>>  	 * Charge the time consumed by the outgoing thread to the
>>>>>  	 * group it belongs to.
>>>>>  	 */
>>>>> -	elapsed = ktime_sub(now, otg->run_start);
>>>>> -	if (elapsed < otg->run_budget)
>>>>> -		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
>>>>> -	else
>>>>> -		otg->run_budget = 0;
>>>>> +	now = evl_ktime_monotonic();
>>>>
>>>> Thing brins some, though minor, glitch when doing the run_start update
>>>> for the next group already in pick. That's why I'm trying to reuse that
>>>> stamp when available.
>>>>
>>>>> +	consumed = ktime_sub(now, tg->run_start);
>>>>> +	if (consumed < tg->run_budget) {
>>>>> +		tg->run_budget = ktime_sub(tg->run_budget, consumed);
>>>>> +	} else {
>>>>> +		tg->run_budget = 0;
>>>>> +		evl_stop_timer(&qs->limit_timer);
>>>>> +	}
>>>>> +}
>>>>> +
>>>>> +static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>>> +{
>>>>> +	struct evl_thread *next, *curr = rq->curr;
>>>>> +	struct evl_sched_quota *qs = &rq->quota;
>>>>> +	struct evl_quota_group *tg;
>>>>> +
>>>>>  pick:
>>>>> -	next = evl_get_schedq(&rq->fifo.runnable);
>>>>> +	next = evl_get_schedq(&qs->runnable);
>>>>>  	if (next == NULL) {
>>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>>  		return NULL;
>>>>>  	}
>>>>>  
>>>>> -	/*
>>>>> -	 * As we basically piggyback on the SCHED_FIFO runqueue, make
>>>>> -	 * sure to detect non-quota threads.
>>>>> -	 */
>>>>>  	tg = next->quota;
>>>>> -	if (tg == NULL)
>>>>> -		return next;
>>>>> -
>>>>> -	tg->run_start = now;
>>>>> +	tg->nr_active--;
>>>>>  
>>>>>  	/*
>>>>>  	 * Don't consider budget if kicked, we have to allow this
>>>>> @@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>>>  	 */
>>>>>  	if (next->info & EVL_T_KICKED) {
>>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>> -		goto out;
>>>>> +		return next;
>>>>>  	}
>>>>>  
>>>>> +	/*
>>>>> +	 * __pick_next_thread() might have requeued the current
>>>>> +	 * thread which is still leading the pack.
>>>>> +	 */
>>>>> +	if (curr == next)
>>>>> +		return next;
>>>>> +
>>>>>  	if (ktime_to_ns(tg->run_budget) == 0) {
>>>>> -		/* Flush expired group members as we go. */
>>>>> +		/* Park expired group members as we go. */
>>>>>  		list_add_tail(&next->quota_expired, &tg->expired);
>>>>>  		goto pick;
>>>>>  	}
>>>>>  
>>>>> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
>>>>> -		/* Same group, leave the running timer untouched. */
>>>>> -		goto out;
>>>>> -
>>>>> -	/* Arm limit timer for the new running group. */
>>>>> -	evl_start_timer(&qs->limit_timer,
>>>>> -			ktime_add(now, tg->run_budget),
>>>>> -			EVL_INFINITE);
>>>>> -out:
>>>>> -	tg->nr_active--;
>>>>> +	/* Arm new limit timer on change of running group. */
>>>>> +	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
>>>>> +		tg->run_start = evl_ktime_monotonic();
>>>>> +		evl_start_timer(&qs->limit_timer,
>>>>> +				ktime_add(tg->run_start, tg->run_budget),
>>>>> +				EVL_INFINITE);
>>>>> +	}
>>>>>  
>>>>>  	return next;
>>>>>  }
>>>>> @@ -542,7 +544,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>>>  	 * Unregister the group before we drop rq->lock. As a result,
>>>>>  	 * it won't accept threads anymore while we are busy moving
>>>>>  	 * the current members to the fifo class, and concurrent
>>>>> -	 * evl_quota_remove requests would receive -EINVAL.
>>>>> +	 * quota_remove requests would receive -EINVAL.
>>>>>  	 */
>>>>>  	__clear_bit(tg->tgid, group_map);
>>>>>  	list_del(&tg->next);
>>>>> @@ -556,7 +558,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>>>  	 * hold rq->lock on entry, we do a trylock dance to prevent an
>>>>>  	 * ABBA issue. No livelock is possible since we unregistered
>>>>>  	 * that group already, so &tg->members can only be depleted
>>>>> -	 * (by this loop specifically).
>>>>> +	 * (by this loop exclusively).
>>>>>  	 */
>>>>>  
>>>>>  	while (!list_empty(&tg->members)) {
>>>>> @@ -583,10 +585,10 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>>  			int *quota_sum_r)
>>>>>  {
>>>>>  	struct evl_rq *rq = tg->rq;
>>>>> -	struct evl_thread *thread, *tmp, *curr = rq->curr;
>>>>> +	struct evl_thread *thread, *tmp;
>>>>>  	struct evl_sched_quota *qs = &rq->quota;
>>>>> -	ktime_t now, elapsed, consumed;
>>>>>  	ktime_t old_quota = tg->quota;
>>>>> +	ktime_t consumed;
>>>>>  	u64 n;
>>>>>  
>>>>>  	assert_hard_lock(&rq->lock);
>>>>> @@ -615,16 +617,8 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>>  	tg->quota_percent = quota_percent;
>>>>>  	tg->quota_peak_percent = quota_peak_percent;
>>>>>  
>>>>> -	if (thread_on_quota(curr, tg)) {
>>>>> -		now = evl_ktime_monotonic();
>>>>> -
>>>>> -		elapsed = now - tg->run_start;
>>>>> -		if (elapsed < tg->run_budget)
>>>>> -			tg->run_budget -= elapsed;
>>>>> -		else
>>>>> -			tg->run_budget = 0;
>>>>> -
>>>>> -		tg->run_start = now;
>>>>> +	if (current_on_quota(tg)) {
>>>>> +		quota_out(rq->curr);
>>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>>  	}
>>>>>  
>>>>> @@ -646,7 +640,7 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>>  		list_for_each_entry_safe_reverse(thread, tmp, &tg->expired,
>>>>>  						quota_expired) {
>>>>>  			list_del_init(&thread->quota_expired);
>>>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>>>> +			evl_add_schedq(&qs->runnable, thread);
>>>>>  		}
>>>>>  	}
>>>>>  
>>>>> @@ -786,6 +780,7 @@ struct evl_sched_class evl_sched_quota = {
>>>>>  	.sched_dequeue		=	quota_dequeue,
>>>>>  	.sched_requeue		=	quota_requeue,
>>>>>  	.sched_pick		=	quota_pick,
>>>>> +	.sched_out		=	quota_out,
>>>>>  	.sched_migrate		=	quota_migrate,
>>>>>  	.sched_chkparam		=	quota_chkparam,
>>>>>  	.sched_setparam		=	quota_setparam,
>>>>>
>>>>
>>>> You also seem to miss the case I discovered only yesterday as well: When
>>>> starting the current thread on a quota via setparam, we also need to set
>>>> its run_start stamp at that point.
>>>
>>> run_start should be meaningful only when a limit timer runs. 
>>>
>>
>> Right, but are you sure it will be started in the case I described...?
>>
> 
> To be started, it must be picked, so yes.
> 

But prev == next then - nothing new to pick, no?

But let me apply your patch and stop only looking at the diff.

Jan

-- 
Siemens AG, Foundational Technologies
Linux Expert Center

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  8:34             ` Jan Kiszka
@ 2026-06-15  8:53               ` Philippe Gerum
  2026-06-15  9:02               ` Philippe Gerum
  1 sibling, 0 replies; 24+ messages in thread
From: Philippe Gerum @ 2026-06-15  8:53 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Xenomai

Jan Kiszka <jan.kiszka@siemens.com> writes:

> On 15.06.26 09:30, Philippe Gerum wrote:
>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>> 
>>> On 15.06.26 09:17, Philippe Gerum wrote:
>>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>>
>>>>> On 15.06.26 08:42, Philippe Gerum wrote:
>>>>>> Philippe Gerum <rpm@xenomai.org> writes:
>>>>>>
>>>>>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>>>>>
>>>>>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>>>>
>>>>>>>> This shall be invoked before a thread switch, providing both the current
>>>>>>>> and the next thread as arguments. Some scheduling classes may need it to
>>>>>>>> correctly handle their state as the sched_pick may not be invoked when a
>>>>>>>> higher-weighted class is providing the next thread.
>>>>>>>>
>>>>>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>>>> ---
>>>>>>>>  include/evl/sched.h     | 2 ++
>>>>>>>>  kernel/evl/sched/core.c | 5 +++++
>>>>>>>>  2 files changed, 7 insertions(+)
>>>>>>>>
>>>>>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>>>>>> index ae9690860146c..0b16f1b1cf626 100644
>>>>>>>> --- a/include/evl/sched.h
>>>>>>>> +++ b/include/evl/sched.h
>>>>>>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>>>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>>>>>> +	void (*sched_out)(struct evl_thread *thread,
>>>>>>>> +			  struct evl_thread *next);
>>>>>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>>>>>  			      struct evl_rq *rq);
>>>>>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>>>>>> index eb133e334d30f..0d49fc16bd67e 100644
>>>>>>>> --- a/kernel/evl/sched/core.c
>>>>>>>> +++ b/kernel/evl/sched/core.c
>>>>>>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>>>>>>   */
>>>>>>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>>>  {
>>>>>>>> +	struct evl_sched_class *prev_schedclass;
>>>>>>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>>>>>>  	struct evl_thread *prev, *next, *curr;
>>>>>>>>  	bool leaving_inband, inband_tail;
>>>>>>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>>>  	this_rq->curr = next;
>>>>>>>>  	leaving_inband = false;
>>>>>>>>  
>>>>>>>> +	prev_schedclass = prev->sched_class;
>>>>>>>> +	if (prev_schedclass->sched_out)
>>>>>>>> +		prev_schedclass->sched_out(prev, next);
>>>>>>>> +
>>>>>>>>  	/*
>>>>>>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>>>>>>  	 * performing the rest of the context switch code
>>>>>>>
>>>>>>> I've been working on this lately too. It turns out that we need more
>>>>>>> than this, although this is definitely part of the solution. I'll follow
>>>>>>> up on this issue.
>>>>>>
>>>>>> This is still wip, I'm sharing this early to discuss details for
>>>>>> reconciling both proposals (the hunk in __evl_schedule() is merely
>>>>>> cosmetic, no functional change).
>>>>>>
>>>>>
>>>>> Please avoid that for the final patch. I already had troubles telling
>>>>> substantial from cosmetical changes apart in your lazy-blocking commit
>>>>> (>50% unrelated changes in there).
>>>>>
>>>>>> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
>>>>>> Author: Philippe Gerum <rpm@xenomai.org>
>>>>>> Date:   Sun Jun 14 12:08:59 2026 +0200
>>>>>>
>>>>>>     evl: sched/quota: fix budget tracking on preemption
>>>>>>     
>>>>>>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>>>>>>     runtime budget of the former is inaccurately tracked. This is due to
>>>>>>     the fifo->pick() handler returning a valid thread, which prevents the
>>>>>>     quota->pick() handler from being called. As a result, the last runtime
>>>>>>     period of the outgoing thread is not accounted for.
>>>>>>     
>>>>>>     To fix this, we introduce a new sched_out() handler which is called
>>>>>>     for the outgoing thread, which the quota policy uses to update the
>>>>>>     remaining budget of preempted threads appropriately. In addition, the
>>>>>>     implementation no longer shares the runnable thread queue with
>>>>>>     SCHED_FIFO.
>>>>>
>>>>> ...and why we need a separate queue now? Please provide reasoning for
>>>>> changes.
>>>>>
>>>>>>     
>>>>>>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
>>>>>>
>>>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>>>> index ae9690860146..cc824c28004b 100644
>>>>>> --- a/include/evl/sched.h
>>>>>> +++ b/include/evl/sched.h
>>>>>> @@ -120,6 +120,7 @@ struct evl_sched_class {
>>>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>>>> +	void (*sched_out)(struct evl_thread *thread);
>>>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>>>  			      struct evl_rq *rq);
>>>>>> diff --git a/include/evl/sched/quota.h b/include/evl/sched/quota.h
>>>>>> index dfe3b7390958..dc8416645da8 100644
>>>>>> --- a/include/evl/sched/quota.h
>>>>>> +++ b/include/evl/sched/quota.h
>>>>>> @@ -40,6 +40,7 @@ struct evl_quota_group {
>>>>>>  
>>>>>>  struct evl_sched_quota {
>>>>>>  	ktime_t period;
>>>>>> +	struct evl_sched_queue runnable;
>>>>>>  	struct evl_timer refill_timer;
>>>>>>  	struct evl_timer limit_timer;
>>>>>>  	struct list_head groups;
>>>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>>>> index d1d025e06a5d..7127d4e52df9 100644
>>>>>> --- a/kernel/evl/sched/core.c
>>>>>> +++ b/kernel/evl/sched/core.c
>>>>>> @@ -773,7 +773,8 @@ static inline void set_next_running(struct evl_rq *rq,
>>>>>>  		evl_stop_timer(&rq->rrbtimer);
>>>>>>  }
>>>>>>  
>>>>>> -static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>>>>> +static __always_inline struct evl_thread *
>>>>>> +__pick_next_thread(struct evl_rq *rq)
>>>>>>  {
>>>>>>  	struct evl_sched_class *sched_class;
>>>>>>  	struct evl_thread *curr = rq->curr;
>>>>>> @@ -821,8 +822,14 @@ static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>>>>>  /* rq->curr->lock + rq->lock held, hard irqs off. */
>>>>>>  static struct evl_thread *pick_next_thread(struct evl_rq *rq)
>>>>>>  {
>>>>>> -	struct evl_thread *next = __pick_next_thread(rq);
>>>>>> +	struct evl_thread *next, *prev = rq->curr;
>>>>>> +	struct evl_sched_class *prev_class = prev->sched_class;
>>>>>>  
>>>>>> +	if (prev_class->sched_out)
>>>>>> +		prev_class->sched_out(prev);
>>>>>> +
>>>>>> +	next = __pick_next_thread(rq);
>>>>>> +	trace_evl_pick_thread(next);
>>>>>>  	set_next_running(rq, next);
>>>>>>  
>>>>>>  	return next;
>>>>>> @@ -972,21 +979,20 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>  		return;
>>>>>>  	}
>>>>>>  
>>>>>> +	prev = curr;
>>>>>>  	next = pick_next_thread(this_rq);
>>>>>> -	trace_evl_pick_thread(next);
>>>>>> -	if (next == curr) {
>>>>>> -		if (unlikely(next->state & EVL_T_ROOT)) {
>>>>>> +	if (next == prev) {
>>>>>> +		if (unlikely(prev->state & EVL_T_ROOT)) {
>>>>>>  			if (this_rq->local_flags & RQ_TPROXY)
>>>>>>  				evl_notify_proxy_tick(this_rq);
>>>>>>  			if (this_rq->local_flags & RQ_TDEFER)
>>>>>>  				evl_program_local_tick(&evl_mono_clock);
>>>>>>  		}
>>>>>>  		raw_spin_unlock(&this_rq->lock);
>>>>>> -		raw_spin_unlock_irqrestore(&curr->lock, flags);
>>>>>> +		raw_spin_unlock_irqrestore(&prev->lock, flags);
>>>>>>  		return;
>>>>>>  	}
>>>>>>  
>>>>>> -	prev = curr;
>>>>>>  	this_rq->curr = next;
>>>>>>  	leaving_inband = false;
>>>>>>  
>>>>>> diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
>>>>>> index 0829da711a66..14748e3a843e 100644
>>>>>> --- a/kernel/evl/sched/quota.c
>>>>>> +++ b/kernel/evl/sched/quota.c
>>>>>> @@ -12,45 +12,35 @@
>>>>>>  #include <uapi/evl/sched-abi.h>
>>>>>>  
>>>>>>  /*
>>>>>> - * With this policy, each per-CPU runqueue maintains a list of active
>>>>>> - * thread groups for the sched_fifo class.
>>>>>> - *
>>>>>> - * Each time a thread is picked from the runqueue, we check whether we
>>>>>> - * still have budget for running it, looking at the group it belongs
>>>>>> - * to. If so, a timer is armed to elapse when that group has no more
>>>>>> - * budget, would the incoming thread run unpreempted until then
>>>>>> - * (i.e. evl_quota->limit_timer).
>>>>>> + * Each time a thread is picked from the ->runnable queue, we check
>>>>>> + * whether the group it belongs to still has runtime budget.  If so, a
>>>>>> + * timer is armed to fire when that group has no more budget, would
>>>>>> + * the incoming thread run unpreempted until then
>>>>>> + * (i.e. quota->limit_timer).
>>>>>>   *
>>>>>>   * Otherwise, if no budget remains in the group for running the
>>>>>>   * candidate thread, we move the latter to a local expiry queue
>>>>>>   * maintained by the group. This process is done on the fly as we pull
>>>>>> - * from the runqueue.
>>>>>> + * from the ->runnable queue.
>>>>>>   *
>>>>>> - * Updating the remaining budget is done each time the EVL core asks
>>>>>> - * for replacing the current thread with the next runnable one,
>>>>>> - * i.e. evl_quota_pick(). There we charge the elapsed run time of the
>>>>>> - * outgoing thread to the relevant group, and conversely, we check
>>>>>> - * whether the incoming thread has budget.
>>>>>> + * Updating the remaining budget is done each time the EVL core
>>>>>> + * schedules out a thread undergoing the quota scheduling policy,
>>>>>>   *
>>>>>> - * Finally, a per-CPU timer (evl_quota->refill_timer) periodically
>>>>>> - * ticks in the background, in accordance to the defined quota
>>>>>> - * interval. Thread group budgets get replenished by its handler in
>>>>>> - * accordance to their respective share, pushing all expired threads
>>>>>> - * back to the run queue in the same move.
>>>>>> + * Finally, a per-CPU timer (quota->refill_timer) periodically ticks
>>>>>> + * in the background, in accordance to the defined quota interval,
>>>>>> + * replenishing per-group budgets, pushing all expired threads back to
>>>>>> + * the quota ->runqueue too.
>>>>>>   *
>>>>>> - * NOTE: since the core logic enforcing the budget entirely happens in
>>>>>> - * evl_quota_pick(), applying a budget change can be done as simply as
>>>>>> - * forcing the rescheduling procedure to be invoked asap. As a result
>>>>>> - * of this, the EVL core will ask for the next thread to run, which
>>>>>> - * means calling evl_quota_pick() eventually.
>>>>>> + * NOTE: forcing a call to the rescheduling procedure is enoiugh to
>>>>>> + * apply a budget change.
>>>>>>   *
>>>>>> - * CAUTION: evl_quota_group->nr_active does count both the threads
>>>>>> - * from that group linked to the sched_fifo runqueue, _and_ the
>>>>>> - * threads moved to the local expiry queue. As a matter of fact, the
>>>>>> - * expired threads - those for which we consumed all the per-group
>>>>>> - * budget - are still seen as runnable (i.e. not blocked/suspended) by
>>>>>> - * the EVL core. This only means that the SCHED_QUOTA policy won't
>>>>>> - * pick them until the corresponding budget is replenished.
>>>>>> + * CAUTION: quota_group->nr_active does count both the threads from
>>>>>> + * that group linked to the runnable queue, _and_ the threads moved to
>>>>>> + * the local expiry queue. As a matter of fact, the expired threads -
>>>>>> + * those for which we consumed all the per-group budget - are still
>>>>>> + * seen as runnable (i.e. not blocked/suspended) by the EVL core. This
>>>>>> + * only means that the SCHED_QUOTA policy won't pick them until the
>>>>>> + * corresponding budget is replenished.
>>>>>>   */
>>>>>>  
>>>>>>  #define MAX_QUOTA_GROUPS  1024
>>>>>> @@ -61,15 +51,19 @@ static DECLARE_BITMAP(group_map, MAX_QUOTA_GROUPS);
>>>>>>  
>>>>>>  static LIST_HEAD(group_list);
>>>>>>  
>>>>>> -static inline bool thread_on_quota(struct evl_thread *thread,
>>>>>> -				struct evl_quota_group *tg)
>>>>>> +static inline bool current_on_quota(struct evl_quota_group *tg)
>>>>>>  {
>>>>>> -	/*
>>>>>> -	 * Check whether @thread is running on some CPU, and belongs
>>>>>> -	 * to quota group @tg.
>>>>>> -	 */
>>>>>> -	return thread->quota == tg &&
>>>>>> -		!(thread->state & (EVL_T_READY|EVL_THREAD_BLOCK_MASK));
>>>>>> +	struct evl_rq *rq = tg->rq;
>>>>>> +	struct evl_thread *curr = rq->curr;
>>>>>> +	struct evl_sched_quota *qs = &rq->quota;
>>>>>> +
>>>>>> +	if (curr->quota != tg)
>>>>>> +		return false;
>>>>>> +
>>>>>> +	if (curr->state & (EVL_T_READY|EVL_T_KICKED|EVL_THREAD_BLOCK_MASK))
>>>>>> +		return false;
>>>>>> +
>>>>>> +	return evl_timer_is_running(&qs->limit_timer);
>>>>>>  }
>>>>>>  
>>>>>>  static inline bool group_is_active(struct evl_quota_group *tg)
>>>>>> @@ -82,7 +76,7 @@ static inline bool group_is_active(struct evl_quota_group *tg)
>>>>>>  	 * runqueue, in which case tg->nr_active already accounted for
>>>>>>  	 * it.
>>>>>>  	 */
>>>>>> -	return thread_on_quota(tg->rq->curr, tg);
>>>>>> +	return current_on_quota(tg);
>>>>>>  }
>>>>>>  
>>>>>>  static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>>> @@ -134,10 +128,10 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>>>  	} else if (tg->run_credit) {
>>>>>>  		credit = ktime_sub(tg->quota_peak, budget);
>>>>>>  		/* Consume the accumulated credit. */
>>>>>> -		if (tg->run_credit >= credit)
>>>>>> +		if (tg->run_credit >= credit) {
>>>>>>  			tg->run_credit =
>>>>>>  				ktime_sub(tg->run_credit, credit);
>>>>>> -		else {
>>>>>> +		} else {
>>>>>>  			credit = tg->run_credit;
>>>>>>  			tg->run_credit = 0;
>>>>>>  		}
>>>>>> @@ -150,8 +144,8 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>>>  
>>>>>>  static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled */
>>>>>>  {
>>>>>> -	struct evl_quota_group *tg;
>>>>>>  	struct evl_thread *thread, *tmp;
>>>>>> +	struct evl_quota_group *tg;
>>>>>>  	struct evl_sched_quota *qs;
>>>>>>  	struct evl_rq *rq;
>>>>>>  
>>>>>> @@ -167,7 +161,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>>>>  		if (tg->run_budget == 0 || list_empty(&tg->expired))
>>>>>>  			continue;
>>>>>>  		/*
>>>>>> -		 * For each group living on this CPU, move all expired
>>>>>> +		 * For each group pinned on this CPU, move all expired
>>>>>>  		 * threads back to the runqueue. Since those threads
>>>>>>  		 * were moved out of the runqueue as we were
>>>>>>  		 * considering them for execution, we push them back
>>>>>> @@ -178,7 +172,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>>>>  		list_for_each_entry_safe_reverse(thread, tmp,
>>>>>>  						&tg->expired, quota_expired) {
>>>>>>  			list_del_init(&thread->quota_expired);
>>>>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>>>>> +			evl_add_schedq(&qs->runnable, thread);
>>>>>>  		}
>>>>>>  	}
>>>>>>  
>>>>>> @@ -195,7 +189,7 @@ static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
>>>>>>  	/*
>>>>>>  	 * Force a rescheduling on the return path of the current
>>>>>>  	 * interrupt, so that the budget is re-evaluated for the
>>>>>> -	 * current group in evl_quota_pick().
>>>>>> +	 * current group in quota_pick().
>>>>>>  	 */
>>>>>>  	raw_spin_lock(&rq->lock);
>>>>>>  	evl_set_self_resched(rq);
>>>>>> @@ -221,6 +215,7 @@ static void quota_init(struct evl_rq *rq)
>>>>>>  {
>>>>>>  	struct evl_sched_quota *qs = &rq->quota;
>>>>>>  
>>>>>> +	evl_init_schedq(&qs->runnable);
>>>>>>  	qs->period = quota_period;
>>>>>>  	INIT_LIST_HEAD(&qs->groups);
>>>>>>  
>>>>>> @@ -337,8 +332,8 @@ static void quota_forget(struct evl_thread *thread)
>>>>>>  
>>>>>>  static void quota_kick(struct evl_thread *thread)
>>>>>>  {
>>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>>  
>>>>>>  	/*
>>>>>>  	 * Allow a kicked thread to be elected for running until it
>>>>>> @@ -347,7 +342,7 @@ static void quota_kick(struct evl_thread *thread)
>>>>>>  	 */
>>>>>>  	if (tg->run_budget == 0 && !list_empty(&thread->quota_expired)) {
>>>>>>  		list_del_init(&thread->quota_expired);
>>>>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>>>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>>>>  	}
>>>>>>  }
>>>>>>  
>>>>>> @@ -358,79 +353,82 @@ static inline int thread_is_runnable(struct evl_thread *thread)
>>>>>>  
>>>>>>  static void quota_enqueue(struct evl_thread *thread)
>>>>>>  {
>>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>>  
>>>>>>  	if (!thread_is_runnable(thread))
>>>>>>  		list_add_tail(&thread->quota_expired, &tg->expired);
>>>>>>  	else
>>>>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>>>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>>>>  
>>>>>>  	tg->nr_active++;
>>>>>>  }
>>>>>>  
>>>>>>  static void quota_dequeue(struct evl_thread *thread)
>>>>>>  {
>>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>>  
>>>>>>  	if (!list_empty(&thread->quota_expired))
>>>>>>  		list_del_init(&thread->quota_expired);
>>>>>>  	else
>>>>>> -		evl_del_schedq(&rq->fifo.runnable, thread);
>>>>>> +		evl_del_schedq(&qs->runnable, thread);
>>>>>>  
>>>>>>  	tg->nr_active--;
>>>>>>  }
>>>>>>  
>>>>>>  static void quota_requeue(struct evl_thread *thread)
>>>>>>  {
>>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>>  
>>>>>>  	if (!thread_is_runnable(thread))
>>>>>>  		list_add(&thread->quota_expired, &tg->expired);
>>>>>>  	else
>>>>>> -		evl_add_schedq(&rq->fifo.runnable, thread);
>>>>>> +		evl_add_schedq(&qs->runnable, thread);
>>>>>>  
>>>>>>  	tg->nr_active++;
>>>>>>  }
>>>>>>  
>>>>>> -static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>>>> +static void quota_out(struct evl_thread *thread)
>>>>>>  {
>>>>>> -	struct evl_thread *next, *curr = rq->curr;
>>>>>> -	struct evl_sched_quota *qs = &rq->quota;
>>>>>> -	struct evl_quota_group *otg, *tg;
>>>>>> -	ktime_t now, elapsed;
>>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>> +	struct evl_quota_group *tg = thread->quota;
>>>>>> +	ktime_t now, consumed;
>>>>>> +
>>>>>> +	/* Timer off means that we are not tracking quota. */
>>>>>> +	if (!evl_timer_is_running(&qs->limit_timer))
>>>>>> +		return;
>>>>>>  
>>>>>> -	now = evl_ktime_monotonic();
>>>>>> -	otg = curr->quota;
>>>>>> -	if (otg == NULL)
>>>>>> -		goto pick;
>>>>>>  	/*
>>>>>>  	 * Charge the time consumed by the outgoing thread to the
>>>>>>  	 * group it belongs to.
>>>>>>  	 */
>>>>>> -	elapsed = ktime_sub(now, otg->run_start);
>>>>>> -	if (elapsed < otg->run_budget)
>>>>>> -		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
>>>>>> -	else
>>>>>> -		otg->run_budget = 0;
>>>>>> +	now = evl_ktime_monotonic();
>>>>>
>>>>> Thing brins some, though minor, glitch when doing the run_start update
>>>>> for the next group already in pick. That's why I'm trying to reuse that
>>>>> stamp when available.
>>>>>
>>>>>> +	consumed = ktime_sub(now, tg->run_start);
>>>>>> +	if (consumed < tg->run_budget) {
>>>>>> +		tg->run_budget = ktime_sub(tg->run_budget, consumed);
>>>>>> +	} else {
>>>>>> +		tg->run_budget = 0;
>>>>>> +		evl_stop_timer(&qs->limit_timer);
>>>>>> +	}
>>>>>> +}
>>>>>> +
>>>>>> +static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>>>> +{
>>>>>> +	struct evl_thread *next, *curr = rq->curr;
>>>>>> +	struct evl_sched_quota *qs = &rq->quota;
>>>>>> +	struct evl_quota_group *tg;
>>>>>> +
>>>>>>  pick:
>>>>>> -	next = evl_get_schedq(&rq->fifo.runnable);
>>>>>> +	next = evl_get_schedq(&qs->runnable);
>>>>>>  	if (next == NULL) {
>>>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>>>  		return NULL;
>>>>>>  	}
>>>>>>  
>>>>>> -	/*
>>>>>> -	 * As we basically piggyback on the SCHED_FIFO runqueue, make
>>>>>> -	 * sure to detect non-quota threads.
>>>>>> -	 */
>>>>>>  	tg = next->quota;
>>>>>> -	if (tg == NULL)
>>>>>> -		return next;
>>>>>> -
>>>>>> -	tg->run_start = now;
>>>>>> +	tg->nr_active--;
>>>>>>  
>>>>>>  	/*
>>>>>>  	 * Don't consider budget if kicked, we have to allow this
>>>>>> @@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>>>>  	 */
>>>>>>  	if (next->info & EVL_T_KICKED) {
>>>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>>> -		goto out;
>>>>>> +		return next;
>>>>>>  	}
>>>>>>  
>>>>>> +	/*
>>>>>> +	 * __pick_next_thread() might have requeued the current
>>>>>> +	 * thread which is still leading the pack.
>>>>>> +	 */
>>>>>> +	if (curr == next)
>>>>>> +		return next;
>>>>>> +
>>>>>>  	if (ktime_to_ns(tg->run_budget) == 0) {
>>>>>> -		/* Flush expired group members as we go. */
>>>>>> +		/* Park expired group members as we go. */
>>>>>>  		list_add_tail(&next->quota_expired, &tg->expired);
>>>>>>  		goto pick;
>>>>>>  	}
>>>>>>  
>>>>>> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
>>>>>> -		/* Same group, leave the running timer untouched. */
>>>>>> -		goto out;
>>>>>> -
>>>>>> -	/* Arm limit timer for the new running group. */
>>>>>> -	evl_start_timer(&qs->limit_timer,
>>>>>> -			ktime_add(now, tg->run_budget),
>>>>>> -			EVL_INFINITE);
>>>>>> -out:
>>>>>> -	tg->nr_active--;
>>>>>> +	/* Arm new limit timer on change of running group. */
>>>>>> +	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
>>>>>> +		tg->run_start = evl_ktime_monotonic();
>>>>>> +		evl_start_timer(&qs->limit_timer,
>>>>>> +				ktime_add(tg->run_start, tg->run_budget),
>>>>>> +				EVL_INFINITE);
>>>>>> +	}
>>>>>>  
>>>>>>  	return next;
>>>>>>  }
>>>>>> @@ -542,7 +544,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>>>>  	 * Unregister the group before we drop rq->lock. As a result,
>>>>>>  	 * it won't accept threads anymore while we are busy moving
>>>>>>  	 * the current members to the fifo class, and concurrent
>>>>>> -	 * evl_quota_remove requests would receive -EINVAL.
>>>>>> +	 * quota_remove requests would receive -EINVAL.
>>>>>>  	 */
>>>>>>  	__clear_bit(tg->tgid, group_map);
>>>>>>  	list_del(&tg->next);
>>>>>> @@ -556,7 +558,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>>>>  	 * hold rq->lock on entry, we do a trylock dance to prevent an
>>>>>>  	 * ABBA issue. No livelock is possible since we unregistered
>>>>>>  	 * that group already, so &tg->members can only be depleted
>>>>>> -	 * (by this loop specifically).
>>>>>> +	 * (by this loop exclusively).
>>>>>>  	 */
>>>>>>  
>>>>>>  	while (!list_empty(&tg->members)) {
>>>>>> @@ -583,10 +585,10 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>>>  			int *quota_sum_r)
>>>>>>  {
>>>>>>  	struct evl_rq *rq = tg->rq;
>>>>>> -	struct evl_thread *thread, *tmp, *curr = rq->curr;
>>>>>> +	struct evl_thread *thread, *tmp;
>>>>>>  	struct evl_sched_quota *qs = &rq->quota;
>>>>>> -	ktime_t now, elapsed, consumed;
>>>>>>  	ktime_t old_quota = tg->quota;
>>>>>> +	ktime_t consumed;
>>>>>>  	u64 n;
>>>>>>  
>>>>>>  	assert_hard_lock(&rq->lock);
>>>>>> @@ -615,16 +617,8 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>>>  	tg->quota_percent = quota_percent;
>>>>>>  	tg->quota_peak_percent = quota_peak_percent;
>>>>>>  
>>>>>> -	if (thread_on_quota(curr, tg)) {
>>>>>> -		now = evl_ktime_monotonic();
>>>>>> -
>>>>>> -		elapsed = now - tg->run_start;
>>>>>> -		if (elapsed < tg->run_budget)
>>>>>> -			tg->run_budget -= elapsed;
>>>>>> -		else
>>>>>> -			tg->run_budget = 0;
>>>>>> -
>>>>>> -		tg->run_start = now;
>>>>>> +	if (current_on_quota(tg)) {
>>>>>> +		quota_out(rq->curr);
>>>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>>>  	}
>>>>>>  
>>>>>> @@ -646,7 +640,7 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>>>  		list_for_each_entry_safe_reverse(thread, tmp, &tg->expired,
>>>>>>  						quota_expired) {
>>>>>>  			list_del_init(&thread->quota_expired);
>>>>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>>>>> +			evl_add_schedq(&qs->runnable, thread);
>>>>>>  		}
>>>>>>  	}
>>>>>>  
>>>>>> @@ -786,6 +780,7 @@ struct evl_sched_class evl_sched_quota = {
>>>>>>  	.sched_dequeue		=	quota_dequeue,
>>>>>>  	.sched_requeue		=	quota_requeue,
>>>>>>  	.sched_pick		=	quota_pick,
>>>>>> +	.sched_out		=	quota_out,
>>>>>>  	.sched_migrate		=	quota_migrate,
>>>>>>  	.sched_chkparam		=	quota_chkparam,
>>>>>>  	.sched_setparam		=	quota_setparam,
>>>>>>
>>>>>
>>>>> You also seem to miss the case I discovered only yesterday as well: When
>>>>> starting the current thread on a quota via setparam, we also need to set
>>>>> its run_start stamp at that point.
>>>>
>>>> run_start should be meaningful only when a limit timer runs. 
>>>>
>>>
>>> Right, but are you sure it will be started in the case I described...?
>>>
>> 
>> To be started, it must be picked, so yes.
>> 
>
> But prev == next then - nothing new to pick, no?
>
> But let me apply your patch and stop only looking at the diff.
>

Beware: not tested yet.

-- 
Philippe.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  8:34             ` Jan Kiszka
  2026-06-15  8:53               ` Philippe Gerum
@ 2026-06-15  9:02               ` Philippe Gerum
  1 sibling, 0 replies; 24+ messages in thread
From: Philippe Gerum @ 2026-06-15  9:02 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Xenomai

Jan Kiszka <jan.kiszka@siemens.com> writes:

> On 15.06.26 09:30, Philippe Gerum wrote:
>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>> 
>>> On 15.06.26 09:17, Philippe Gerum wrote:
>>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>>
>>>>> On 15.06.26 08:42, Philippe Gerum wrote:
>>>>>> Philippe Gerum <rpm@xenomai.org> writes:
>>>>>>
>>>>>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>>>>>
>>>>>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>>>>
>>>>>>>> This shall be invoked before a thread switch, providing both the current
>>>>>>>> and the next thread as arguments. Some scheduling classes may need it to
>>>>>>>> correctly handle their state as the sched_pick may not be invoked when a
>>>>>>>> higher-weighted class is providing the next thread.
>>>>>>>>
>>>>>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>>>> ---
>>>>>>>>  include/evl/sched.h     | 2 ++
>>>>>>>>  kernel/evl/sched/core.c | 5 +++++
>>>>>>>>  2 files changed, 7 insertions(+)
>>>>>>>>
>>>>>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>>>>>> index ae9690860146c..0b16f1b1cf626 100644
>>>>>>>> --- a/include/evl/sched.h
>>>>>>>> +++ b/include/evl/sched.h
>>>>>>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>>>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>>>>>> +	void (*sched_out)(struct evl_thread *thread,
>>>>>>>> +			  struct evl_thread *next);
>>>>>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>>>>>  			      struct evl_rq *rq);
>>>>>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>>>>>> index eb133e334d30f..0d49fc16bd67e 100644
>>>>>>>> --- a/kernel/evl/sched/core.c
>>>>>>>> +++ b/kernel/evl/sched/core.c
>>>>>>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>>>>>>   */
>>>>>>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>>>  {
>>>>>>>> +	struct evl_sched_class *prev_schedclass;
>>>>>>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>>>>>>  	struct evl_thread *prev, *next, *curr;
>>>>>>>>  	bool leaving_inband, inband_tail;
>>>>>>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>>>  	this_rq->curr = next;
>>>>>>>>  	leaving_inband = false;
>>>>>>>>  
>>>>>>>> +	prev_schedclass = prev->sched_class;
>>>>>>>> +	if (prev_schedclass->sched_out)
>>>>>>>> +		prev_schedclass->sched_out(prev, next);
>>>>>>>> +
>>>>>>>>  	/*
>>>>>>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>>>>>>  	 * performing the rest of the context switch code
>>>>>>>
>>>>>>> I've been working on this lately too. It turns out that we need more
>>>>>>> than this, although this is definitely part of the solution. I'll follow
>>>>>>> up on this issue.
>>>>>>
>>>>>> This is still wip, I'm sharing this early to discuss details for
>>>>>> reconciling both proposals (the hunk in __evl_schedule() is merely
>>>>>> cosmetic, no functional change).
>>>>>>
>>>>>
>>>>> Please avoid that for the final patch. I already had troubles telling
>>>>> substantial from cosmetical changes apart in your lazy-blocking commit
>>>>> (>50% unrelated changes in there).
>>>>>
>>>>>> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
>>>>>> Author: Philippe Gerum <rpm@xenomai.org>
>>>>>> Date:   Sun Jun 14 12:08:59 2026 +0200
>>>>>>
>>>>>>     evl: sched/quota: fix budget tracking on preemption
>>>>>>     
>>>>>>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>>>>>>     runtime budget of the former is inaccurately tracked. This is due to
>>>>>>     the fifo->pick() handler returning a valid thread, which prevents the
>>>>>>     quota->pick() handler from being called. As a result, the last runtime
>>>>>>     period of the outgoing thread is not accounted for.
>>>>>>     
>>>>>>     To fix this, we introduce a new sched_out() handler which is called
>>>>>>     for the outgoing thread, which the quota policy uses to update the
>>>>>>     remaining budget of preempted threads appropriately. In addition, the
>>>>>>     implementation no longer shares the runnable thread queue with
>>>>>>     SCHED_FIFO.
>>>>>
>>>>> ...and why we need a separate queue now? Please provide reasoning for
>>>>> changes.
>>>>>
>>>>>>     
>>>>>>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
>>>>>>
>>>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>>>> index ae9690860146..cc824c28004b 100644
>>>>>> --- a/include/evl/sched.h
>>>>>> +++ b/include/evl/sched.h
>>>>>> @@ -120,6 +120,7 @@ struct evl_sched_class {
>>>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>>>> +	void (*sched_out)(struct evl_thread *thread);
>>>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>>>  			      struct evl_rq *rq);
>>>>>> diff --git a/include/evl/sched/quota.h b/include/evl/sched/quota.h
>>>>>> index dfe3b7390958..dc8416645da8 100644
>>>>>> --- a/include/evl/sched/quota.h
>>>>>> +++ b/include/evl/sched/quota.h
>>>>>> @@ -40,6 +40,7 @@ struct evl_quota_group {
>>>>>>  
>>>>>>  struct evl_sched_quota {
>>>>>>  	ktime_t period;
>>>>>> +	struct evl_sched_queue runnable;
>>>>>>  	struct evl_timer refill_timer;
>>>>>>  	struct evl_timer limit_timer;
>>>>>>  	struct list_head groups;
>>>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>>>> index d1d025e06a5d..7127d4e52df9 100644
>>>>>> --- a/kernel/evl/sched/core.c
>>>>>> +++ b/kernel/evl/sched/core.c
>>>>>> @@ -773,7 +773,8 @@ static inline void set_next_running(struct evl_rq *rq,
>>>>>>  		evl_stop_timer(&rq->rrbtimer);
>>>>>>  }
>>>>>>  
>>>>>> -static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>>>>> +static __always_inline struct evl_thread *
>>>>>> +__pick_next_thread(struct evl_rq *rq)
>>>>>>  {
>>>>>>  	struct evl_sched_class *sched_class;
>>>>>>  	struct evl_thread *curr = rq->curr;
>>>>>> @@ -821,8 +822,14 @@ static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>>>>>  /* rq->curr->lock + rq->lock held, hard irqs off. */
>>>>>>  static struct evl_thread *pick_next_thread(struct evl_rq *rq)
>>>>>>  {
>>>>>> -	struct evl_thread *next = __pick_next_thread(rq);
>>>>>> +	struct evl_thread *next, *prev = rq->curr;
>>>>>> +	struct evl_sched_class *prev_class = prev->sched_class;
>>>>>>  
>>>>>> +	if (prev_class->sched_out)
>>>>>> +		prev_class->sched_out(prev);
>>>>>> +
>>>>>> +	next = __pick_next_thread(rq);
>>>>>> +	trace_evl_pick_thread(next);
>>>>>>  	set_next_running(rq, next);
>>>>>>  
>>>>>>  	return next;
>>>>>> @@ -972,21 +979,20 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>  		return;
>>>>>>  	}
>>>>>>  
>>>>>> +	prev = curr;
>>>>>>  	next = pick_next_thread(this_rq);
>>>>>> -	trace_evl_pick_thread(next);
>>>>>> -	if (next == curr) {
>>>>>> -		if (unlikely(next->state & EVL_T_ROOT)) {
>>>>>> +	if (next == prev) {
>>>>>> +		if (unlikely(prev->state & EVL_T_ROOT)) {
>>>>>>  			if (this_rq->local_flags & RQ_TPROXY)
>>>>>>  				evl_notify_proxy_tick(this_rq);
>>>>>>  			if (this_rq->local_flags & RQ_TDEFER)
>>>>>>  				evl_program_local_tick(&evl_mono_clock);
>>>>>>  		}
>>>>>>  		raw_spin_unlock(&this_rq->lock);
>>>>>> -		raw_spin_unlock_irqrestore(&curr->lock, flags);
>>>>>> +		raw_spin_unlock_irqrestore(&prev->lock, flags);
>>>>>>  		return;
>>>>>>  	}
>>>>>>  
>>>>>> -	prev = curr;
>>>>>>  	this_rq->curr = next;
>>>>>>  	leaving_inband = false;
>>>>>>  
>>>>>> diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
>>>>>> index 0829da711a66..14748e3a843e 100644
>>>>>> --- a/kernel/evl/sched/quota.c
>>>>>> +++ b/kernel/evl/sched/quota.c
>>>>>> @@ -12,45 +12,35 @@
>>>>>>  #include <uapi/evl/sched-abi.h>
>>>>>>  
>>>>>>  /*
>>>>>> - * With this policy, each per-CPU runqueue maintains a list of active
>>>>>> - * thread groups for the sched_fifo class.
>>>>>> - *
>>>>>> - * Each time a thread is picked from the runqueue, we check whether we
>>>>>> - * still have budget for running it, looking at the group it belongs
>>>>>> - * to. If so, a timer is armed to elapse when that group has no more
>>>>>> - * budget, would the incoming thread run unpreempted until then
>>>>>> - * (i.e. evl_quota->limit_timer).
>>>>>> + * Each time a thread is picked from the ->runnable queue, we check
>>>>>> + * whether the group it belongs to still has runtime budget.  If so, a
>>>>>> + * timer is armed to fire when that group has no more budget, would
>>>>>> + * the incoming thread run unpreempted until then
>>>>>> + * (i.e. quota->limit_timer).
>>>>>>   *
>>>>>>   * Otherwise, if no budget remains in the group for running the
>>>>>>   * candidate thread, we move the latter to a local expiry queue
>>>>>>   * maintained by the group. This process is done on the fly as we pull
>>>>>> - * from the runqueue.
>>>>>> + * from the ->runnable queue.
>>>>>>   *
>>>>>> - * Updating the remaining budget is done each time the EVL core asks
>>>>>> - * for replacing the current thread with the next runnable one,
>>>>>> - * i.e. evl_quota_pick(). There we charge the elapsed run time of the
>>>>>> - * outgoing thread to the relevant group, and conversely, we check
>>>>>> - * whether the incoming thread has budget.
>>>>>> + * Updating the remaining budget is done each time the EVL core
>>>>>> + * schedules out a thread undergoing the quota scheduling policy,
>>>>>>   *
>>>>>> - * Finally, a per-CPU timer (evl_quota->refill_timer) periodically
>>>>>> - * ticks in the background, in accordance to the defined quota
>>>>>> - * interval. Thread group budgets get replenished by its handler in
>>>>>> - * accordance to their respective share, pushing all expired threads
>>>>>> - * back to the run queue in the same move.
>>>>>> + * Finally, a per-CPU timer (quota->refill_timer) periodically ticks
>>>>>> + * in the background, in accordance to the defined quota interval,
>>>>>> + * replenishing per-group budgets, pushing all expired threads back to
>>>>>> + * the quota ->runqueue too.
>>>>>>   *
>>>>>> - * NOTE: since the core logic enforcing the budget entirely happens in
>>>>>> - * evl_quota_pick(), applying a budget change can be done as simply as
>>>>>> - * forcing the rescheduling procedure to be invoked asap. As a result
>>>>>> - * of this, the EVL core will ask for the next thread to run, which
>>>>>> - * means calling evl_quota_pick() eventually.
>>>>>> + * NOTE: forcing a call to the rescheduling procedure is enoiugh to
>>>>>> + * apply a budget change.
>>>>>>   *
>>>>>> - * CAUTION: evl_quota_group->nr_active does count both the threads
>>>>>> - * from that group linked to the sched_fifo runqueue, _and_ the
>>>>>> - * threads moved to the local expiry queue. As a matter of fact, the
>>>>>> - * expired threads - those for which we consumed all the per-group
>>>>>> - * budget - are still seen as runnable (i.e. not blocked/suspended) by
>>>>>> - * the EVL core. This only means that the SCHED_QUOTA policy won't
>>>>>> - * pick them until the corresponding budget is replenished.
>>>>>> + * CAUTION: quota_group->nr_active does count both the threads from
>>>>>> + * that group linked to the runnable queue, _and_ the threads moved to
>>>>>> + * the local expiry queue. As a matter of fact, the expired threads -
>>>>>> + * those for which we consumed all the per-group budget - are still
>>>>>> + * seen as runnable (i.e. not blocked/suspended) by the EVL core. This
>>>>>> + * only means that the SCHED_QUOTA policy won't pick them until the
>>>>>> + * corresponding budget is replenished.
>>>>>>   */
>>>>>>  
>>>>>>  #define MAX_QUOTA_GROUPS  1024
>>>>>> @@ -61,15 +51,19 @@ static DECLARE_BITMAP(group_map, MAX_QUOTA_GROUPS);
>>>>>>  
>>>>>>  static LIST_HEAD(group_list);
>>>>>>  
>>>>>> -static inline bool thread_on_quota(struct evl_thread *thread,
>>>>>> -				struct evl_quota_group *tg)
>>>>>> +static inline bool current_on_quota(struct evl_quota_group *tg)
>>>>>>  {
>>>>>> -	/*
>>>>>> -	 * Check whether @thread is running on some CPU, and belongs
>>>>>> -	 * to quota group @tg.
>>>>>> -	 */
>>>>>> -	return thread->quota == tg &&
>>>>>> -		!(thread->state & (EVL_T_READY|EVL_THREAD_BLOCK_MASK));
>>>>>> +	struct evl_rq *rq = tg->rq;
>>>>>> +	struct evl_thread *curr = rq->curr;
>>>>>> +	struct evl_sched_quota *qs = &rq->quota;
>>>>>> +
>>>>>> +	if (curr->quota != tg)
>>>>>> +		return false;
>>>>>> +
>>>>>> +	if (curr->state & (EVL_T_READY|EVL_T_KICKED|EVL_THREAD_BLOCK_MASK))
>>>>>> +		return false;
>>>>>> +
>>>>>> +	return evl_timer_is_running(&qs->limit_timer);
>>>>>>  }
>>>>>>  
>>>>>>  static inline bool group_is_active(struct evl_quota_group *tg)
>>>>>> @@ -82,7 +76,7 @@ static inline bool group_is_active(struct evl_quota_group *tg)
>>>>>>  	 * runqueue, in which case tg->nr_active already accounted for
>>>>>>  	 * it.
>>>>>>  	 */
>>>>>> -	return thread_on_quota(tg->rq->curr, tg);
>>>>>> +	return current_on_quota(tg);
>>>>>>  }
>>>>>>  
>>>>>>  static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>>> @@ -134,10 +128,10 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>>>  	} else if (tg->run_credit) {
>>>>>>  		credit = ktime_sub(tg->quota_peak, budget);
>>>>>>  		/* Consume the accumulated credit. */
>>>>>> -		if (tg->run_credit >= credit)
>>>>>> +		if (tg->run_credit >= credit) {
>>>>>>  			tg->run_credit =
>>>>>>  				ktime_sub(tg->run_credit, credit);
>>>>>> -		else {
>>>>>> +		} else {
>>>>>>  			credit = tg->run_credit;
>>>>>>  			tg->run_credit = 0;
>>>>>>  		}
>>>>>> @@ -150,8 +144,8 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>>>  
>>>>>>  static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled */
>>>>>>  {
>>>>>> -	struct evl_quota_group *tg;
>>>>>>  	struct evl_thread *thread, *tmp;
>>>>>> +	struct evl_quota_group *tg;
>>>>>>  	struct evl_sched_quota *qs;
>>>>>>  	struct evl_rq *rq;
>>>>>>  
>>>>>> @@ -167,7 +161,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>>>>  		if (tg->run_budget == 0 || list_empty(&tg->expired))
>>>>>>  			continue;
>>>>>>  		/*
>>>>>> -		 * For each group living on this CPU, move all expired
>>>>>> +		 * For each group pinned on this CPU, move all expired
>>>>>>  		 * threads back to the runqueue. Since those threads
>>>>>>  		 * were moved out of the runqueue as we were
>>>>>>  		 * considering them for execution, we push them back
>>>>>> @@ -178,7 +172,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>>>>  		list_for_each_entry_safe_reverse(thread, tmp,
>>>>>>  						&tg->expired, quota_expired) {
>>>>>>  			list_del_init(&thread->quota_expired);
>>>>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>>>>> +			evl_add_schedq(&qs->runnable, thread);
>>>>>>  		}
>>>>>>  	}
>>>>>>  
>>>>>> @@ -195,7 +189,7 @@ static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
>>>>>>  	/*
>>>>>>  	 * Force a rescheduling on the return path of the current
>>>>>>  	 * interrupt, so that the budget is re-evaluated for the
>>>>>> -	 * current group in evl_quota_pick().
>>>>>> +	 * current group in quota_pick().
>>>>>>  	 */
>>>>>>  	raw_spin_lock(&rq->lock);
>>>>>>  	evl_set_self_resched(rq);
>>>>>> @@ -221,6 +215,7 @@ static void quota_init(struct evl_rq *rq)
>>>>>>  {
>>>>>>  	struct evl_sched_quota *qs = &rq->quota;
>>>>>>  
>>>>>> +	evl_init_schedq(&qs->runnable);
>>>>>>  	qs->period = quota_period;
>>>>>>  	INIT_LIST_HEAD(&qs->groups);
>>>>>>  
>>>>>> @@ -337,8 +332,8 @@ static void quota_forget(struct evl_thread *thread)
>>>>>>  
>>>>>>  static void quota_kick(struct evl_thread *thread)
>>>>>>  {
>>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>>  
>>>>>>  	/*
>>>>>>  	 * Allow a kicked thread to be elected for running until it
>>>>>> @@ -347,7 +342,7 @@ static void quota_kick(struct evl_thread *thread)
>>>>>>  	 */
>>>>>>  	if (tg->run_budget == 0 && !list_empty(&thread->quota_expired)) {
>>>>>>  		list_del_init(&thread->quota_expired);
>>>>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>>>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>>>>  	}
>>>>>>  }
>>>>>>  
>>>>>> @@ -358,79 +353,82 @@ static inline int thread_is_runnable(struct evl_thread *thread)
>>>>>>  
>>>>>>  static void quota_enqueue(struct evl_thread *thread)
>>>>>>  {
>>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>>  
>>>>>>  	if (!thread_is_runnable(thread))
>>>>>>  		list_add_tail(&thread->quota_expired, &tg->expired);
>>>>>>  	else
>>>>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>>>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>>>>  
>>>>>>  	tg->nr_active++;
>>>>>>  }
>>>>>>  
>>>>>>  static void quota_dequeue(struct evl_thread *thread)
>>>>>>  {
>>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>>  
>>>>>>  	if (!list_empty(&thread->quota_expired))
>>>>>>  		list_del_init(&thread->quota_expired);
>>>>>>  	else
>>>>>> -		evl_del_schedq(&rq->fifo.runnable, thread);
>>>>>> +		evl_del_schedq(&qs->runnable, thread);
>>>>>>  
>>>>>>  	tg->nr_active--;
>>>>>>  }
>>>>>>  
>>>>>>  static void quota_requeue(struct evl_thread *thread)
>>>>>>  {
>>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>>  	struct evl_quota_group *tg = thread->quota;
>>>>>> -	struct evl_rq *rq = thread->rq;
>>>>>>  
>>>>>>  	if (!thread_is_runnable(thread))
>>>>>>  		list_add(&thread->quota_expired, &tg->expired);
>>>>>>  	else
>>>>>> -		evl_add_schedq(&rq->fifo.runnable, thread);
>>>>>> +		evl_add_schedq(&qs->runnable, thread);
>>>>>>  
>>>>>>  	tg->nr_active++;
>>>>>>  }
>>>>>>  
>>>>>> -static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>>>> +static void quota_out(struct evl_thread *thread)
>>>>>>  {
>>>>>> -	struct evl_thread *next, *curr = rq->curr;
>>>>>> -	struct evl_sched_quota *qs = &rq->quota;
>>>>>> -	struct evl_quota_group *otg, *tg;
>>>>>> -	ktime_t now, elapsed;
>>>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>>> +	struct evl_quota_group *tg = thread->quota;
>>>>>> +	ktime_t now, consumed;
>>>>>> +
>>>>>> +	/* Timer off means that we are not tracking quota. */
>>>>>> +	if (!evl_timer_is_running(&qs->limit_timer))
>>>>>> +		return;
>>>>>>  
>>>>>> -	now = evl_ktime_monotonic();
>>>>>> -	otg = curr->quota;
>>>>>> -	if (otg == NULL)
>>>>>> -		goto pick;
>>>>>>  	/*
>>>>>>  	 * Charge the time consumed by the outgoing thread to the
>>>>>>  	 * group it belongs to.
>>>>>>  	 */
>>>>>> -	elapsed = ktime_sub(now, otg->run_start);
>>>>>> -	if (elapsed < otg->run_budget)
>>>>>> -		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
>>>>>> -	else
>>>>>> -		otg->run_budget = 0;
>>>>>> +	now = evl_ktime_monotonic();
>>>>>
>>>>> Thing brins some, though minor, glitch when doing the run_start update
>>>>> for the next group already in pick. That's why I'm trying to reuse that
>>>>> stamp when available.
>>>>>
>>>>>> +	consumed = ktime_sub(now, tg->run_start);
>>>>>> +	if (consumed < tg->run_budget) {
>>>>>> +		tg->run_budget = ktime_sub(tg->run_budget, consumed);
>>>>>> +	} else {
>>>>>> +		tg->run_budget = 0;
>>>>>> +		evl_stop_timer(&qs->limit_timer);
>>>>>> +	}
>>>>>> +}
>>>>>> +
>>>>>> +static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>>>> +{
>>>>>> +	struct evl_thread *next, *curr = rq->curr;
>>>>>> +	struct evl_sched_quota *qs = &rq->quota;
>>>>>> +	struct evl_quota_group *tg;
>>>>>> +
>>>>>>  pick:
>>>>>> -	next = evl_get_schedq(&rq->fifo.runnable);
>>>>>> +	next = evl_get_schedq(&qs->runnable);
>>>>>>  	if (next == NULL) {
>>>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>>>  		return NULL;
>>>>>>  	}
>>>>>>  
>>>>>> -	/*
>>>>>> -	 * As we basically piggyback on the SCHED_FIFO runqueue, make
>>>>>> -	 * sure to detect non-quota threads.
>>>>>> -	 */
>>>>>>  	tg = next->quota;
>>>>>> -	if (tg == NULL)
>>>>>> -		return next;
>>>>>> -
>>>>>> -	tg->run_start = now;
>>>>>> +	tg->nr_active--;
>>>>>>  
>>>>>>  	/*
>>>>>>  	 * Don't consider budget if kicked, we have to allow this
>>>>>> @@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>>>>  	 */
>>>>>>  	if (next->info & EVL_T_KICKED) {
>>>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>>> -		goto out;
>>>>>> +		return next;
>>>>>>  	}
>>>>>>  
>>>>>> +	/*
>>>>>> +	 * __pick_next_thread() might have requeued the current
>>>>>> +	 * thread which is still leading the pack.
>>>>>> +	 */
>>>>>> +	if (curr == next)
>>>>>> +		return next;
>>>>>> +
>>>>>>  	if (ktime_to_ns(tg->run_budget) == 0) {
>>>>>> -		/* Flush expired group members as we go. */
>>>>>> +		/* Park expired group members as we go. */
>>>>>>  		list_add_tail(&next->quota_expired, &tg->expired);
>>>>>>  		goto pick;
>>>>>>  	}
>>>>>>  
>>>>>> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
>>>>>> -		/* Same group, leave the running timer untouched. */
>>>>>> -		goto out;
>>>>>> -
>>>>>> -	/* Arm limit timer for the new running group. */
>>>>>> -	evl_start_timer(&qs->limit_timer,
>>>>>> -			ktime_add(now, tg->run_budget),
>>>>>> -			EVL_INFINITE);
>>>>>> -out:
>>>>>> -	tg->nr_active--;
>>>>>> +	/* Arm new limit timer on change of running group. */
>>>>>> +	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
>>>>>> +		tg->run_start = evl_ktime_monotonic();
>>>>>> +		evl_start_timer(&qs->limit_timer,
>>>>>> +				ktime_add(tg->run_start, tg->run_budget),
>>>>>> +				EVL_INFINITE);
>>>>>> +	}
>>>>>>  
>>>>>>  	return next;
>>>>>>  }
>>>>>> @@ -542,7 +544,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>>>>  	 * Unregister the group before we drop rq->lock. As a result,
>>>>>>  	 * it won't accept threads anymore while we are busy moving
>>>>>>  	 * the current members to the fifo class, and concurrent
>>>>>> -	 * evl_quota_remove requests would receive -EINVAL.
>>>>>> +	 * quota_remove requests would receive -EINVAL.
>>>>>>  	 */
>>>>>>  	__clear_bit(tg->tgid, group_map);
>>>>>>  	list_del(&tg->next);
>>>>>> @@ -556,7 +558,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>>>>  	 * hold rq->lock on entry, we do a trylock dance to prevent an
>>>>>>  	 * ABBA issue. No livelock is possible since we unregistered
>>>>>>  	 * that group already, so &tg->members can only be depleted
>>>>>> -	 * (by this loop specifically).
>>>>>> +	 * (by this loop exclusively).
>>>>>>  	 */
>>>>>>  
>>>>>>  	while (!list_empty(&tg->members)) {
>>>>>> @@ -583,10 +585,10 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>>>  			int *quota_sum_r)
>>>>>>  {
>>>>>>  	struct evl_rq *rq = tg->rq;
>>>>>> -	struct evl_thread *thread, *tmp, *curr = rq->curr;
>>>>>> +	struct evl_thread *thread, *tmp;
>>>>>>  	struct evl_sched_quota *qs = &rq->quota;
>>>>>> -	ktime_t now, elapsed, consumed;
>>>>>>  	ktime_t old_quota = tg->quota;
>>>>>> +	ktime_t consumed;
>>>>>>  	u64 n;
>>>>>>  
>>>>>>  	assert_hard_lock(&rq->lock);
>>>>>> @@ -615,16 +617,8 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>>>  	tg->quota_percent = quota_percent;
>>>>>>  	tg->quota_peak_percent = quota_peak_percent;
>>>>>>  
>>>>>> -	if (thread_on_quota(curr, tg)) {
>>>>>> -		now = evl_ktime_monotonic();
>>>>>> -
>>>>>> -		elapsed = now - tg->run_start;
>>>>>> -		if (elapsed < tg->run_budget)
>>>>>> -			tg->run_budget -= elapsed;
>>>>>> -		else
>>>>>> -			tg->run_budget = 0;
>>>>>> -
>>>>>> -		tg->run_start = now;
>>>>>> +	if (current_on_quota(tg)) {
>>>>>> +		quota_out(rq->curr);
>>>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>>>  	}
>>>>>>  
>>>>>> @@ -646,7 +640,7 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>>>  		list_for_each_entry_safe_reverse(thread, tmp, &tg->expired,
>>>>>>  						quota_expired) {
>>>>>>  			list_del_init(&thread->quota_expired);
>>>>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>>>>> +			evl_add_schedq(&qs->runnable, thread);
>>>>>>  		}
>>>>>>  	}
>>>>>>  
>>>>>> @@ -786,6 +780,7 @@ struct evl_sched_class evl_sched_quota = {
>>>>>>  	.sched_dequeue		=	quota_dequeue,
>>>>>>  	.sched_requeue		=	quota_requeue,
>>>>>>  	.sched_pick		=	quota_pick,
>>>>>> +	.sched_out		=	quota_out,
>>>>>>  	.sched_migrate		=	quota_migrate,
>>>>>>  	.sched_chkparam		=	quota_chkparam,
>>>>>>  	.sched_setparam		=	quota_setparam,
>>>>>>
>>>>>
>>>>> You also seem to miss the case I discovered only yesterday as well: When
>>>>> starting the current thread on a quota via setparam, we also need to set
>>>>> its run_start stamp at that point.
>>>>
>>>> run_start should be meaningful only when a limit timer runs. 
>>>>
>>>
>>> Right, but are you sure it will be started in the case I described...?
>>>
>> 
>> To be started, it must be picked, so yes.
>> 
>
> But prev == next then - nothing new to pick, no?
>

Yes, with the current group properly being charged for the consumed time
since sched_out() is called first and unconditionally. Whether next ==
curr can only be determined by sched_pick() anyway.

-- 
Philippe.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  6:58     ` Jan Kiszka
  2026-06-15  7:17       ` Philippe Gerum
@ 2026-06-15  8:09       ` Philippe Gerum
  2026-06-15  8:40         ` Jan Kiszka
  1 sibling, 1 reply; 24+ messages in thread
From: Philippe Gerum @ 2026-06-15  8:09 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Xenomai

Jan Kiszka <jan.kiszka@siemens.com> writes:

> On 15.06.26 08:42, Philippe Gerum wrote:
>> Philippe Gerum <rpm@xenomai.org> writes:
>> 
>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>
>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>
>>>> This shall be invoked before a thread switch, providing both the current
>>>> and the next thread as arguments. Some scheduling classes may need it to
>>>> correctly handle their state as the sched_pick may not be invoked when a
>>>> higher-weighted class is providing the next thread.
>>>>
>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>> ---
>>>>  include/evl/sched.h     | 2 ++
>>>>  kernel/evl/sched/core.c | 5 +++++
>>>>  2 files changed, 7 insertions(+)
>>>>
>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>> index ae9690860146c..0b16f1b1cf626 100644
>>>> --- a/include/evl/sched.h
>>>> +++ b/include/evl/sched.h
>>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>> +	void (*sched_out)(struct evl_thread *thread,
>>>> +			  struct evl_thread *next);
>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>  			      struct evl_rq *rq);
>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>> index eb133e334d30f..0d49fc16bd67e 100644
>>>> --- a/kernel/evl/sched/core.c
>>>> +++ b/kernel/evl/sched/core.c
>>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>>   */
>>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>  {
>>>> +	struct evl_sched_class *prev_schedclass;
>>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>>  	struct evl_thread *prev, *next, *curr;
>>>>  	bool leaving_inband, inband_tail;
>>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>  	this_rq->curr = next;
>>>>  	leaving_inband = false;
>>>>  
>>>> +	prev_schedclass = prev->sched_class;
>>>> +	if (prev_schedclass->sched_out)
>>>> +		prev_schedclass->sched_out(prev, next);
>>>> +
>>>>  	/*
>>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>>  	 * performing the rest of the context switch code
>>>
>>> I've been working on this lately too. It turns out that we need more
>>> than this, although this is definitely part of the solution. I'll follow
>>> up on this issue.
>> 
>> This is still wip, I'm sharing this early to discuss details for
>> reconciling both proposals (the hunk in __evl_schedule() is merely
>> cosmetic, no functional change).
>> 
>
> Please avoid that for the final patch. I already had troubles telling
> substantial from cosmetical changes apart in your lazy-blocking commit
> (>50% unrelated changes in there).
>
>> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
>> Author: Philippe Gerum <rpm@xenomai.org>
>> Date:   Sun Jun 14 12:08:59 2026 +0200
>> 
>>     evl: sched/quota: fix budget tracking on preemption
>>     
>>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>>     runtime budget of the former is inaccurately tracked. This is due to
>>     the fifo->pick() handler returning a valid thread, which prevents the
>>     quota->pick() handler from being called. As a result, the last runtime
>>     period of the outgoing thread is not accounted for.
>>     
>>     To fix this, we introduce a new sched_out() handler which is called
>>     for the outgoing thread, which the quota policy uses to update the
>>     remaining budget of preempted threads appropriately. In addition, the
>>     implementation no longer shares the runnable thread queue with
>>     SCHED_FIFO.
>
> ...and why we need a separate queue now? Please provide reasoning for
> changes.
>
>>     
>>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
>> 
>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>> index ae9690860146..cc824c28004b 100644
>> --- a/include/evl/sched.h
>> +++ b/include/evl/sched.h
>> @@ -120,6 +120,7 @@ struct evl_sched_class {
>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>  	void (*sched_requeue)(struct evl_thread *thread);
>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>> +	void (*sched_out)(struct evl_thread *thread);
>>  	void (*sched_yield)(struct evl_thread *thread);
>>  	void (*sched_migrate)(struct evl_thread *thread,
>>  			      struct evl_rq *rq);
>> diff --git a/include/evl/sched/quota.h b/include/evl/sched/quota.h
>> index dfe3b7390958..dc8416645da8 100644
>> --- a/include/evl/sched/quota.h
>> +++ b/include/evl/sched/quota.h
>> @@ -40,6 +40,7 @@ struct evl_quota_group {
>>  
>>  struct evl_sched_quota {
>>  	ktime_t period;
>> +	struct evl_sched_queue runnable;
>>  	struct evl_timer refill_timer;
>>  	struct evl_timer limit_timer;
>>  	struct list_head groups;
>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>> index d1d025e06a5d..7127d4e52df9 100644
>> --- a/kernel/evl/sched/core.c
>> +++ b/kernel/evl/sched/core.c
>> @@ -773,7 +773,8 @@ static inline void set_next_running(struct evl_rq *rq,
>>  		evl_stop_timer(&rq->rrbtimer);
>>  }
>>  
>> -static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>> +static __always_inline struct evl_thread *
>> +__pick_next_thread(struct evl_rq *rq)
>>  {
>>  	struct evl_sched_class *sched_class;
>>  	struct evl_thread *curr = rq->curr;
>> @@ -821,8 +822,14 @@ static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>  /* rq->curr->lock + rq->lock held, hard irqs off. */
>>  static struct evl_thread *pick_next_thread(struct evl_rq *rq)
>>  {
>> -	struct evl_thread *next = __pick_next_thread(rq);
>> +	struct evl_thread *next, *prev = rq->curr;
>> +	struct evl_sched_class *prev_class = prev->sched_class;
>>  
>> +	if (prev_class->sched_out)
>> +		prev_class->sched_out(prev);
>> +
>> +	next = __pick_next_thread(rq);
>> +	trace_evl_pick_thread(next);
>>  	set_next_running(rq, next);
>>  
>>  	return next;
>> @@ -972,21 +979,20 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>  		return;
>>  	}
>>  
>> +	prev = curr;
>>  	next = pick_next_thread(this_rq);
>> -	trace_evl_pick_thread(next);
>> -	if (next == curr) {
>> -		if (unlikely(next->state & EVL_T_ROOT)) {
>> +	if (next == prev) {
>> +		if (unlikely(prev->state & EVL_T_ROOT)) {
>>  			if (this_rq->local_flags & RQ_TPROXY)
>>  				evl_notify_proxy_tick(this_rq);
>>  			if (this_rq->local_flags & RQ_TDEFER)
>>  				evl_program_local_tick(&evl_mono_clock);
>>  		}
>>  		raw_spin_unlock(&this_rq->lock);
>> -		raw_spin_unlock_irqrestore(&curr->lock, flags);
>> +		raw_spin_unlock_irqrestore(&prev->lock, flags);
>>  		return;
>>  	}
>>  
>> -	prev = curr;
>>  	this_rq->curr = next;
>>  	leaving_inband = false;
>>  
>> diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
>> index 0829da711a66..14748e3a843e 100644
>> --- a/kernel/evl/sched/quota.c
>> +++ b/kernel/evl/sched/quota.c
>> @@ -12,45 +12,35 @@
>>  #include <uapi/evl/sched-abi.h>
>>  
>>  /*
>> - * With this policy, each per-CPU runqueue maintains a list of active
>> - * thread groups for the sched_fifo class.
>> - *
>> - * Each time a thread is picked from the runqueue, we check whether we
>> - * still have budget for running it, looking at the group it belongs
>> - * to. If so, a timer is armed to elapse when that group has no more
>> - * budget, would the incoming thread run unpreempted until then
>> - * (i.e. evl_quota->limit_timer).
>> + * Each time a thread is picked from the ->runnable queue, we check
>> + * whether the group it belongs to still has runtime budget.  If so, a
>> + * timer is armed to fire when that group has no more budget, would
>> + * the incoming thread run unpreempted until then
>> + * (i.e. quota->limit_timer).
>>   *
>>   * Otherwise, if no budget remains in the group for running the
>>   * candidate thread, we move the latter to a local expiry queue
>>   * maintained by the group. This process is done on the fly as we pull
>> - * from the runqueue.
>> + * from the ->runnable queue.
>>   *
>> - * Updating the remaining budget is done each time the EVL core asks
>> - * for replacing the current thread with the next runnable one,
>> - * i.e. evl_quota_pick(). There we charge the elapsed run time of the
>> - * outgoing thread to the relevant group, and conversely, we check
>> - * whether the incoming thread has budget.
>> + * Updating the remaining budget is done each time the EVL core
>> + * schedules out a thread undergoing the quota scheduling policy,
>>   *
>> - * Finally, a per-CPU timer (evl_quota->refill_timer) periodically
>> - * ticks in the background, in accordance to the defined quota
>> - * interval. Thread group budgets get replenished by its handler in
>> - * accordance to their respective share, pushing all expired threads
>> - * back to the run queue in the same move.
>> + * Finally, a per-CPU timer (quota->refill_timer) periodically ticks
>> + * in the background, in accordance to the defined quota interval,
>> + * replenishing per-group budgets, pushing all expired threads back to
>> + * the quota ->runqueue too.
>>   *
>> - * NOTE: since the core logic enforcing the budget entirely happens in
>> - * evl_quota_pick(), applying a budget change can be done as simply as
>> - * forcing the rescheduling procedure to be invoked asap. As a result
>> - * of this, the EVL core will ask for the next thread to run, which
>> - * means calling evl_quota_pick() eventually.
>> + * NOTE: forcing a call to the rescheduling procedure is enoiugh to
>> + * apply a budget change.
>>   *
>> - * CAUTION: evl_quota_group->nr_active does count both the threads
>> - * from that group linked to the sched_fifo runqueue, _and_ the
>> - * threads moved to the local expiry queue. As a matter of fact, the
>> - * expired threads - those for which we consumed all the per-group
>> - * budget - are still seen as runnable (i.e. not blocked/suspended) by
>> - * the EVL core. This only means that the SCHED_QUOTA policy won't
>> - * pick them until the corresponding budget is replenished.
>> + * CAUTION: quota_group->nr_active does count both the threads from
>> + * that group linked to the runnable queue, _and_ the threads moved to
>> + * the local expiry queue. As a matter of fact, the expired threads -
>> + * those for which we consumed all the per-group budget - are still
>> + * seen as runnable (i.e. not blocked/suspended) by the EVL core. This
>> + * only means that the SCHED_QUOTA policy won't pick them until the
>> + * corresponding budget is replenished.
>>   */
>>  
>>  #define MAX_QUOTA_GROUPS  1024
>> @@ -61,15 +51,19 @@ static DECLARE_BITMAP(group_map, MAX_QUOTA_GROUPS);
>>  
>>  static LIST_HEAD(group_list);
>>  
>> -static inline bool thread_on_quota(struct evl_thread *thread,
>> -				struct evl_quota_group *tg)
>> +static inline bool current_on_quota(struct evl_quota_group *tg)
>>  {
>> -	/*
>> -	 * Check whether @thread is running on some CPU, and belongs
>> -	 * to quota group @tg.
>> -	 */
>> -	return thread->quota == tg &&
>> -		!(thread->state & (EVL_T_READY|EVL_THREAD_BLOCK_MASK));
>> +	struct evl_rq *rq = tg->rq;
>> +	struct evl_thread *curr = rq->curr;
>> +	struct evl_sched_quota *qs = &rq->quota;
>> +
>> +	if (curr->quota != tg)
>> +		return false;
>> +
>> +	if (curr->state & (EVL_T_READY|EVL_T_KICKED|EVL_THREAD_BLOCK_MASK))
>> +		return false;
>> +
>> +	return evl_timer_is_running(&qs->limit_timer);
>>  }
>>  
>>  static inline bool group_is_active(struct evl_quota_group *tg)
>> @@ -82,7 +76,7 @@ static inline bool group_is_active(struct evl_quota_group *tg)
>>  	 * runqueue, in which case tg->nr_active already accounted for
>>  	 * it.
>>  	 */
>> -	return thread_on_quota(tg->rq->curr, tg);
>> +	return current_on_quota(tg);
>>  }
>>  
>>  static inline void replenish_budget(struct evl_sched_quota *qs,
>> @@ -134,10 +128,10 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>  	} else if (tg->run_credit) {
>>  		credit = ktime_sub(tg->quota_peak, budget);
>>  		/* Consume the accumulated credit. */
>> -		if (tg->run_credit >= credit)
>> +		if (tg->run_credit >= credit) {
>>  			tg->run_credit =
>>  				ktime_sub(tg->run_credit, credit);
>> -		else {
>> +		} else {
>>  			credit = tg->run_credit;
>>  			tg->run_credit = 0;
>>  		}
>> @@ -150,8 +144,8 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>  
>>  static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled */
>>  {
>> -	struct evl_quota_group *tg;
>>  	struct evl_thread *thread, *tmp;
>> +	struct evl_quota_group *tg;
>>  	struct evl_sched_quota *qs;
>>  	struct evl_rq *rq;
>>  
>> @@ -167,7 +161,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>  		if (tg->run_budget == 0 || list_empty(&tg->expired))
>>  			continue;
>>  		/*
>> -		 * For each group living on this CPU, move all expired
>> +		 * For each group pinned on this CPU, move all expired
>>  		 * threads back to the runqueue. Since those threads
>>  		 * were moved out of the runqueue as we were
>>  		 * considering them for execution, we push them back
>> @@ -178,7 +172,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>  		list_for_each_entry_safe_reverse(thread, tmp,
>>  						&tg->expired, quota_expired) {
>>  			list_del_init(&thread->quota_expired);
>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>> +			evl_add_schedq(&qs->runnable, thread);
>>  		}
>>  	}
>>  
>> @@ -195,7 +189,7 @@ static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
>>  	/*
>>  	 * Force a rescheduling on the return path of the current
>>  	 * interrupt, so that the budget is re-evaluated for the
>> -	 * current group in evl_quota_pick().
>> +	 * current group in quota_pick().
>>  	 */
>>  	raw_spin_lock(&rq->lock);
>>  	evl_set_self_resched(rq);
>> @@ -221,6 +215,7 @@ static void quota_init(struct evl_rq *rq)
>>  {
>>  	struct evl_sched_quota *qs = &rq->quota;
>>  
>> +	evl_init_schedq(&qs->runnable);
>>  	qs->period = quota_period;
>>  	INIT_LIST_HEAD(&qs->groups);
>>  
>> @@ -337,8 +332,8 @@ static void quota_forget(struct evl_thread *thread)
>>  
>>  static void quota_kick(struct evl_thread *thread)
>>  {
>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>  	struct evl_quota_group *tg = thread->quota;
>> -	struct evl_rq *rq = thread->rq;
>>  
>>  	/*
>>  	 * Allow a kicked thread to be elected for running until it
>> @@ -347,7 +342,7 @@ static void quota_kick(struct evl_thread *thread)
>>  	 */
>>  	if (tg->run_budget == 0 && !list_empty(&thread->quota_expired)) {
>>  		list_del_init(&thread->quota_expired);
>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>  	}
>>  }
>>  
>> @@ -358,79 +353,82 @@ static inline int thread_is_runnable(struct evl_thread *thread)
>>  
>>  static void quota_enqueue(struct evl_thread *thread)
>>  {
>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>  	struct evl_quota_group *tg = thread->quota;
>> -	struct evl_rq *rq = thread->rq;
>>  
>>  	if (!thread_is_runnable(thread))
>>  		list_add_tail(&thread->quota_expired, &tg->expired);
>>  	else
>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>  
>>  	tg->nr_active++;
>>  }
>>  
>>  static void quota_dequeue(struct evl_thread *thread)
>>  {
>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>  	struct evl_quota_group *tg = thread->quota;
>> -	struct evl_rq *rq = thread->rq;
>>  
>>  	if (!list_empty(&thread->quota_expired))
>>  		list_del_init(&thread->quota_expired);
>>  	else
>> -		evl_del_schedq(&rq->fifo.runnable, thread);
>> +		evl_del_schedq(&qs->runnable, thread);
>>  
>>  	tg->nr_active--;
>>  }
>>  
>>  static void quota_requeue(struct evl_thread *thread)
>>  {
>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>  	struct evl_quota_group *tg = thread->quota;
>> -	struct evl_rq *rq = thread->rq;
>>  
>>  	if (!thread_is_runnable(thread))
>>  		list_add(&thread->quota_expired, &tg->expired);
>>  	else
>> -		evl_add_schedq(&rq->fifo.runnable, thread);
>> +		evl_add_schedq(&qs->runnable, thread);
>>  
>>  	tg->nr_active++;
>>  }
>>  
>> -static struct evl_thread *quota_pick(struct evl_rq *rq)
>> +static void quota_out(struct evl_thread *thread)
>>  {
>> -	struct evl_thread *next, *curr = rq->curr;
>> -	struct evl_sched_quota *qs = &rq->quota;
>> -	struct evl_quota_group *otg, *tg;
>> -	ktime_t now, elapsed;
>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>> +	struct evl_quota_group *tg = thread->quota;
>> +	ktime_t now, consumed;
>> +
>> +	/* Timer off means that we are not tracking quota. */
>> +	if (!evl_timer_is_running(&qs->limit_timer))
>> +		return;
>>  
>> -	now = evl_ktime_monotonic();
>> -	otg = curr->quota;
>> -	if (otg == NULL)
>> -		goto pick;
>>  	/*
>>  	 * Charge the time consumed by the outgoing thread to the
>>  	 * group it belongs to.
>>  	 */
>> -	elapsed = ktime_sub(now, otg->run_start);
>> -	if (elapsed < otg->run_budget)
>> -		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
>> -	else
>> -		otg->run_budget = 0;
>> +	now = evl_ktime_monotonic();
>
> Thing brins some, though minor, glitch when doing the run_start update
> for the next group already in pick. That's why I'm trying to reuse that
> stamp when available.
>
>> +	consumed = ktime_sub(now, tg->run_start);
>> +	if (consumed < tg->run_budget) {
>> +		tg->run_budget = ktime_sub(tg->run_budget, consumed);
>> +	} else {
>> +		tg->run_budget = 0;
>> +		evl_stop_timer(&qs->limit_timer);
>> +	}
>> +}
>> +
>> +static struct evl_thread *quota_pick(struct evl_rq *rq)
>> +{
>> +	struct evl_thread *next, *curr = rq->curr;
>> +	struct evl_sched_quota *qs = &rq->quota;
>> +	struct evl_quota_group *tg;
>> +
>>  pick:
>> -	next = evl_get_schedq(&rq->fifo.runnable);
>> +	next = evl_get_schedq(&qs->runnable);
>>  	if (next == NULL) {
>>  		evl_stop_timer(&qs->limit_timer);
>>  		return NULL;
>>  	}
>>  
>> -	/*
>> -	 * As we basically piggyback on the SCHED_FIFO runqueue, make
>> -	 * sure to detect non-quota threads.
>> -	 */
>>  	tg = next->quota;
>> -	if (tg == NULL)
>> -		return next;
>> -
>> -	tg->run_start = now;
>> +	tg->nr_active--;
>>  
>>  	/*
>>  	 * Don't consider budget if kicked, we have to allow this
>> @@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>>  	 */
>>  	if (next->info & EVL_T_KICKED) {
>>  		evl_stop_timer(&qs->limit_timer);
>> -		goto out;
>> +		return next;
>>  	}
>>  
>> +	/*
>> +	 * __pick_next_thread() might have requeued the current
>> +	 * thread which is still leading the pack.
>> +	 */
>> +	if (curr == next)
>> +		return next;
>> +
>>  	if (ktime_to_ns(tg->run_budget) == 0) {
>> -		/* Flush expired group members as we go. */
>> +		/* Park expired group members as we go. */
>>  		list_add_tail(&next->quota_expired, &tg->expired);
>>  		goto pick;
>>  	}
>>  
>> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
>> -		/* Same group, leave the running timer untouched. */
>> -		goto out;
>> -
>> -	/* Arm limit timer for the new running group. */
>> -	evl_start_timer(&qs->limit_timer,
>> -			ktime_add(now, tg->run_budget),
>> -			EVL_INFINITE);
>> -out:
>> -	tg->nr_active--;
>> +	/* Arm new limit timer on change of running group. */
>> +	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
>> +		tg->run_start = evl_ktime_monotonic();
>> +		evl_start_timer(&qs->limit_timer,
>> +				ktime_add(tg->run_start, tg->run_budget),
>> +				EVL_INFINITE);
>> +	}
>>  
>>  	return next;
>>  }
>> @@ -542,7 +544,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>  	 * Unregister the group before we drop rq->lock. As a result,
>>  	 * it won't accept threads anymore while we are busy moving
>>  	 * the current members to the fifo class, and concurrent
>> -	 * evl_quota_remove requests would receive -EINVAL.
>> +	 * quota_remove requests would receive -EINVAL.
>>  	 */
>>  	__clear_bit(tg->tgid, group_map);
>>  	list_del(&tg->next);
>> @@ -556,7 +558,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>  	 * hold rq->lock on entry, we do a trylock dance to prevent an
>>  	 * ABBA issue. No livelock is possible since we unregistered
>>  	 * that group already, so &tg->members can only be depleted
>> -	 * (by this loop specifically).
>> +	 * (by this loop exclusively).
>>  	 */
>>  
>>  	while (!list_empty(&tg->members)) {
>> @@ -583,10 +585,10 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>  			int *quota_sum_r)
>>  {
>>  	struct evl_rq *rq = tg->rq;
>> -	struct evl_thread *thread, *tmp, *curr = rq->curr;
>> +	struct evl_thread *thread, *tmp;
>>  	struct evl_sched_quota *qs = &rq->quota;
>> -	ktime_t now, elapsed, consumed;
>>  	ktime_t old_quota = tg->quota;
>> +	ktime_t consumed;
>>  	u64 n;
>>  
>>  	assert_hard_lock(&rq->lock);
>> @@ -615,16 +617,8 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>  	tg->quota_percent = quota_percent;
>>  	tg->quota_peak_percent = quota_peak_percent;
>>  
>> -	if (thread_on_quota(curr, tg)) {
>> -		now = evl_ktime_monotonic();
>> -
>> -		elapsed = now - tg->run_start;
>> -		if (elapsed < tg->run_budget)
>> -			tg->run_budget -= elapsed;
>> -		else
>> -			tg->run_budget = 0;
>> -
>> -		tg->run_start = now;
>> +	if (current_on_quota(tg)) {
>> +		quota_out(rq->curr);
>>  		evl_stop_timer(&qs->limit_timer);
>>  	}
>>  
>> @@ -646,7 +640,7 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>  		list_for_each_entry_safe_reverse(thread, tmp, &tg->expired,
>>  						quota_expired) {
>>  			list_del_init(&thread->quota_expired);
>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>> +			evl_add_schedq(&qs->runnable, thread);
>>  		}
>>  	}
>>  
>> @@ -786,6 +780,7 @@ struct evl_sched_class evl_sched_quota = {
>>  	.sched_dequeue		=	quota_dequeue,
>>  	.sched_requeue		=	quota_requeue,
>>  	.sched_pick		=	quota_pick,
>> +	.sched_out		=	quota_out,
>>  	.sched_migrate		=	quota_migrate,
>>  	.sched_chkparam		=	quota_chkparam,
>>  	.sched_setparam		=	quota_setparam,
>> 
>
> You also seem to miss the case I discovered only yesterday as well: When
> starting the current thread on a quota via setparam, we also need to set
> its run_start stamp at that point.
>
> Please have a closer look at both my test case (libevl) as well as my
> patch 2. Currently, I don't see remaining issues with both, but I'm all
> ears to learn what I missed.
>

AFAIU, the logic of your changes is as follows:

1. provide a sched_out() handler which charges the previous/current
   group with the budget consumed by the outgoing thread, dropping such
   accounting from sched_pick() in the same move. As a consequence,
   charge the consumed budget in quota_limit_handler() too since you
   can't expect sched_pick() to do so (i.e. turning on the resched bit
   on curr->rq in quota_limit_handler() would not help in this case).

2. invoke sched_out() after sched_pick(), unless prev == next.

Since sched_pick() checks next->quota->run_budget to make sure that next
may run, what if curr and next belong to the same quota group given that
curr->quota->run_budget won't be updated by sched_out() until
sched_pick() has returned?

-- 
Philippe.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  8:09       ` Philippe Gerum
@ 2026-06-15  8:40         ` Jan Kiszka
  2026-06-15  8:55           ` Philippe Gerum
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Kiszka @ 2026-06-15  8:40 UTC (permalink / raw)
  To: Philippe Gerum; +Cc: Xenomai

On 15.06.26 10:09, Philippe Gerum wrote:
> Jan Kiszka <jan.kiszka@siemens.com> writes:
> 
>> On 15.06.26 08:42, Philippe Gerum wrote:
>>> Philippe Gerum <rpm@xenomai.org> writes:
>>>
>>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>>
>>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>
>>>>> This shall be invoked before a thread switch, providing both the current
>>>>> and the next thread as arguments. Some scheduling classes may need it to
>>>>> correctly handle their state as the sched_pick may not be invoked when a
>>>>> higher-weighted class is providing the next thread.
>>>>>
>>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>>> ---
>>>>>  include/evl/sched.h     | 2 ++
>>>>>  kernel/evl/sched/core.c | 5 +++++
>>>>>  2 files changed, 7 insertions(+)
>>>>>
>>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>>> index ae9690860146c..0b16f1b1cf626 100644
>>>>> --- a/include/evl/sched.h
>>>>> +++ b/include/evl/sched.h
>>>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>>> +	void (*sched_out)(struct evl_thread *thread,
>>>>> +			  struct evl_thread *next);
>>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>>  			      struct evl_rq *rq);
>>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>>> index eb133e334d30f..0d49fc16bd67e 100644
>>>>> --- a/kernel/evl/sched/core.c
>>>>> +++ b/kernel/evl/sched/core.c
>>>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>>>   */
>>>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>  {
>>>>> +	struct evl_sched_class *prev_schedclass;
>>>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>>>  	struct evl_thread *prev, *next, *curr;
>>>>>  	bool leaving_inband, inband_tail;
>>>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>  	this_rq->curr = next;
>>>>>  	leaving_inband = false;
>>>>>  
>>>>> +	prev_schedclass = prev->sched_class;
>>>>> +	if (prev_schedclass->sched_out)
>>>>> +		prev_schedclass->sched_out(prev, next);
>>>>> +
>>>>>  	/*
>>>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>>>  	 * performing the rest of the context switch code
>>>>
>>>> I've been working on this lately too. It turns out that we need more
>>>> than this, although this is definitely part of the solution. I'll follow
>>>> up on this issue.
>>>
>>> This is still wip, I'm sharing this early to discuss details for
>>> reconciling both proposals (the hunk in __evl_schedule() is merely
>>> cosmetic, no functional change).
>>>
>>
>> Please avoid that for the final patch. I already had troubles telling
>> substantial from cosmetical changes apart in your lazy-blocking commit
>> (>50% unrelated changes in there).
>>
>>> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
>>> Author: Philippe Gerum <rpm@xenomai.org>
>>> Date:   Sun Jun 14 12:08:59 2026 +0200
>>>
>>>     evl: sched/quota: fix budget tracking on preemption
>>>     
>>>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>>>     runtime budget of the former is inaccurately tracked. This is due to
>>>     the fifo->pick() handler returning a valid thread, which prevents the
>>>     quota->pick() handler from being called. As a result, the last runtime
>>>     period of the outgoing thread is not accounted for.
>>>     
>>>     To fix this, we introduce a new sched_out() handler which is called
>>>     for the outgoing thread, which the quota policy uses to update the
>>>     remaining budget of preempted threads appropriately. In addition, the
>>>     implementation no longer shares the runnable thread queue with
>>>     SCHED_FIFO.
>>
>> ...and why we need a separate queue now? Please provide reasoning for
>> changes.
>>
>>>     
>>>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
>>>
>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>> index ae9690860146..cc824c28004b 100644
>>> --- a/include/evl/sched.h
>>> +++ b/include/evl/sched.h
>>> @@ -120,6 +120,7 @@ struct evl_sched_class {
>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>> +	void (*sched_out)(struct evl_thread *thread);
>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>  			      struct evl_rq *rq);
>>> diff --git a/include/evl/sched/quota.h b/include/evl/sched/quota.h
>>> index dfe3b7390958..dc8416645da8 100644
>>> --- a/include/evl/sched/quota.h
>>> +++ b/include/evl/sched/quota.h
>>> @@ -40,6 +40,7 @@ struct evl_quota_group {
>>>  
>>>  struct evl_sched_quota {
>>>  	ktime_t period;
>>> +	struct evl_sched_queue runnable;
>>>  	struct evl_timer refill_timer;
>>>  	struct evl_timer limit_timer;
>>>  	struct list_head groups;
>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>> index d1d025e06a5d..7127d4e52df9 100644
>>> --- a/kernel/evl/sched/core.c
>>> +++ b/kernel/evl/sched/core.c
>>> @@ -773,7 +773,8 @@ static inline void set_next_running(struct evl_rq *rq,
>>>  		evl_stop_timer(&rq->rrbtimer);
>>>  }
>>>  
>>> -static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>> +static __always_inline struct evl_thread *
>>> +__pick_next_thread(struct evl_rq *rq)
>>>  {
>>>  	struct evl_sched_class *sched_class;
>>>  	struct evl_thread *curr = rq->curr;
>>> @@ -821,8 +822,14 @@ static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>>  /* rq->curr->lock + rq->lock held, hard irqs off. */
>>>  static struct evl_thread *pick_next_thread(struct evl_rq *rq)
>>>  {
>>> -	struct evl_thread *next = __pick_next_thread(rq);
>>> +	struct evl_thread *next, *prev = rq->curr;
>>> +	struct evl_sched_class *prev_class = prev->sched_class;
>>>  
>>> +	if (prev_class->sched_out)
>>> +		prev_class->sched_out(prev);
>>> +
>>> +	next = __pick_next_thread(rq);
>>> +	trace_evl_pick_thread(next);
>>>  	set_next_running(rq, next);
>>>  
>>>  	return next;
>>> @@ -972,21 +979,20 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>  		return;
>>>  	}
>>>  
>>> +	prev = curr;
>>>  	next = pick_next_thread(this_rq);
>>> -	trace_evl_pick_thread(next);
>>> -	if (next == curr) {
>>> -		if (unlikely(next->state & EVL_T_ROOT)) {
>>> +	if (next == prev) {
>>> +		if (unlikely(prev->state & EVL_T_ROOT)) {
>>>  			if (this_rq->local_flags & RQ_TPROXY)
>>>  				evl_notify_proxy_tick(this_rq);
>>>  			if (this_rq->local_flags & RQ_TDEFER)
>>>  				evl_program_local_tick(&evl_mono_clock);
>>>  		}
>>>  		raw_spin_unlock(&this_rq->lock);
>>> -		raw_spin_unlock_irqrestore(&curr->lock, flags);
>>> +		raw_spin_unlock_irqrestore(&prev->lock, flags);
>>>  		return;
>>>  	}
>>>  
>>> -	prev = curr;
>>>  	this_rq->curr = next;
>>>  	leaving_inband = false;
>>>  
>>> diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
>>> index 0829da711a66..14748e3a843e 100644
>>> --- a/kernel/evl/sched/quota.c
>>> +++ b/kernel/evl/sched/quota.c
>>> @@ -12,45 +12,35 @@
>>>  #include <uapi/evl/sched-abi.h>
>>>  
>>>  /*
>>> - * With this policy, each per-CPU runqueue maintains a list of active
>>> - * thread groups for the sched_fifo class.
>>> - *
>>> - * Each time a thread is picked from the runqueue, we check whether we
>>> - * still have budget for running it, looking at the group it belongs
>>> - * to. If so, a timer is armed to elapse when that group has no more
>>> - * budget, would the incoming thread run unpreempted until then
>>> - * (i.e. evl_quota->limit_timer).
>>> + * Each time a thread is picked from the ->runnable queue, we check
>>> + * whether the group it belongs to still has runtime budget.  If so, a
>>> + * timer is armed to fire when that group has no more budget, would
>>> + * the incoming thread run unpreempted until then
>>> + * (i.e. quota->limit_timer).
>>>   *
>>>   * Otherwise, if no budget remains in the group for running the
>>>   * candidate thread, we move the latter to a local expiry queue
>>>   * maintained by the group. This process is done on the fly as we pull
>>> - * from the runqueue.
>>> + * from the ->runnable queue.
>>>   *
>>> - * Updating the remaining budget is done each time the EVL core asks
>>> - * for replacing the current thread with the next runnable one,
>>> - * i.e. evl_quota_pick(). There we charge the elapsed run time of the
>>> - * outgoing thread to the relevant group, and conversely, we check
>>> - * whether the incoming thread has budget.
>>> + * Updating the remaining budget is done each time the EVL core
>>> + * schedules out a thread undergoing the quota scheduling policy,
>>>   *
>>> - * Finally, a per-CPU timer (evl_quota->refill_timer) periodically
>>> - * ticks in the background, in accordance to the defined quota
>>> - * interval. Thread group budgets get replenished by its handler in
>>> - * accordance to their respective share, pushing all expired threads
>>> - * back to the run queue in the same move.
>>> + * Finally, a per-CPU timer (quota->refill_timer) periodically ticks
>>> + * in the background, in accordance to the defined quota interval,
>>> + * replenishing per-group budgets, pushing all expired threads back to
>>> + * the quota ->runqueue too.
>>>   *
>>> - * NOTE: since the core logic enforcing the budget entirely happens in
>>> - * evl_quota_pick(), applying a budget change can be done as simply as
>>> - * forcing the rescheduling procedure to be invoked asap. As a result
>>> - * of this, the EVL core will ask for the next thread to run, which
>>> - * means calling evl_quota_pick() eventually.
>>> + * NOTE: forcing a call to the rescheduling procedure is enoiugh to
>>> + * apply a budget change.
>>>   *
>>> - * CAUTION: evl_quota_group->nr_active does count both the threads
>>> - * from that group linked to the sched_fifo runqueue, _and_ the
>>> - * threads moved to the local expiry queue. As a matter of fact, the
>>> - * expired threads - those for which we consumed all the per-group
>>> - * budget - are still seen as runnable (i.e. not blocked/suspended) by
>>> - * the EVL core. This only means that the SCHED_QUOTA policy won't
>>> - * pick them until the corresponding budget is replenished.
>>> + * CAUTION: quota_group->nr_active does count both the threads from
>>> + * that group linked to the runnable queue, _and_ the threads moved to
>>> + * the local expiry queue. As a matter of fact, the expired threads -
>>> + * those for which we consumed all the per-group budget - are still
>>> + * seen as runnable (i.e. not blocked/suspended) by the EVL core. This
>>> + * only means that the SCHED_QUOTA policy won't pick them until the
>>> + * corresponding budget is replenished.
>>>   */
>>>  
>>>  #define MAX_QUOTA_GROUPS  1024
>>> @@ -61,15 +51,19 @@ static DECLARE_BITMAP(group_map, MAX_QUOTA_GROUPS);
>>>  
>>>  static LIST_HEAD(group_list);
>>>  
>>> -static inline bool thread_on_quota(struct evl_thread *thread,
>>> -				struct evl_quota_group *tg)
>>> +static inline bool current_on_quota(struct evl_quota_group *tg)
>>>  {
>>> -	/*
>>> -	 * Check whether @thread is running on some CPU, and belongs
>>> -	 * to quota group @tg.
>>> -	 */
>>> -	return thread->quota == tg &&
>>> -		!(thread->state & (EVL_T_READY|EVL_THREAD_BLOCK_MASK));
>>> +	struct evl_rq *rq = tg->rq;
>>> +	struct evl_thread *curr = rq->curr;
>>> +	struct evl_sched_quota *qs = &rq->quota;
>>> +
>>> +	if (curr->quota != tg)
>>> +		return false;
>>> +
>>> +	if (curr->state & (EVL_T_READY|EVL_T_KICKED|EVL_THREAD_BLOCK_MASK))
>>> +		return false;
>>> +
>>> +	return evl_timer_is_running(&qs->limit_timer);
>>>  }
>>>  
>>>  static inline bool group_is_active(struct evl_quota_group *tg)
>>> @@ -82,7 +76,7 @@ static inline bool group_is_active(struct evl_quota_group *tg)
>>>  	 * runqueue, in which case tg->nr_active already accounted for
>>>  	 * it.
>>>  	 */
>>> -	return thread_on_quota(tg->rq->curr, tg);
>>> +	return current_on_quota(tg);
>>>  }
>>>  
>>>  static inline void replenish_budget(struct evl_sched_quota *qs,
>>> @@ -134,10 +128,10 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>  	} else if (tg->run_credit) {
>>>  		credit = ktime_sub(tg->quota_peak, budget);
>>>  		/* Consume the accumulated credit. */
>>> -		if (tg->run_credit >= credit)
>>> +		if (tg->run_credit >= credit) {
>>>  			tg->run_credit =
>>>  				ktime_sub(tg->run_credit, credit);
>>> -		else {
>>> +		} else {
>>>  			credit = tg->run_credit;
>>>  			tg->run_credit = 0;
>>>  		}
>>> @@ -150,8 +144,8 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>  
>>>  static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled */
>>>  {
>>> -	struct evl_quota_group *tg;
>>>  	struct evl_thread *thread, *tmp;
>>> +	struct evl_quota_group *tg;
>>>  	struct evl_sched_quota *qs;
>>>  	struct evl_rq *rq;
>>>  
>>> @@ -167,7 +161,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>  		if (tg->run_budget == 0 || list_empty(&tg->expired))
>>>  			continue;
>>>  		/*
>>> -		 * For each group living on this CPU, move all expired
>>> +		 * For each group pinned on this CPU, move all expired
>>>  		 * threads back to the runqueue. Since those threads
>>>  		 * were moved out of the runqueue as we were
>>>  		 * considering them for execution, we push them back
>>> @@ -178,7 +172,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>  		list_for_each_entry_safe_reverse(thread, tmp,
>>>  						&tg->expired, quota_expired) {
>>>  			list_del_init(&thread->quota_expired);
>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>> +			evl_add_schedq(&qs->runnable, thread);
>>>  		}
>>>  	}
>>>  
>>> @@ -195,7 +189,7 @@ static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
>>>  	/*
>>>  	 * Force a rescheduling on the return path of the current
>>>  	 * interrupt, so that the budget is re-evaluated for the
>>> -	 * current group in evl_quota_pick().
>>> +	 * current group in quota_pick().
>>>  	 */
>>>  	raw_spin_lock(&rq->lock);
>>>  	evl_set_self_resched(rq);
>>> @@ -221,6 +215,7 @@ static void quota_init(struct evl_rq *rq)
>>>  {
>>>  	struct evl_sched_quota *qs = &rq->quota;
>>>  
>>> +	evl_init_schedq(&qs->runnable);
>>>  	qs->period = quota_period;
>>>  	INIT_LIST_HEAD(&qs->groups);
>>>  
>>> @@ -337,8 +332,8 @@ static void quota_forget(struct evl_thread *thread)
>>>  
>>>  static void quota_kick(struct evl_thread *thread)
>>>  {
>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>  	struct evl_quota_group *tg = thread->quota;
>>> -	struct evl_rq *rq = thread->rq;
>>>  
>>>  	/*
>>>  	 * Allow a kicked thread to be elected for running until it
>>> @@ -347,7 +342,7 @@ static void quota_kick(struct evl_thread *thread)
>>>  	 */
>>>  	if (tg->run_budget == 0 && !list_empty(&thread->quota_expired)) {
>>>  		list_del_init(&thread->quota_expired);
>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>  	}
>>>  }
>>>  
>>> @@ -358,79 +353,82 @@ static inline int thread_is_runnable(struct evl_thread *thread)
>>>  
>>>  static void quota_enqueue(struct evl_thread *thread)
>>>  {
>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>  	struct evl_quota_group *tg = thread->quota;
>>> -	struct evl_rq *rq = thread->rq;
>>>  
>>>  	if (!thread_is_runnable(thread))
>>>  		list_add_tail(&thread->quota_expired, &tg->expired);
>>>  	else
>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>  
>>>  	tg->nr_active++;
>>>  }
>>>  
>>>  static void quota_dequeue(struct evl_thread *thread)
>>>  {
>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>  	struct evl_quota_group *tg = thread->quota;
>>> -	struct evl_rq *rq = thread->rq;
>>>  
>>>  	if (!list_empty(&thread->quota_expired))
>>>  		list_del_init(&thread->quota_expired);
>>>  	else
>>> -		evl_del_schedq(&rq->fifo.runnable, thread);
>>> +		evl_del_schedq(&qs->runnable, thread);
>>>  
>>>  	tg->nr_active--;
>>>  }
>>>  
>>>  static void quota_requeue(struct evl_thread *thread)
>>>  {
>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>  	struct evl_quota_group *tg = thread->quota;
>>> -	struct evl_rq *rq = thread->rq;
>>>  
>>>  	if (!thread_is_runnable(thread))
>>>  		list_add(&thread->quota_expired, &tg->expired);
>>>  	else
>>> -		evl_add_schedq(&rq->fifo.runnable, thread);
>>> +		evl_add_schedq(&qs->runnable, thread);
>>>  
>>>  	tg->nr_active++;
>>>  }
>>>  
>>> -static struct evl_thread *quota_pick(struct evl_rq *rq)
>>> +static void quota_out(struct evl_thread *thread)
>>>  {
>>> -	struct evl_thread *next, *curr = rq->curr;
>>> -	struct evl_sched_quota *qs = &rq->quota;
>>> -	struct evl_quota_group *otg, *tg;
>>> -	ktime_t now, elapsed;
>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>> +	struct evl_quota_group *tg = thread->quota;
>>> +	ktime_t now, consumed;
>>> +
>>> +	/* Timer off means that we are not tracking quota. */
>>> +	if (!evl_timer_is_running(&qs->limit_timer))
>>> +		return;
>>>  
>>> -	now = evl_ktime_monotonic();
>>> -	otg = curr->quota;
>>> -	if (otg == NULL)
>>> -		goto pick;
>>>  	/*
>>>  	 * Charge the time consumed by the outgoing thread to the
>>>  	 * group it belongs to.
>>>  	 */
>>> -	elapsed = ktime_sub(now, otg->run_start);
>>> -	if (elapsed < otg->run_budget)
>>> -		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
>>> -	else
>>> -		otg->run_budget = 0;
>>> +	now = evl_ktime_monotonic();
>>
>> Thing brins some, though minor, glitch when doing the run_start update
>> for the next group already in pick. That's why I'm trying to reuse that
>> stamp when available.
>>
>>> +	consumed = ktime_sub(now, tg->run_start);
>>> +	if (consumed < tg->run_budget) {
>>> +		tg->run_budget = ktime_sub(tg->run_budget, consumed);
>>> +	} else {
>>> +		tg->run_budget = 0;
>>> +		evl_stop_timer(&qs->limit_timer);
>>> +	}
>>> +}
>>> +
>>> +static struct evl_thread *quota_pick(struct evl_rq *rq)
>>> +{
>>> +	struct evl_thread *next, *curr = rq->curr;
>>> +	struct evl_sched_quota *qs = &rq->quota;
>>> +	struct evl_quota_group *tg;
>>> +
>>>  pick:
>>> -	next = evl_get_schedq(&rq->fifo.runnable);
>>> +	next = evl_get_schedq(&qs->runnable);
>>>  	if (next == NULL) {
>>>  		evl_stop_timer(&qs->limit_timer);
>>>  		return NULL;
>>>  	}
>>>  
>>> -	/*
>>> -	 * As we basically piggyback on the SCHED_FIFO runqueue, make
>>> -	 * sure to detect non-quota threads.
>>> -	 */
>>>  	tg = next->quota;
>>> -	if (tg == NULL)
>>> -		return next;
>>> -
>>> -	tg->run_start = now;
>>> +	tg->nr_active--;
>>>  
>>>  	/*
>>>  	 * Don't consider budget if kicked, we have to allow this
>>> @@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>  	 */
>>>  	if (next->info & EVL_T_KICKED) {
>>>  		evl_stop_timer(&qs->limit_timer);
>>> -		goto out;
>>> +		return next;
>>>  	}
>>>  
>>> +	/*
>>> +	 * __pick_next_thread() might have requeued the current
>>> +	 * thread which is still leading the pack.
>>> +	 */
>>> +	if (curr == next)
>>> +		return next;
>>> +
>>>  	if (ktime_to_ns(tg->run_budget) == 0) {
>>> -		/* Flush expired group members as we go. */
>>> +		/* Park expired group members as we go. */
>>>  		list_add_tail(&next->quota_expired, &tg->expired);
>>>  		goto pick;
>>>  	}
>>>  
>>> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
>>> -		/* Same group, leave the running timer untouched. */
>>> -		goto out;
>>> -
>>> -	/* Arm limit timer for the new running group. */
>>> -	evl_start_timer(&qs->limit_timer,
>>> -			ktime_add(now, tg->run_budget),
>>> -			EVL_INFINITE);
>>> -out:
>>> -	tg->nr_active--;
>>> +	/* Arm new limit timer on change of running group. */
>>> +	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
>>> +		tg->run_start = evl_ktime_monotonic();
>>> +		evl_start_timer(&qs->limit_timer,
>>> +				ktime_add(tg->run_start, tg->run_budget),
>>> +				EVL_INFINITE);
>>> +	}
>>>  
>>>  	return next;
>>>  }
>>> @@ -542,7 +544,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>  	 * Unregister the group before we drop rq->lock. As a result,
>>>  	 * it won't accept threads anymore while we are busy moving
>>>  	 * the current members to the fifo class, and concurrent
>>> -	 * evl_quota_remove requests would receive -EINVAL.
>>> +	 * quota_remove requests would receive -EINVAL.
>>>  	 */
>>>  	__clear_bit(tg->tgid, group_map);
>>>  	list_del(&tg->next);
>>> @@ -556,7 +558,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>  	 * hold rq->lock on entry, we do a trylock dance to prevent an
>>>  	 * ABBA issue. No livelock is possible since we unregistered
>>>  	 * that group already, so &tg->members can only be depleted
>>> -	 * (by this loop specifically).
>>> +	 * (by this loop exclusively).
>>>  	 */
>>>  
>>>  	while (!list_empty(&tg->members)) {
>>> @@ -583,10 +585,10 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>  			int *quota_sum_r)
>>>  {
>>>  	struct evl_rq *rq = tg->rq;
>>> -	struct evl_thread *thread, *tmp, *curr = rq->curr;
>>> +	struct evl_thread *thread, *tmp;
>>>  	struct evl_sched_quota *qs = &rq->quota;
>>> -	ktime_t now, elapsed, consumed;
>>>  	ktime_t old_quota = tg->quota;
>>> +	ktime_t consumed;
>>>  	u64 n;
>>>  
>>>  	assert_hard_lock(&rq->lock);
>>> @@ -615,16 +617,8 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>  	tg->quota_percent = quota_percent;
>>>  	tg->quota_peak_percent = quota_peak_percent;
>>>  
>>> -	if (thread_on_quota(curr, tg)) {
>>> -		now = evl_ktime_monotonic();
>>> -
>>> -		elapsed = now - tg->run_start;
>>> -		if (elapsed < tg->run_budget)
>>> -			tg->run_budget -= elapsed;
>>> -		else
>>> -			tg->run_budget = 0;
>>> -
>>> -		tg->run_start = now;
>>> +	if (current_on_quota(tg)) {
>>> +		quota_out(rq->curr);
>>>  		evl_stop_timer(&qs->limit_timer);
>>>  	}
>>>  
>>> @@ -646,7 +640,7 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>  		list_for_each_entry_safe_reverse(thread, tmp, &tg->expired,
>>>  						quota_expired) {
>>>  			list_del_init(&thread->quota_expired);
>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>> +			evl_add_schedq(&qs->runnable, thread);
>>>  		}
>>>  	}
>>>  
>>> @@ -786,6 +780,7 @@ struct evl_sched_class evl_sched_quota = {
>>>  	.sched_dequeue		=	quota_dequeue,
>>>  	.sched_requeue		=	quota_requeue,
>>>  	.sched_pick		=	quota_pick,
>>> +	.sched_out		=	quota_out,
>>>  	.sched_migrate		=	quota_migrate,
>>>  	.sched_chkparam		=	quota_chkparam,
>>>  	.sched_setparam		=	quota_setparam,
>>>
>>
>> You also seem to miss the case I discovered only yesterday as well: When
>> starting the current thread on a quota via setparam, we also need to set
>> its run_start stamp at that point.
>>
>> Please have a closer look at both my test case (libevl) as well as my
>> patch 2. Currently, I don't see remaining issues with both, but I'm all
>> ears to learn what I missed.
>>
> 
> AFAIU, the logic of your changes is as follows:
> 
> 1. provide a sched_out() handler which charges the previous/current
>    group with the budget consumed by the outgoing thread, dropping such
>    accounting from sched_pick() in the same move. As a consequence,
>    charge the consumed budget in quota_limit_handler() too since you
>    can't expect sched_pick() to do so (i.e. turning on the resched bit
>    on curr->rq in quota_limit_handler() would not help in this case).

Yep.

> 
> 2. invoke sched_out() after sched_pick(), unless prev == next.
> 
> Since sched_pick() checks next->quota->run_budget to make sure that next
> may run, what if curr and next belong to the same quota group given that
> curr->quota->run_budget won't be updated by sched_out() until
> sched_pick() has returned?

Nothing is changed in case final next->quota == curr->quota. It's indeed
true that the run_budget == 0 check is not going to trigger anymore -
can be removed, the quota-timer will take care anyway.

Jan

-- 
Siemens AG, Foundational Technologies
Linux Expert Center

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  8:40         ` Jan Kiszka
@ 2026-06-15  8:55           ` Philippe Gerum
  0 siblings, 0 replies; 24+ messages in thread
From: Philippe Gerum @ 2026-06-15  8:55 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Xenomai

Jan Kiszka <jan.kiszka@siemens.com> writes:

> On 15.06.26 10:09, Philippe Gerum wrote:
>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>> 
>>> On 15.06.26 08:42, Philippe Gerum wrote:
>>>> Philippe Gerum <rpm@xenomai.org> writes:
>>>>
>>>>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>>>>
>>>>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>>
>>>>>> This shall be invoked before a thread switch, providing both the current
>>>>>> and the next thread as arguments. Some scheduling classes may need it to
>>>>>> correctly handle their state as the sched_pick may not be invoked when a
>>>>>> higher-weighted class is providing the next thread.
>>>>>>
>>>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>>>> ---
>>>>>>  include/evl/sched.h     | 2 ++
>>>>>>  kernel/evl/sched/core.c | 5 +++++
>>>>>>  2 files changed, 7 insertions(+)
>>>>>>
>>>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>>>> index ae9690860146c..0b16f1b1cf626 100644
>>>>>> --- a/include/evl/sched.h
>>>>>> +++ b/include/evl/sched.h
>>>>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>>>> +	void (*sched_out)(struct evl_thread *thread,
>>>>>> +			  struct evl_thread *next);
>>>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>>>  			      struct evl_rq *rq);
>>>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>>>> index eb133e334d30f..0d49fc16bd67e 100644
>>>>>> --- a/kernel/evl/sched/core.c
>>>>>> +++ b/kernel/evl/sched/core.c
>>>>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>>>>   */
>>>>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>  {
>>>>>> +	struct evl_sched_class *prev_schedclass;
>>>>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>>>>  	struct evl_thread *prev, *next, *curr;
>>>>>>  	bool leaving_inband, inband_tail;
>>>>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>>>  	this_rq->curr = next;
>>>>>>  	leaving_inband = false;
>>>>>>  
>>>>>> +	prev_schedclass = prev->sched_class;
>>>>>> +	if (prev_schedclass->sched_out)
>>>>>> +		prev_schedclass->sched_out(prev, next);
>>>>>> +
>>>>>>  	/*
>>>>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>>>>  	 * performing the rest of the context switch code
>>>>>
>>>>> I've been working on this lately too. It turns out that we need more
>>>>> than this, although this is definitely part of the solution. I'll follow
>>>>> up on this issue.
>>>>
>>>> This is still wip, I'm sharing this early to discuss details for
>>>> reconciling both proposals (the hunk in __evl_schedule() is merely
>>>> cosmetic, no functional change).
>>>>
>>>
>>> Please avoid that for the final patch. I already had troubles telling
>>> substantial from cosmetical changes apart in your lazy-blocking commit
>>> (>50% unrelated changes in there).
>>>
>>>> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
>>>> Author: Philippe Gerum <rpm@xenomai.org>
>>>> Date:   Sun Jun 14 12:08:59 2026 +0200
>>>>
>>>>     evl: sched/quota: fix budget tracking on preemption
>>>>     
>>>>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>>>>     runtime budget of the former is inaccurately tracked. This is due to
>>>>     the fifo->pick() handler returning a valid thread, which prevents the
>>>>     quota->pick() handler from being called. As a result, the last runtime
>>>>     period of the outgoing thread is not accounted for.
>>>>     
>>>>     To fix this, we introduce a new sched_out() handler which is called
>>>>     for the outgoing thread, which the quota policy uses to update the
>>>>     remaining budget of preempted threads appropriately. In addition, the
>>>>     implementation no longer shares the runnable thread queue with
>>>>     SCHED_FIFO.
>>>
>>> ...and why we need a separate queue now? Please provide reasoning for
>>> changes.
>>>
>>>>     
>>>>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
>>>>
>>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>>> index ae9690860146..cc824c28004b 100644
>>>> --- a/include/evl/sched.h
>>>> +++ b/include/evl/sched.h
>>>> @@ -120,6 +120,7 @@ struct evl_sched_class {
>>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>>> +	void (*sched_out)(struct evl_thread *thread);
>>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>>  			      struct evl_rq *rq);
>>>> diff --git a/include/evl/sched/quota.h b/include/evl/sched/quota.h
>>>> index dfe3b7390958..dc8416645da8 100644
>>>> --- a/include/evl/sched/quota.h
>>>> +++ b/include/evl/sched/quota.h
>>>> @@ -40,6 +40,7 @@ struct evl_quota_group {
>>>>  
>>>>  struct evl_sched_quota {
>>>>  	ktime_t period;
>>>> +	struct evl_sched_queue runnable;
>>>>  	struct evl_timer refill_timer;
>>>>  	struct evl_timer limit_timer;
>>>>  	struct list_head groups;
>>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>>> index d1d025e06a5d..7127d4e52df9 100644
>>>> --- a/kernel/evl/sched/core.c
>>>> +++ b/kernel/evl/sched/core.c
>>>> @@ -773,7 +773,8 @@ static inline void set_next_running(struct evl_rq *rq,
>>>>  		evl_stop_timer(&rq->rrbtimer);
>>>>  }
>>>>  
>>>> -static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>>> +static __always_inline struct evl_thread *
>>>> +__pick_next_thread(struct evl_rq *rq)
>>>>  {
>>>>  	struct evl_sched_class *sched_class;
>>>>  	struct evl_thread *curr = rq->curr;
>>>> @@ -821,8 +822,14 @@ static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>>>>  /* rq->curr->lock + rq->lock held, hard irqs off. */
>>>>  static struct evl_thread *pick_next_thread(struct evl_rq *rq)
>>>>  {
>>>> -	struct evl_thread *next = __pick_next_thread(rq);
>>>> +	struct evl_thread *next, *prev = rq->curr;
>>>> +	struct evl_sched_class *prev_class = prev->sched_class;
>>>>  
>>>> +	if (prev_class->sched_out)
>>>> +		prev_class->sched_out(prev);
>>>> +
>>>> +	next = __pick_next_thread(rq);
>>>> +	trace_evl_pick_thread(next);
>>>>  	set_next_running(rq, next);
>>>>  
>>>>  	return next;
>>>> @@ -972,21 +979,20 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>>  		return;
>>>>  	}
>>>>  
>>>> +	prev = curr;
>>>>  	next = pick_next_thread(this_rq);
>>>> -	trace_evl_pick_thread(next);
>>>> -	if (next == curr) {
>>>> -		if (unlikely(next->state & EVL_T_ROOT)) {
>>>> +	if (next == prev) {
>>>> +		if (unlikely(prev->state & EVL_T_ROOT)) {
>>>>  			if (this_rq->local_flags & RQ_TPROXY)
>>>>  				evl_notify_proxy_tick(this_rq);
>>>>  			if (this_rq->local_flags & RQ_TDEFER)
>>>>  				evl_program_local_tick(&evl_mono_clock);
>>>>  		}
>>>>  		raw_spin_unlock(&this_rq->lock);
>>>> -		raw_spin_unlock_irqrestore(&curr->lock, flags);
>>>> +		raw_spin_unlock_irqrestore(&prev->lock, flags);
>>>>  		return;
>>>>  	}
>>>>  
>>>> -	prev = curr;
>>>>  	this_rq->curr = next;
>>>>  	leaving_inband = false;
>>>>  
>>>> diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
>>>> index 0829da711a66..14748e3a843e 100644
>>>> --- a/kernel/evl/sched/quota.c
>>>> +++ b/kernel/evl/sched/quota.c
>>>> @@ -12,45 +12,35 @@
>>>>  #include <uapi/evl/sched-abi.h>
>>>>  
>>>>  /*
>>>> - * With this policy, each per-CPU runqueue maintains a list of active
>>>> - * thread groups for the sched_fifo class.
>>>> - *
>>>> - * Each time a thread is picked from the runqueue, we check whether we
>>>> - * still have budget for running it, looking at the group it belongs
>>>> - * to. If so, a timer is armed to elapse when that group has no more
>>>> - * budget, would the incoming thread run unpreempted until then
>>>> - * (i.e. evl_quota->limit_timer).
>>>> + * Each time a thread is picked from the ->runnable queue, we check
>>>> + * whether the group it belongs to still has runtime budget.  If so, a
>>>> + * timer is armed to fire when that group has no more budget, would
>>>> + * the incoming thread run unpreempted until then
>>>> + * (i.e. quota->limit_timer).
>>>>   *
>>>>   * Otherwise, if no budget remains in the group for running the
>>>>   * candidate thread, we move the latter to a local expiry queue
>>>>   * maintained by the group. This process is done on the fly as we pull
>>>> - * from the runqueue.
>>>> + * from the ->runnable queue.
>>>>   *
>>>> - * Updating the remaining budget is done each time the EVL core asks
>>>> - * for replacing the current thread with the next runnable one,
>>>> - * i.e. evl_quota_pick(). There we charge the elapsed run time of the
>>>> - * outgoing thread to the relevant group, and conversely, we check
>>>> - * whether the incoming thread has budget.
>>>> + * Updating the remaining budget is done each time the EVL core
>>>> + * schedules out a thread undergoing the quota scheduling policy,
>>>>   *
>>>> - * Finally, a per-CPU timer (evl_quota->refill_timer) periodically
>>>> - * ticks in the background, in accordance to the defined quota
>>>> - * interval. Thread group budgets get replenished by its handler in
>>>> - * accordance to their respective share, pushing all expired threads
>>>> - * back to the run queue in the same move.
>>>> + * Finally, a per-CPU timer (quota->refill_timer) periodically ticks
>>>> + * in the background, in accordance to the defined quota interval,
>>>> + * replenishing per-group budgets, pushing all expired threads back to
>>>> + * the quota ->runqueue too.
>>>>   *
>>>> - * NOTE: since the core logic enforcing the budget entirely happens in
>>>> - * evl_quota_pick(), applying a budget change can be done as simply as
>>>> - * forcing the rescheduling procedure to be invoked asap. As a result
>>>> - * of this, the EVL core will ask for the next thread to run, which
>>>> - * means calling evl_quota_pick() eventually.
>>>> + * NOTE: forcing a call to the rescheduling procedure is enoiugh to
>>>> + * apply a budget change.
>>>>   *
>>>> - * CAUTION: evl_quota_group->nr_active does count both the threads
>>>> - * from that group linked to the sched_fifo runqueue, _and_ the
>>>> - * threads moved to the local expiry queue. As a matter of fact, the
>>>> - * expired threads - those for which we consumed all the per-group
>>>> - * budget - are still seen as runnable (i.e. not blocked/suspended) by
>>>> - * the EVL core. This only means that the SCHED_QUOTA policy won't
>>>> - * pick them until the corresponding budget is replenished.
>>>> + * CAUTION: quota_group->nr_active does count both the threads from
>>>> + * that group linked to the runnable queue, _and_ the threads moved to
>>>> + * the local expiry queue. As a matter of fact, the expired threads -
>>>> + * those for which we consumed all the per-group budget - are still
>>>> + * seen as runnable (i.e. not blocked/suspended) by the EVL core. This
>>>> + * only means that the SCHED_QUOTA policy won't pick them until the
>>>> + * corresponding budget is replenished.
>>>>   */
>>>>  
>>>>  #define MAX_QUOTA_GROUPS  1024
>>>> @@ -61,15 +51,19 @@ static DECLARE_BITMAP(group_map, MAX_QUOTA_GROUPS);
>>>>  
>>>>  static LIST_HEAD(group_list);
>>>>  
>>>> -static inline bool thread_on_quota(struct evl_thread *thread,
>>>> -				struct evl_quota_group *tg)
>>>> +static inline bool current_on_quota(struct evl_quota_group *tg)
>>>>  {
>>>> -	/*
>>>> -	 * Check whether @thread is running on some CPU, and belongs
>>>> -	 * to quota group @tg.
>>>> -	 */
>>>> -	return thread->quota == tg &&
>>>> -		!(thread->state & (EVL_T_READY|EVL_THREAD_BLOCK_MASK));
>>>> +	struct evl_rq *rq = tg->rq;
>>>> +	struct evl_thread *curr = rq->curr;
>>>> +	struct evl_sched_quota *qs = &rq->quota;
>>>> +
>>>> +	if (curr->quota != tg)
>>>> +		return false;
>>>> +
>>>> +	if (curr->state & (EVL_T_READY|EVL_T_KICKED|EVL_THREAD_BLOCK_MASK))
>>>> +		return false;
>>>> +
>>>> +	return evl_timer_is_running(&qs->limit_timer);
>>>>  }
>>>>  
>>>>  static inline bool group_is_active(struct evl_quota_group *tg)
>>>> @@ -82,7 +76,7 @@ static inline bool group_is_active(struct evl_quota_group *tg)
>>>>  	 * runqueue, in which case tg->nr_active already accounted for
>>>>  	 * it.
>>>>  	 */
>>>> -	return thread_on_quota(tg->rq->curr, tg);
>>>> +	return current_on_quota(tg);
>>>>  }
>>>>  
>>>>  static inline void replenish_budget(struct evl_sched_quota *qs,
>>>> @@ -134,10 +128,10 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>  	} else if (tg->run_credit) {
>>>>  		credit = ktime_sub(tg->quota_peak, budget);
>>>>  		/* Consume the accumulated credit. */
>>>> -		if (tg->run_credit >= credit)
>>>> +		if (tg->run_credit >= credit) {
>>>>  			tg->run_credit =
>>>>  				ktime_sub(tg->run_credit, credit);
>>>> -		else {
>>>> +		} else {
>>>>  			credit = tg->run_credit;
>>>>  			tg->run_credit = 0;
>>>>  		}
>>>> @@ -150,8 +144,8 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>>>>  
>>>>  static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled */
>>>>  {
>>>> -	struct evl_quota_group *tg;
>>>>  	struct evl_thread *thread, *tmp;
>>>> +	struct evl_quota_group *tg;
>>>>  	struct evl_sched_quota *qs;
>>>>  	struct evl_rq *rq;
>>>>  
>>>> @@ -167,7 +161,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>>  		if (tg->run_budget == 0 || list_empty(&tg->expired))
>>>>  			continue;
>>>>  		/*
>>>> -		 * For each group living on this CPU, move all expired
>>>> +		 * For each group pinned on this CPU, move all expired
>>>>  		 * threads back to the runqueue. Since those threads
>>>>  		 * were moved out of the runqueue as we were
>>>>  		 * considering them for execution, we push them back
>>>> @@ -178,7 +172,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>>>>  		list_for_each_entry_safe_reverse(thread, tmp,
>>>>  						&tg->expired, quota_expired) {
>>>>  			list_del_init(&thread->quota_expired);
>>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>>> +			evl_add_schedq(&qs->runnable, thread);
>>>>  		}
>>>>  	}
>>>>  
>>>> @@ -195,7 +189,7 @@ static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
>>>>  	/*
>>>>  	 * Force a rescheduling on the return path of the current
>>>>  	 * interrupt, so that the budget is re-evaluated for the
>>>> -	 * current group in evl_quota_pick().
>>>> +	 * current group in quota_pick().
>>>>  	 */
>>>>  	raw_spin_lock(&rq->lock);
>>>>  	evl_set_self_resched(rq);
>>>> @@ -221,6 +215,7 @@ static void quota_init(struct evl_rq *rq)
>>>>  {
>>>>  	struct evl_sched_quota *qs = &rq->quota;
>>>>  
>>>> +	evl_init_schedq(&qs->runnable);
>>>>  	qs->period = quota_period;
>>>>  	INIT_LIST_HEAD(&qs->groups);
>>>>  
>>>> @@ -337,8 +332,8 @@ static void quota_forget(struct evl_thread *thread)
>>>>  
>>>>  static void quota_kick(struct evl_thread *thread)
>>>>  {
>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>  	struct evl_quota_group *tg = thread->quota;
>>>> -	struct evl_rq *rq = thread->rq;
>>>>  
>>>>  	/*
>>>>  	 * Allow a kicked thread to be elected for running until it
>>>> @@ -347,7 +342,7 @@ static void quota_kick(struct evl_thread *thread)
>>>>  	 */
>>>>  	if (tg->run_budget == 0 && !list_empty(&thread->quota_expired)) {
>>>>  		list_del_init(&thread->quota_expired);
>>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>>  	}
>>>>  }
>>>>  
>>>> @@ -358,79 +353,82 @@ static inline int thread_is_runnable(struct evl_thread *thread)
>>>>  
>>>>  static void quota_enqueue(struct evl_thread *thread)
>>>>  {
>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>  	struct evl_quota_group *tg = thread->quota;
>>>> -	struct evl_rq *rq = thread->rq;
>>>>  
>>>>  	if (!thread_is_runnable(thread))
>>>>  		list_add_tail(&thread->quota_expired, &tg->expired);
>>>>  	else
>>>> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
>>>> +		evl_add_schedq_tail(&qs->runnable, thread);
>>>>  
>>>>  	tg->nr_active++;
>>>>  }
>>>>  
>>>>  static void quota_dequeue(struct evl_thread *thread)
>>>>  {
>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>  	struct evl_quota_group *tg = thread->quota;
>>>> -	struct evl_rq *rq = thread->rq;
>>>>  
>>>>  	if (!list_empty(&thread->quota_expired))
>>>>  		list_del_init(&thread->quota_expired);
>>>>  	else
>>>> -		evl_del_schedq(&rq->fifo.runnable, thread);
>>>> +		evl_del_schedq(&qs->runnable, thread);
>>>>  
>>>>  	tg->nr_active--;
>>>>  }
>>>>  
>>>>  static void quota_requeue(struct evl_thread *thread)
>>>>  {
>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>>  	struct evl_quota_group *tg = thread->quota;
>>>> -	struct evl_rq *rq = thread->rq;
>>>>  
>>>>  	if (!thread_is_runnable(thread))
>>>>  		list_add(&thread->quota_expired, &tg->expired);
>>>>  	else
>>>> -		evl_add_schedq(&rq->fifo.runnable, thread);
>>>> +		evl_add_schedq(&qs->runnable, thread);
>>>>  
>>>>  	tg->nr_active++;
>>>>  }
>>>>  
>>>> -static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>> +static void quota_out(struct evl_thread *thread)
>>>>  {
>>>> -	struct evl_thread *next, *curr = rq->curr;
>>>> -	struct evl_sched_quota *qs = &rq->quota;
>>>> -	struct evl_quota_group *otg, *tg;
>>>> -	ktime_t now, elapsed;
>>>> +	struct evl_sched_quota *qs = &thread->rq->quota;
>>>> +	struct evl_quota_group *tg = thread->quota;
>>>> +	ktime_t now, consumed;
>>>> +
>>>> +	/* Timer off means that we are not tracking quota. */
>>>> +	if (!evl_timer_is_running(&qs->limit_timer))
>>>> +		return;
>>>>  
>>>> -	now = evl_ktime_monotonic();
>>>> -	otg = curr->quota;
>>>> -	if (otg == NULL)
>>>> -		goto pick;
>>>>  	/*
>>>>  	 * Charge the time consumed by the outgoing thread to the
>>>>  	 * group it belongs to.
>>>>  	 */
>>>> -	elapsed = ktime_sub(now, otg->run_start);
>>>> -	if (elapsed < otg->run_budget)
>>>> -		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
>>>> -	else
>>>> -		otg->run_budget = 0;
>>>> +	now = evl_ktime_monotonic();
>>>
>>> Thing brins some, though minor, glitch when doing the run_start update
>>> for the next group already in pick. That's why I'm trying to reuse that
>>> stamp when available.
>>>
>>>> +	consumed = ktime_sub(now, tg->run_start);
>>>> +	if (consumed < tg->run_budget) {
>>>> +		tg->run_budget = ktime_sub(tg->run_budget, consumed);
>>>> +	} else {
>>>> +		tg->run_budget = 0;
>>>> +		evl_stop_timer(&qs->limit_timer);
>>>> +	}
>>>> +}
>>>> +
>>>> +static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>> +{
>>>> +	struct evl_thread *next, *curr = rq->curr;
>>>> +	struct evl_sched_quota *qs = &rq->quota;
>>>> +	struct evl_quota_group *tg;
>>>> +
>>>>  pick:
>>>> -	next = evl_get_schedq(&rq->fifo.runnable);
>>>> +	next = evl_get_schedq(&qs->runnable);
>>>>  	if (next == NULL) {
>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>  		return NULL;
>>>>  	}
>>>>  
>>>> -	/*
>>>> -	 * As we basically piggyback on the SCHED_FIFO runqueue, make
>>>> -	 * sure to detect non-quota threads.
>>>> -	 */
>>>>  	tg = next->quota;
>>>> -	if (tg == NULL)
>>>> -		return next;
>>>> -
>>>> -	tg->run_start = now;
>>>> +	tg->nr_active--;
>>>>  
>>>>  	/*
>>>>  	 * Don't consider budget if kicked, we have to allow this
>>>> @@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>>  	 */
>>>>  	if (next->info & EVL_T_KICKED) {
>>>>  		evl_stop_timer(&qs->limit_timer);
>>>> -		goto out;
>>>> +		return next;
>>>>  	}
>>>>  
>>>> +	/*
>>>> +	 * __pick_next_thread() might have requeued the current
>>>> +	 * thread which is still leading the pack.
>>>> +	 */
>>>> +	if (curr == next)
>>>> +		return next;
>>>> +
>>>>  	if (ktime_to_ns(tg->run_budget) == 0) {
>>>> -		/* Flush expired group members as we go. */
>>>> +		/* Park expired group members as we go. */
>>>>  		list_add_tail(&next->quota_expired, &tg->expired);
>>>>  		goto pick;
>>>>  	}
>>>>  
>>>> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
>>>> -		/* Same group, leave the running timer untouched. */
>>>> -		goto out;
>>>> -
>>>> -	/* Arm limit timer for the new running group. */
>>>> -	evl_start_timer(&qs->limit_timer,
>>>> -			ktime_add(now, tg->run_budget),
>>>> -			EVL_INFINITE);
>>>> -out:
>>>> -	tg->nr_active--;
>>>> +	/* Arm new limit timer on change of running group. */
>>>> +	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
>>>> +		tg->run_start = evl_ktime_monotonic();
>>>> +		evl_start_timer(&qs->limit_timer,
>>>> +				ktime_add(tg->run_start, tg->run_budget),
>>>> +				EVL_INFINITE);
>>>> +	}
>>>>  
>>>>  	return next;
>>>>  }
>>>> @@ -542,7 +544,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>>  	 * Unregister the group before we drop rq->lock. As a result,
>>>>  	 * it won't accept threads anymore while we are busy moving
>>>>  	 * the current members to the fifo class, and concurrent
>>>> -	 * evl_quota_remove requests would receive -EINVAL.
>>>> +	 * quota_remove requests would receive -EINVAL.
>>>>  	 */
>>>>  	__clear_bit(tg->tgid, group_map);
>>>>  	list_del(&tg->next);
>>>> @@ -556,7 +558,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>>>>  	 * hold rq->lock on entry, we do a trylock dance to prevent an
>>>>  	 * ABBA issue. No livelock is possible since we unregistered
>>>>  	 * that group already, so &tg->members can only be depleted
>>>> -	 * (by this loop specifically).
>>>> +	 * (by this loop exclusively).
>>>>  	 */
>>>>  
>>>>  	while (!list_empty(&tg->members)) {
>>>> @@ -583,10 +585,10 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>  			int *quota_sum_r)
>>>>  {
>>>>  	struct evl_rq *rq = tg->rq;
>>>> -	struct evl_thread *thread, *tmp, *curr = rq->curr;
>>>> +	struct evl_thread *thread, *tmp;
>>>>  	struct evl_sched_quota *qs = &rq->quota;
>>>> -	ktime_t now, elapsed, consumed;
>>>>  	ktime_t old_quota = tg->quota;
>>>> +	ktime_t consumed;
>>>>  	u64 n;
>>>>  
>>>>  	assert_hard_lock(&rq->lock);
>>>> @@ -615,16 +617,8 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>  	tg->quota_percent = quota_percent;
>>>>  	tg->quota_peak_percent = quota_peak_percent;
>>>>  
>>>> -	if (thread_on_quota(curr, tg)) {
>>>> -		now = evl_ktime_monotonic();
>>>> -
>>>> -		elapsed = now - tg->run_start;
>>>> -		if (elapsed < tg->run_budget)
>>>> -			tg->run_budget -= elapsed;
>>>> -		else
>>>> -			tg->run_budget = 0;
>>>> -
>>>> -		tg->run_start = now;
>>>> +	if (current_on_quota(tg)) {
>>>> +		quota_out(rq->curr);
>>>>  		evl_stop_timer(&qs->limit_timer);
>>>>  	}
>>>>  
>>>> @@ -646,7 +640,7 @@ static void quota_set_limit(struct evl_quota_group *tg,
>>>>  		list_for_each_entry_safe_reverse(thread, tmp, &tg->expired,
>>>>  						quota_expired) {
>>>>  			list_del_init(&thread->quota_expired);
>>>> -			evl_add_schedq(&rq->fifo.runnable, thread);
>>>> +			evl_add_schedq(&qs->runnable, thread);
>>>>  		}
>>>>  	}
>>>>  
>>>> @@ -786,6 +780,7 @@ struct evl_sched_class evl_sched_quota = {
>>>>  	.sched_dequeue		=	quota_dequeue,
>>>>  	.sched_requeue		=	quota_requeue,
>>>>  	.sched_pick		=	quota_pick,
>>>> +	.sched_out		=	quota_out,
>>>>  	.sched_migrate		=	quota_migrate,
>>>>  	.sched_chkparam		=	quota_chkparam,
>>>>  	.sched_setparam		=	quota_setparam,
>>>>
>>>
>>> You also seem to miss the case I discovered only yesterday as well: When
>>> starting the current thread on a quota via setparam, we also need to set
>>> its run_start stamp at that point.
>>>
>>> Please have a closer look at both my test case (libevl) as well as my
>>> patch 2. Currently, I don't see remaining issues with both, but I'm all
>>> ears to learn what I missed.
>>>
>> 
>> AFAIU, the logic of your changes is as follows:
>> 
>> 1. provide a sched_out() handler which charges the previous/current
>>    group with the budget consumed by the outgoing thread, dropping such
>>    accounting from sched_pick() in the same move. As a consequence,
>>    charge the consumed budget in quota_limit_handler() too since you
>>    can't expect sched_pick() to do so (i.e. turning on the resched bit
>>    on curr->rq in quota_limit_handler() would not help in this case).
>
> Yep.
>
>> 
>> 2. invoke sched_out() after sched_pick(), unless prev == next.
>> 
>> Since sched_pick() checks next->quota->run_budget to make sure that next
>> may run, what if curr and next belong to the same quota group given that
>> curr->quota->run_budget won't be updated by sched_out() until
>> sched_pick() has returned?
>
> Nothing is changed in case final next->quota == curr->quota. It's indeed
> true that the run_budget == 0 check is not going to trigger anymore -
> can be removed, the quota-timer will take care anyway.
>

For this to work, every place stopping the timer would have to call
charge_usage() then.

-- 
Philippe.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  6:42   ` Philippe Gerum
                       ` (2 preceding siblings ...)
  2026-06-15  6:58     ` Jan Kiszka
@ 2026-06-15  9:04     ` Jan Kiszka
  2026-06-15  9:59       ` Philippe Gerum
  3 siblings, 1 reply; 24+ messages in thread
From: Jan Kiszka @ 2026-06-15  9:04 UTC (permalink / raw)
  To: Philippe Gerum; +Cc: Xenomai

On 15.06.26 08:42, Philippe Gerum wrote:
> Philippe Gerum <rpm@xenomai.org> writes:
> 
>> Jan Kiszka <jan.kiszka@siemens.com> writes:
>>
>>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>>
>>> This shall be invoked before a thread switch, providing both the current
>>> and the next thread as arguments. Some scheduling classes may need it to
>>> correctly handle their state as the sched_pick may not be invoked when a
>>> higher-weighted class is providing the next thread.
>>>
>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>> ---
>>>  include/evl/sched.h     | 2 ++
>>>  kernel/evl/sched/core.c | 5 +++++
>>>  2 files changed, 7 insertions(+)
>>>
>>> diff --git a/include/evl/sched.h b/include/evl/sched.h
>>> index ae9690860146c..0b16f1b1cf626 100644
>>> --- a/include/evl/sched.h
>>> +++ b/include/evl/sched.h
>>> @@ -120,6 +120,8 @@ struct evl_sched_class {
>>>  	void (*sched_dequeue)(struct evl_thread *thread);
>>>  	void (*sched_requeue)(struct evl_thread *thread);
>>>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
>>> +	void (*sched_out)(struct evl_thread *thread,
>>> +			  struct evl_thread *next);
>>>  	void (*sched_yield)(struct evl_thread *thread);
>>>  	void (*sched_migrate)(struct evl_thread *thread,
>>>  			      struct evl_rq *rq);
>>> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
>>> index eb133e334d30f..0d49fc16bd67e 100644
>>> --- a/kernel/evl/sched/core.c
>>> +++ b/kernel/evl/sched/core.c
>>> @@ -910,6 +910,7 @@ static __always_inline bool test_resched(struct evl_rq *this_rq)
>>>   */
>>>  void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>  {
>>> +	struct evl_sched_class *prev_schedclass;
>>>  	struct evl_rq *this_rq = this_evl_rq();
>>>  	struct evl_thread *prev, *next, *curr;
>>>  	bool leaving_inband, inband_tail;
>>> @@ -990,6 +991,10 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>>>  	this_rq->curr = next;
>>>  	leaving_inband = false;
>>>  
>>> +	prev_schedclass = prev->sched_class;
>>> +	if (prev_schedclass->sched_out)
>>> +		prev_schedclass->sched_out(prev, next);
>>> +
>>>  	/*
>>>  	 * Careful: we _must_ have updated this_rq->curr before
>>>  	 * performing the rest of the context switch code
>>
>> I've been working on this lately too. It turns out that we need more
>> than this, although this is definitely part of the solution. I'll follow
>> up on this issue.
> 
> This is still wip, I'm sharing this early to discuss details for
> reconciling both proposals (the hunk in __evl_schedule() is merely
> cosmetic, no functional change).
> 
> commit f49dbd63c1389a6b99508ef0da852545b102d1cc (HEAD -> wip/fix-quota-sched)
> Author: Philippe Gerum <rpm@xenomai.org>
> Date:   Sun Jun 14 12:08:59 2026 +0200
> 
>     evl: sched/quota: fix budget tracking on preemption
>     
>     Upon preemption of a SCHED_QUOTA thread by a SCHED_FIFO one, the
>     runtime budget of the former is inaccurately tracked. This is due to
>     the fifo->pick() handler returning a valid thread, which prevents the
>     quota->pick() handler from being called. As a result, the last runtime
>     period of the outgoing thread is not accounted for.
>     
>     To fix this, we introduce a new sched_out() handler which is called
>     for the outgoing thread, which the quota policy uses to update the
>     remaining budget of preempted threads appropriately. In addition, the
>     implementation no longer shares the runnable thread queue with
>     SCHED_FIFO.
>     
>     Signed-off-by: Philippe Gerum <rpm@xenomai.org>
> 
> diff --git a/include/evl/sched.h b/include/evl/sched.h
> index ae9690860146..cc824c28004b 100644
> --- a/include/evl/sched.h
> +++ b/include/evl/sched.h
> @@ -120,6 +120,7 @@ struct evl_sched_class {
>  	void (*sched_dequeue)(struct evl_thread *thread);
>  	void (*sched_requeue)(struct evl_thread *thread);
>  	struct evl_thread *(*sched_pick)(struct evl_rq *rq);
> +	void (*sched_out)(struct evl_thread *thread);
>  	void (*sched_yield)(struct evl_thread *thread);
>  	void (*sched_migrate)(struct evl_thread *thread,
>  			      struct evl_rq *rq);
> diff --git a/include/evl/sched/quota.h b/include/evl/sched/quota.h
> index dfe3b7390958..dc8416645da8 100644
> --- a/include/evl/sched/quota.h
> +++ b/include/evl/sched/quota.h
> @@ -40,6 +40,7 @@ struct evl_quota_group {
>  
>  struct evl_sched_quota {
>  	ktime_t period;
> +	struct evl_sched_queue runnable;
>  	struct evl_timer refill_timer;
>  	struct evl_timer limit_timer;
>  	struct list_head groups;
> diff --git a/kernel/evl/sched/core.c b/kernel/evl/sched/core.c
> index d1d025e06a5d..7127d4e52df9 100644
> --- a/kernel/evl/sched/core.c
> +++ b/kernel/evl/sched/core.c
> @@ -773,7 +773,8 @@ static inline void set_next_running(struct evl_rq *rq,
>  		evl_stop_timer(&rq->rrbtimer);
>  }
>  
> -static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
> +static __always_inline struct evl_thread *
> +__pick_next_thread(struct evl_rq *rq)
>  {
>  	struct evl_sched_class *sched_class;
>  	struct evl_thread *curr = rq->curr;
> @@ -821,8 +822,14 @@ static struct evl_thread *__pick_next_thread(struct evl_rq *rq)
>  /* rq->curr->lock + rq->lock held, hard irqs off. */
>  static struct evl_thread *pick_next_thread(struct evl_rq *rq)
>  {
> -	struct evl_thread *next = __pick_next_thread(rq);
> +	struct evl_thread *next, *prev = rq->curr;
> +	struct evl_sched_class *prev_class = prev->sched_class;
>  
> +	if (prev_class->sched_out)
> +		prev_class->sched_out(prev);
> +
> +	next = __pick_next_thread(rq);
> +	trace_evl_pick_thread(next);
>  	set_next_running(rq, next);
>  
>  	return next;
> @@ -972,21 +979,20 @@ void __evl_schedule(void) /* oob or/and hard irqs off (CPU migration-safe) */
>  		return;
>  	}
>  
> +	prev = curr;
>  	next = pick_next_thread(this_rq);
> -	trace_evl_pick_thread(next);
> -	if (next == curr) {
> -		if (unlikely(next->state & EVL_T_ROOT)) {
> +	if (next == prev) {
> +		if (unlikely(prev->state & EVL_T_ROOT)) {
>  			if (this_rq->local_flags & RQ_TPROXY)
>  				evl_notify_proxy_tick(this_rq);
>  			if (this_rq->local_flags & RQ_TDEFER)
>  				evl_program_local_tick(&evl_mono_clock);
>  		}
>  		raw_spin_unlock(&this_rq->lock);
> -		raw_spin_unlock_irqrestore(&curr->lock, flags);
> +		raw_spin_unlock_irqrestore(&prev->lock, flags);
>  		return;
>  	}
>  
> -	prev = curr;
>  	this_rq->curr = next;
>  	leaving_inband = false;
>  
> diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
> index 0829da711a66..14748e3a843e 100644
> --- a/kernel/evl/sched/quota.c
> +++ b/kernel/evl/sched/quota.c
> @@ -12,45 +12,35 @@
>  #include <uapi/evl/sched-abi.h>
>  
>  /*
> - * With this policy, each per-CPU runqueue maintains a list of active
> - * thread groups for the sched_fifo class.
> - *
> - * Each time a thread is picked from the runqueue, we check whether we
> - * still have budget for running it, looking at the group it belongs
> - * to. If so, a timer is armed to elapse when that group has no more
> - * budget, would the incoming thread run unpreempted until then
> - * (i.e. evl_quota->limit_timer).
> + * Each time a thread is picked from the ->runnable queue, we check
> + * whether the group it belongs to still has runtime budget.  If so, a
> + * timer is armed to fire when that group has no more budget, would
> + * the incoming thread run unpreempted until then
> + * (i.e. quota->limit_timer).
>   *
>   * Otherwise, if no budget remains in the group for running the
>   * candidate thread, we move the latter to a local expiry queue
>   * maintained by the group. This process is done on the fly as we pull
> - * from the runqueue.
> + * from the ->runnable queue.
>   *
> - * Updating the remaining budget is done each time the EVL core asks
> - * for replacing the current thread with the next runnable one,
> - * i.e. evl_quota_pick(). There we charge the elapsed run time of the
> - * outgoing thread to the relevant group, and conversely, we check
> - * whether the incoming thread has budget.
> + * Updating the remaining budget is done each time the EVL core
> + * schedules out a thread undergoing the quota scheduling policy,
>   *
> - * Finally, a per-CPU timer (evl_quota->refill_timer) periodically
> - * ticks in the background, in accordance to the defined quota
> - * interval. Thread group budgets get replenished by its handler in
> - * accordance to their respective share, pushing all expired threads
> - * back to the run queue in the same move.
> + * Finally, a per-CPU timer (quota->refill_timer) periodically ticks
> + * in the background, in accordance to the defined quota interval,
> + * replenishing per-group budgets, pushing all expired threads back to
> + * the quota ->runqueue too.
>   *
> - * NOTE: since the core logic enforcing the budget entirely happens in
> - * evl_quota_pick(), applying a budget change can be done as simply as
> - * forcing the rescheduling procedure to be invoked asap. As a result
> - * of this, the EVL core will ask for the next thread to run, which
> - * means calling evl_quota_pick() eventually.
> + * NOTE: forcing a call to the rescheduling procedure is enoiugh to
> + * apply a budget change.
>   *
> - * CAUTION: evl_quota_group->nr_active does count both the threads
> - * from that group linked to the sched_fifo runqueue, _and_ the
> - * threads moved to the local expiry queue. As a matter of fact, the
> - * expired threads - those for which we consumed all the per-group
> - * budget - are still seen as runnable (i.e. not blocked/suspended) by
> - * the EVL core. This only means that the SCHED_QUOTA policy won't
> - * pick them until the corresponding budget is replenished.
> + * CAUTION: quota_group->nr_active does count both the threads from
> + * that group linked to the runnable queue, _and_ the threads moved to
> + * the local expiry queue. As a matter of fact, the expired threads -
> + * those for which we consumed all the per-group budget - are still
> + * seen as runnable (i.e. not blocked/suspended) by the EVL core. This
> + * only means that the SCHED_QUOTA policy won't pick them until the
> + * corresponding budget is replenished.
>   */
>  
>  #define MAX_QUOTA_GROUPS  1024
> @@ -61,15 +51,19 @@ static DECLARE_BITMAP(group_map, MAX_QUOTA_GROUPS);
>  
>  static LIST_HEAD(group_list);
>  
> -static inline bool thread_on_quota(struct evl_thread *thread,
> -				struct evl_quota_group *tg)
> +static inline bool current_on_quota(struct evl_quota_group *tg)
>  {
> -	/*
> -	 * Check whether @thread is running on some CPU, and belongs
> -	 * to quota group @tg.
> -	 */
> -	return thread->quota == tg &&
> -		!(thread->state & (EVL_T_READY|EVL_THREAD_BLOCK_MASK));
> +	struct evl_rq *rq = tg->rq;
> +	struct evl_thread *curr = rq->curr;
> +	struct evl_sched_quota *qs = &rq->quota;
> +
> +	if (curr->quota != tg)
> +		return false;
> +
> +	if (curr->state & (EVL_T_READY|EVL_T_KICKED|EVL_THREAD_BLOCK_MASK))
> +		return false;
> +
> +	return evl_timer_is_running(&qs->limit_timer);
>  }
>  
>  static inline bool group_is_active(struct evl_quota_group *tg)
> @@ -82,7 +76,7 @@ static inline bool group_is_active(struct evl_quota_group *tg)
>  	 * runqueue, in which case tg->nr_active already accounted for
>  	 * it.
>  	 */
> -	return thread_on_quota(tg->rq->curr, tg);
> +	return current_on_quota(tg);
>  }
>  
>  static inline void replenish_budget(struct evl_sched_quota *qs,
> @@ -134,10 +128,10 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>  	} else if (tg->run_credit) {
>  		credit = ktime_sub(tg->quota_peak, budget);
>  		/* Consume the accumulated credit. */
> -		if (tg->run_credit >= credit)
> +		if (tg->run_credit >= credit) {
>  			tg->run_credit =
>  				ktime_sub(tg->run_credit, credit);
> -		else {
> +		} else {
>  			credit = tg->run_credit;
>  			tg->run_credit = 0;
>  		}
> @@ -150,8 +144,8 @@ static inline void replenish_budget(struct evl_sched_quota *qs,
>  
>  static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled */
>  {
> -	struct evl_quota_group *tg;
>  	struct evl_thread *thread, *tmp;
> +	struct evl_quota_group *tg;
>  	struct evl_sched_quota *qs;
>  	struct evl_rq *rq;
>  
> @@ -167,7 +161,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>  		if (tg->run_budget == 0 || list_empty(&tg->expired))
>  			continue;
>  		/*
> -		 * For each group living on this CPU, move all expired
> +		 * For each group pinned on this CPU, move all expired
>  		 * threads back to the runqueue. Since those threads
>  		 * were moved out of the runqueue as we were
>  		 * considering them for execution, we push them back
> @@ -178,7 +172,7 @@ static void quota_refill_handler(struct evl_timer *timer) /* oob stage stalled *
>  		list_for_each_entry_safe_reverse(thread, tmp,
>  						&tg->expired, quota_expired) {
>  			list_del_init(&thread->quota_expired);
> -			evl_add_schedq(&rq->fifo.runnable, thread);
> +			evl_add_schedq(&qs->runnable, thread);
>  		}
>  	}
>  
> @@ -195,7 +189,7 @@ static void quota_limit_handler(struct evl_timer *timer) /* oob stage stalled */
>  	/*
>  	 * Force a rescheduling on the return path of the current
>  	 * interrupt, so that the budget is re-evaluated for the
> -	 * current group in evl_quota_pick().
> +	 * current group in quota_pick().
>  	 */
>  	raw_spin_lock(&rq->lock);
>  	evl_set_self_resched(rq);
> @@ -221,6 +215,7 @@ static void quota_init(struct evl_rq *rq)
>  {
>  	struct evl_sched_quota *qs = &rq->quota;
>  
> +	evl_init_schedq(&qs->runnable);
>  	qs->period = quota_period;
>  	INIT_LIST_HEAD(&qs->groups);
>  
> @@ -337,8 +332,8 @@ static void quota_forget(struct evl_thread *thread)
>  
>  static void quota_kick(struct evl_thread *thread)
>  {
> +	struct evl_sched_quota *qs = &thread->rq->quota;
>  	struct evl_quota_group *tg = thread->quota;
> -	struct evl_rq *rq = thread->rq;
>  
>  	/*
>  	 * Allow a kicked thread to be elected for running until it
> @@ -347,7 +342,7 @@ static void quota_kick(struct evl_thread *thread)
>  	 */
>  	if (tg->run_budget == 0 && !list_empty(&thread->quota_expired)) {
>  		list_del_init(&thread->quota_expired);
> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
> +		evl_add_schedq_tail(&qs->runnable, thread);
>  	}
>  }
>  
> @@ -358,79 +353,82 @@ static inline int thread_is_runnable(struct evl_thread *thread)
>  
>  static void quota_enqueue(struct evl_thread *thread)
>  {
> +	struct evl_sched_quota *qs = &thread->rq->quota;
>  	struct evl_quota_group *tg = thread->quota;
> -	struct evl_rq *rq = thread->rq;
>  
>  	if (!thread_is_runnable(thread))
>  		list_add_tail(&thread->quota_expired, &tg->expired);
>  	else
> -		evl_add_schedq_tail(&rq->fifo.runnable, thread);
> +		evl_add_schedq_tail(&qs->runnable, thread);
>  
>  	tg->nr_active++;
>  }
>  
>  static void quota_dequeue(struct evl_thread *thread)
>  {
> +	struct evl_sched_quota *qs = &thread->rq->quota;
>  	struct evl_quota_group *tg = thread->quota;
> -	struct evl_rq *rq = thread->rq;
>  
>  	if (!list_empty(&thread->quota_expired))
>  		list_del_init(&thread->quota_expired);
>  	else
> -		evl_del_schedq(&rq->fifo.runnable, thread);
> +		evl_del_schedq(&qs->runnable, thread);
>  
>  	tg->nr_active--;
>  }
>  
>  static void quota_requeue(struct evl_thread *thread)
>  {
> +	struct evl_sched_quota *qs = &thread->rq->quota;
>  	struct evl_quota_group *tg = thread->quota;
> -	struct evl_rq *rq = thread->rq;
>  
>  	if (!thread_is_runnable(thread))
>  		list_add(&thread->quota_expired, &tg->expired);
>  	else
> -		evl_add_schedq(&rq->fifo.runnable, thread);
> +		evl_add_schedq(&qs->runnable, thread);
>  
>  	tg->nr_active++;
>  }
>  
> -static struct evl_thread *quota_pick(struct evl_rq *rq)
> +static void quota_out(struct evl_thread *thread)
>  {
> -	struct evl_thread *next, *curr = rq->curr;
> -	struct evl_sched_quota *qs = &rq->quota;
> -	struct evl_quota_group *otg, *tg;
> -	ktime_t now, elapsed;
> +	struct evl_sched_quota *qs = &thread->rq->quota;
> +	struct evl_quota_group *tg = thread->quota;
> +	ktime_t now, consumed;
> +
> +	/* Timer off means that we are not tracking quota. */
> +	if (!evl_timer_is_running(&qs->limit_timer))
> +		return;
>  
> -	now = evl_ktime_monotonic();
> -	otg = curr->quota;
> -	if (otg == NULL)
> -		goto pick;
>  	/*
>  	 * Charge the time consumed by the outgoing thread to the
>  	 * group it belongs to.
>  	 */
> -	elapsed = ktime_sub(now, otg->run_start);
> -	if (elapsed < otg->run_budget)
> -		otg->run_budget = ktime_sub(otg->run_budget, elapsed);
> -	else
> -		otg->run_budget = 0;
> +	now = evl_ktime_monotonic();
> +	consumed = ktime_sub(now, tg->run_start);
> +	if (consumed < tg->run_budget) {
> +		tg->run_budget = ktime_sub(tg->run_budget, consumed);

You also need to update tg->run_start. Otherwise, you will overcharge
the next time this group is checked here.

> +	} else {
> +		tg->run_budget = 0;
> +		evl_stop_timer(&qs->limit_timer);
> +	}
> +}
> +
> +static struct evl_thread *quota_pick(struct evl_rq *rq)
> +{
> +	struct evl_thread *next, *curr = rq->curr;
> +	struct evl_sched_quota *qs = &rq->quota;
> +	struct evl_quota_group *tg;
> +
>  pick:
> -	next = evl_get_schedq(&rq->fifo.runnable);
> +	next = evl_get_schedq(&qs->runnable);
>  	if (next == NULL) {
>  		evl_stop_timer(&qs->limit_timer);
>  		return NULL;
>  	}
>  
> -	/*
> -	 * As we basically piggyback on the SCHED_FIFO runqueue, make
> -	 * sure to detect non-quota threads.
> -	 */
>  	tg = next->quota;
> -	if (tg == NULL)
> -		return next;
> -
> -	tg->run_start = now;
> +	tg->nr_active--;
>  
>  	/*
>  	 * Don't consider budget if kicked, we have to allow this
> @@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>  	 */
>  	if (next->info & EVL_T_KICKED) {
>  		evl_stop_timer(&qs->limit_timer);
> -		goto out;
> +		return next;
>  	}
>  
> +	/*
> +	 * __pick_next_thread() might have requeued the current
> +	 * thread which is still leading the pack.
> +	 */
> +	if (curr == next)
> +		return next;
> +
>  	if (ktime_to_ns(tg->run_budget) == 0) {
> -		/* Flush expired group members as we go. */
> +		/* Park expired group members as we go. */
>  		list_add_tail(&next->quota_expired, &tg->expired);
>  		goto pick;
>  	}
>  
> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
> -		/* Same group, leave the running timer untouched. */
> -		goto out;
> -
> -	/* Arm limit timer for the new running group. */
> -	evl_start_timer(&qs->limit_timer,
> -			ktime_add(now, tg->run_budget),
> -			EVL_INFINITE);
> -out:
> -	tg->nr_active--;
> +	/* Arm new limit timer on change of running group. */
> +	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
> +		tg->run_start = evl_ktime_monotonic();
> +		evl_start_timer(&qs->limit_timer,
> +				ktime_add(tg->run_start, tg->run_budget),
> +				EVL_INFINITE);
> +	}

I still don't fully get how the limit timer should be started on
setparam over the current thread. Will curr be NULL in that case, thus
curr != next? Is that the reason to go for a separate runqueue?

>  
>  	return next;
>  }
> @@ -542,7 +544,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>  	 * Unregister the group before we drop rq->lock. As a result,
>  	 * it won't accept threads anymore while we are busy moving
>  	 * the current members to the fifo class, and concurrent
> -	 * evl_quota_remove requests would receive -EINVAL.
> +	 * quota_remove requests would receive -EINVAL.
>  	 */
>  	__clear_bit(tg->tgid, group_map);
>  	list_del(&tg->next);
> @@ -556,7 +558,7 @@ static int quota_destroy_group(struct evl_quota_group *tg,
>  	 * hold rq->lock on entry, we do a trylock dance to prevent an
>  	 * ABBA issue. No livelock is possible since we unregistered
>  	 * that group already, so &tg->members can only be depleted
> -	 * (by this loop specifically).
> +	 * (by this loop exclusively).
>  	 */
>  
>  	while (!list_empty(&tg->members)) {
> @@ -583,10 +585,10 @@ static void quota_set_limit(struct evl_quota_group *tg,
>  			int *quota_sum_r)
>  {
>  	struct evl_rq *rq = tg->rq;
> -	struct evl_thread *thread, *tmp, *curr = rq->curr;
> +	struct evl_thread *thread, *tmp;
>  	struct evl_sched_quota *qs = &rq->quota;
> -	ktime_t now, elapsed, consumed;
>  	ktime_t old_quota = tg->quota;
> +	ktime_t consumed;
>  	u64 n;
>  
>  	assert_hard_lock(&rq->lock);
> @@ -615,16 +617,8 @@ static void quota_set_limit(struct evl_quota_group *tg,
>  	tg->quota_percent = quota_percent;
>  	tg->quota_peak_percent = quota_peak_percent;
>  
> -	if (thread_on_quota(curr, tg)) {
> -		now = evl_ktime_monotonic();
> -
> -		elapsed = now - tg->run_start;
> -		if (elapsed < tg->run_budget)
> -			tg->run_budget -= elapsed;
> -		else
> -			tg->run_budget = 0;
> -
> -		tg->run_start = now;
> +	if (current_on_quota(tg)) {
> +		quota_out(rq->curr);
>  		evl_stop_timer(&qs->limit_timer);
>  	}
>  
> @@ -646,7 +640,7 @@ static void quota_set_limit(struct evl_quota_group *tg,
>  		list_for_each_entry_safe_reverse(thread, tmp, &tg->expired,
>  						quota_expired) {
>  			list_del_init(&thread->quota_expired);
> -			evl_add_schedq(&rq->fifo.runnable, thread);
> +			evl_add_schedq(&qs->runnable, thread);
>  		}
>  	}
>  
> @@ -786,6 +780,7 @@ struct evl_sched_class evl_sched_quota = {
>  	.sched_dequeue		=	quota_dequeue,
>  	.sched_requeue		=	quota_requeue,
>  	.sched_pick		=	quota_pick,
> +	.sched_out		=	quota_out,
>  	.sched_migrate		=	quota_migrate,
>  	.sched_chkparam		=	quota_chkparam,
>  	.sched_setparam		=	quota_setparam,
> 

Looks ok otherwise, tests pass as well.

Jan

-- 
Siemens AG, Foundational Technologies
Linux Expert Center

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  9:04     ` Jan Kiszka
@ 2026-06-15  9:59       ` Philippe Gerum
  2026-06-15 10:24         ` Jan Kiszka
  0 siblings, 1 reply; 24+ messages in thread
From: Philippe Gerum @ 2026-06-15  9:59 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Xenomai

Jan Kiszka <jan.kiszka@siemens.com> writes:

> On 15.06.26 08:42, Philippe Gerum wrote:
>> Philippe Gerum <rpm@xenomai.org> writes:
>> 
>
> You also need to update tg->run_start. Otherwise, you will overcharge
> the next time this group is checked here.
>

Yes, this needs fixing.

>> +	} else {
>> +		tg->run_budget = 0;
>> +		evl_stop_timer(&qs->limit_timer);
>> +	}
>> +}
>> +
>> +static struct evl_thread *quota_pick(struct evl_rq *rq)
>> +{
>> +	struct evl_thread *next, *curr = rq->curr;
>> +	struct evl_sched_quota *qs = &rq->quota;
>> +	struct evl_quota_group *tg;
>> +
>>  pick:
>> -	next = evl_get_schedq(&rq->fifo.runnable);
>> +	next = evl_get_schedq(&qs->runnable);
>>  	if (next == NULL) {
>>  		evl_stop_timer(&qs->limit_timer);
>>  		return NULL;
>>  	}
>>  
>> -	/*
>> -	 * As we basically piggyback on the SCHED_FIFO runqueue, make
>> -	 * sure to detect non-quota threads.
>> -	 */
>>  	tg = next->quota;
>> -	if (tg == NULL)
>> -		return next;
>> -
>> -	tg->run_start = now;
>> +	tg->nr_active--;
>>  
>>  	/*
>>  	 * Don't consider budget if kicked, we have to allow this
>> @@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>>  	 */
>>  	if (next->info & EVL_T_KICKED) {
>>  		evl_stop_timer(&qs->limit_timer);
>> -		goto out;
>> +		return next;
>>  	}
>>  
>> +	/*
>> +	 * __pick_next_thread() might have requeued the current
>> +	 * thread which is still leading the pack.
>> +	 */
>> +	if (curr == next)
>> +		return next;
>> +
>>  	if (ktime_to_ns(tg->run_budget) == 0) {
>> -		/* Flush expired group members as we go. */
>> +		/* Park expired group members as we go. */
>>  		list_add_tail(&next->quota_expired, &tg->expired);
>>  		goto pick;
>>  	}
>>  
>> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
>> -		/* Same group, leave the running timer untouched. */
>> -		goto out;
>> -
>> -	/* Arm limit timer for the new running group. */
>> -	evl_start_timer(&qs->limit_timer,
>> -			ktime_add(now, tg->run_budget),
>> -			EVL_INFINITE);
>> -out:
>> -	tg->nr_active--;
>> +	/* Arm new limit timer on change of running group. */
>> +	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
>> +		tg->run_start = evl_ktime_monotonic();
>> +		evl_start_timer(&qs->limit_timer,
>> +				ktime_add(tg->run_start, tg->run_budget),
>> +				EVL_INFINITE);
>> +	}
>
> I still don't fully get how the limit timer should be started on
> setparam over the current thread. Will curr be NULL in that case, thus
> curr != next? Is that the reason to go for a separate runqueue?
>

Nope, that one is only to make things clearer, since there is no upside
in sharing the runqueues (this was inherited from SCHED_SPORADIC for no
obvious reason with hindsight). At some point, I noticed that it would
even have introduced a bug in a previous tentative implementation using
sched_class as a discriminator between threads, to figure out whether we
should call the sched_out() handler. So, the simpler, the better:
distinct classes, distinct runqueues.

Regarding the limit timer issue, sched_pick() should not shortcut on
curr == next. IOW, the rest of the code should run even in this case. By
turning on the resched bit (which the quota_setparam() caller does), we
would end up in sched_pick() with either of those outcomes out of
quota_setparam():

- 'current' switched group, in which case we would arm a limit timer in
  sched_pick() because curr->quota != next->quota (provided the shortcut
  on curr == next is gone).

- 'current' stays in the same group, so the limit timer still runs unless
  the budget was already fully consumed.

So I would revise my latest patch as follows:

diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
index 3ac650c08ccb..8f1321d46cac 100644
--- a/kernel/evl/sched/quota.c
+++ b/kernel/evl/sched/quota.c
@@ -407,6 +407,7 @@ static void quota_out(struct evl_thread *thread)
 	now = evl_ktime_monotonic();
 	consumed = ktime_sub(now, tg->run_start);
 	if (consumed < tg->run_budget) {
+		tg->run_start = now;
 		tg->run_budget = ktime_sub(tg->run_budget, consumed);
 	} else {
 		tg->run_budget = 0;
@@ -440,20 +441,13 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
 		return next;
 	}
 
-	/*
-	 * __pick_next_thread() might have requeued the current
-	 * thread which is still leading the pack.
-	 */
-	if (curr == next)
-		return next;
-
 	if (ktime_to_ns(tg->run_budget) == 0) {
 		/* Park expired group members as we go. */
 		list_add_tail(&next->quota_expired, &tg->expired);
 		goto pick;
 	}
 
-	/* Arm new limit timer on change of running group. */
+	/* Arm new limit timer if need be. */
 	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
 		tg->run_start = evl_ktime_monotonic();
 		evl_start_timer(&qs->limit_timer,

>
> Looks ok otherwise, tests pass as well.
>

Uh? Ok. That was absolutely unintentional at this stage!

-- 
Philippe.

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/2] evl/sched: Add sched_out handler to sched_class
  2026-06-15  9:59       ` Philippe Gerum
@ 2026-06-15 10:24         ` Jan Kiszka
  0 siblings, 0 replies; 24+ messages in thread
From: Jan Kiszka @ 2026-06-15 10:24 UTC (permalink / raw)
  To: Philippe Gerum; +Cc: Xenomai

On 15.06.26 11:59, Philippe Gerum wrote:
> Jan Kiszka <jan.kiszka@siemens.com> writes:
> 
>> On 15.06.26 08:42, Philippe Gerum wrote:
>>> Philippe Gerum <rpm@xenomai.org> writes:
>>>
>>
>> You also need to update tg->run_start. Otherwise, you will overcharge
>> the next time this group is checked here.
>>
> 
> Yes, this needs fixing.
> 
>>> +	} else {
>>> +		tg->run_budget = 0;
>>> +		evl_stop_timer(&qs->limit_timer);
>>> +	}
>>> +}
>>> +
>>> +static struct evl_thread *quota_pick(struct evl_rq *rq)
>>> +{
>>> +	struct evl_thread *next, *curr = rq->curr;
>>> +	struct evl_sched_quota *qs = &rq->quota;
>>> +	struct evl_quota_group *tg;
>>> +
>>>  pick:
>>> -	next = evl_get_schedq(&rq->fifo.runnable);
>>> +	next = evl_get_schedq(&qs->runnable);
>>>  	if (next == NULL) {
>>>  		evl_stop_timer(&qs->limit_timer);
>>>  		return NULL;
>>>  	}
>>>  
>>> -	/*
>>> -	 * As we basically piggyback on the SCHED_FIFO runqueue, make
>>> -	 * sure to detect non-quota threads.
>>> -	 */
>>>  	tg = next->quota;
>>> -	if (tg == NULL)
>>> -		return next;
>>> -
>>> -	tg->run_start = now;
>>> +	tg->nr_active--;
>>>  
>>>  	/*
>>>  	 * Don't consider budget if kicked, we have to allow this
>>> @@ -439,25 +437,29 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>>>  	 */
>>>  	if (next->info & EVL_T_KICKED) {
>>>  		evl_stop_timer(&qs->limit_timer);
>>> -		goto out;
>>> +		return next;
>>>  	}
>>>  
>>> +	/*
>>> +	 * __pick_next_thread() might have requeued the current
>>> +	 * thread which is still leading the pack.
>>> +	 */
>>> +	if (curr == next)
>>> +		return next;
>>> +
>>>  	if (ktime_to_ns(tg->run_budget) == 0) {
>>> -		/* Flush expired group members as we go. */
>>> +		/* Park expired group members as we go. */
>>>  		list_add_tail(&next->quota_expired, &tg->expired);
>>>  		goto pick;
>>>  	}
>>>  
>>> -	if (otg == tg && evl_timer_is_running(&qs->limit_timer))
>>> -		/* Same group, leave the running timer untouched. */
>>> -		goto out;
>>> -
>>> -	/* Arm limit timer for the new running group. */
>>> -	evl_start_timer(&qs->limit_timer,
>>> -			ktime_add(now, tg->run_budget),
>>> -			EVL_INFINITE);
>>> -out:
>>> -	tg->nr_active--;
>>> +	/* Arm new limit timer on change of running group. */
>>> +	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
>>> +		tg->run_start = evl_ktime_monotonic();
>>> +		evl_start_timer(&qs->limit_timer,
>>> +				ktime_add(tg->run_start, tg->run_budget),
>>> +				EVL_INFINITE);
>>> +	}
>>
>> I still don't fully get how the limit timer should be started on
>> setparam over the current thread. Will curr be NULL in that case, thus
>> curr != next? Is that the reason to go for a separate runqueue?
>>
> 
> Nope, that one is only to make things clearer, since there is no upside
> in sharing the runqueues (this was inherited from SCHED_SPORADIC for no
> obvious reason with hindsight). At some point, I noticed that it would
> even have introduced a bug in a previous tentative implementation using
> sched_class as a discriminator between threads, to figure out whether we
> should call the sched_out() handler. So, the simpler, the better:
> distinct classes, distinct runqueues.
> 
> Regarding the limit timer issue, sched_pick() should not shortcut on
> curr == next. IOW, the rest of the code should run even in this case. By
> turning on the resched bit (which the quota_setparam() caller does), we
> would end up in sched_pick() with either of those outcomes out of
> quota_setparam():
> 
> - 'current' switched group, in which case we would arm a limit timer in
>   sched_pick() because curr->quota != next->quota (provided the shortcut
>   on curr == next is gone).
> 
> - 'current' stays in the same group, so the limit timer still runs unless
>   the budget was already fully consumed.
> 

Makes sense, now.

> So I would revise my latest patch as follows:
> 
> diff --git a/kernel/evl/sched/quota.c b/kernel/evl/sched/quota.c
> index 3ac650c08ccb..8f1321d46cac 100644
> --- a/kernel/evl/sched/quota.c
> +++ b/kernel/evl/sched/quota.c
> @@ -407,6 +407,7 @@ static void quota_out(struct evl_thread *thread)
>  	now = evl_ktime_monotonic();
>  	consumed = ktime_sub(now, tg->run_start);
>  	if (consumed < tg->run_budget) {
> +		tg->run_start = now;
>  		tg->run_budget = ktime_sub(tg->run_budget, consumed);
>  	} else {
>  		tg->run_budget = 0;
> @@ -440,20 +441,13 @@ static struct evl_thread *quota_pick(struct evl_rq *rq)
>  		return next;
>  	}
>  
> -	/*
> -	 * __pick_next_thread() might have requeued the current
> -	 * thread which is still leading the pack.
> -	 */
> -	if (curr == next)
> -		return next;
> -
>  	if (ktime_to_ns(tg->run_budget) == 0) {
>  		/* Park expired group members as we go. */
>  		list_add_tail(&next->quota_expired, &tg->expired);
>  		goto pick;
>  	}
>  
> -	/* Arm new limit timer on change of running group. */
> +	/* Arm new limit timer if need be. */
>  	if (curr->quota != tg || !evl_timer_is_running(&qs->limit_timer)) {
>  		tg->run_start = evl_ktime_monotonic();
>  		evl_start_timer(&qs->limit_timer,
> 
>>
>> Looks ok otherwise, tests pass as well.
>>
> 
> Uh? Ok. That was absolutely unintentional at this stage!
> 

Tests aren't perfect either: sched-quota was "tested" before, and my
current proposal apparently missed the corner-cases we just discussed above.

Jan

-- 
Siemens AG, Foundational Technologies
Linux Expert Center

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2026-06-15 10:24 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-14 19:35 [PATCH 1/2] evl/sched: Add sched_out handler to sched_class Jan Kiszka
2026-06-14 19:35 ` [PATCH 2/2] evl/sched/quota: Correct budget tracking for preempted threads Jan Kiszka
2026-06-15  8:47   ` Philippe Gerum
2026-06-15  6:28 ` [PATCH 1/2] evl/sched: Add sched_out handler to sched_class Philippe Gerum
2026-06-15  6:42   ` Philippe Gerum
2026-06-15  6:44     ` Jan Kiszka
2026-06-15  6:46     ` Jan Kiszka
2026-06-15  7:14       ` Philippe Gerum
2026-06-15  7:21         ` Jan Kiszka
2026-06-15  7:29           ` Philippe Gerum
2026-06-15  6:58     ` Jan Kiszka
2026-06-15  7:17       ` Philippe Gerum
2026-06-15  7:22         ` Jan Kiszka
2026-06-15  7:30           ` Philippe Gerum
2026-06-15  7:34             ` Philippe Gerum
2026-06-15  8:34             ` Jan Kiszka
2026-06-15  8:53               ` Philippe Gerum
2026-06-15  9:02               ` Philippe Gerum
2026-06-15  8:09       ` Philippe Gerum
2026-06-15  8:40         ` Jan Kiszka
2026-06-15  8:55           ` Philippe Gerum
2026-06-15  9:04     ` Jan Kiszka
2026-06-15  9:59       ` Philippe Gerum
2026-06-15 10:24         ` Jan Kiszka

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.