public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 6.1.y 1/1] hrtimer: Use and report correct timerslack values for realtime tasks
@ 2025-03-11 14:07 Felix Moessbauer
  2025-03-13  9:11 ` Sasha Levin
  0 siblings, 1 reply; 2+ messages in thread
From: Felix Moessbauer @ 2025-03-11 14:07 UTC (permalink / raw)
  To: stable
  Cc: Felix Moessbauer, linux-kernel, tglx, qyousef, frederic,
	jan.kiszka, bigeasy, anna-maria

commit ed4fb6d7ef68111bb539283561953e5c6e9a6e38 upstream.

The timerslack_ns setting is used to specify how much the hardware
timers should be delayed, to potentially dispatch multiple timers in a
single interrupt. This is a performance optimization. Timers of
realtime tasks (having a realtime scheduling policy) should not be
delayed.

This logic was inconsitently applied to the hrtimers, leading to delays
of realtime tasks which used timed waits for events (e.g. condition
variables). Due to the downstream override of the slack for rt tasks,
the procfs reported incorrect (non-zero) timerslack_ns values.

This is changed by setting the timer_slack_ns task attribute to 0 for
all tasks with a rt policy. By that, downstream users do not need to
specially handle rt tasks (w.r.t. the slack), and the procfs entry
shows the correct value of "0". Setting non-zero slack values (either
via procfs or PR_SET_TIMERSLACK) on tasks with a rt policy is ignored,
as stated in "man 2 PR_SET_TIMERSLACK":

  Timer slack is not applied to threads that are scheduled under a
  real-time scheduling policy (see sched_setscheduler(2)).

The special handling of timerslack on rt tasks in downstream users
is removed as well.

Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20240814121032.368444-2-felix.moessbauer@siemens.com
---
 fs/proc/base.c        |  9 +++++----
 fs/select.c           | 11 ++++-------
 kernel/sched/core.c   |  8 ++++++++
 kernel/sys.c          |  2 ++
 kernel/time/hrtimer.c | 18 +++---------------
 5 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index ecc45389ea793..82e4a8805bae6 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2633,10 +2633,11 @@ static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
 	}
 
 	task_lock(p);
-	if (slack_ns == 0)
-		p->timer_slack_ns = p->default_timer_slack_ns;
-	else
-		p->timer_slack_ns = slack_ns;
+	if (task_is_realtime(p))
+		slack_ns = 0;
+	else if (slack_ns == 0)
+		slack_ns = p->default_timer_slack_ns;
+	p->timer_slack_ns = slack_ns;
 	task_unlock(p);
 
 out:
diff --git a/fs/select.c b/fs/select.c
index 3f730b8581f65..e66b6189845ea 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -77,19 +77,16 @@ u64 select_estimate_accuracy(struct timespec64 *tv)
 {
 	u64 ret;
 	struct timespec64 now;
+	u64 slack = current->timer_slack_ns;
 
-	/*
-	 * Realtime tasks get a slack of 0 for obvious reasons.
-	 */
-
-	if (rt_task(current))
+	if (slack == 0)
 		return 0;
 
 	ktime_get_ts64(&now);
 	now = timespec64_sub(*tv, now);
 	ret = __estimate_accuracy(&now);
-	if (ret < current->timer_slack_ns)
-		return current->timer_slack_ns;
+	if (ret < slack)
+		return slack;
 	return ret;
 }
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0a483fd9f5de5..9be8a509b5f3f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7380,6 +7380,14 @@ static void __setscheduler_params(struct task_struct *p,
 	else if (fair_policy(policy))
 		p->static_prio = NICE_TO_PRIO(attr->sched_nice);
 
+	/* rt-policy tasks do not have a timerslack */
+	if (task_is_realtime(p)) {
+		p->timer_slack_ns = 0;
+	} else if (p->timer_slack_ns == 0) {
+		/* when switching back to non-rt policy, restore timerslack */
+		p->timer_slack_ns = p->default_timer_slack_ns;
+	}
+
 	/*
 	 * __sched_setscheduler() ensures attr->sched_priority == 0 when
 	 * !rt_policy. Always setting this ensures that things like
diff --git a/kernel/sys.c b/kernel/sys.c
index d06eda1387b69..06a9a87a8d3e0 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2477,6 +2477,8 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			error = current->timer_slack_ns;
 		break;
 	case PR_SET_TIMERSLACK:
+		if (task_is_realtime(current))
+			break;
 		if (arg2 <= 0)
 			current->timer_slack_ns =
 					current->default_timer_slack_ns;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 8db65e2db14c7..f6d799646dd9c 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -2090,14 +2090,9 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
 	struct restart_block *restart;
 	struct hrtimer_sleeper t;
 	int ret = 0;
-	u64 slack;
-
-	slack = current->timer_slack_ns;
-	if (dl_task(current) || rt_task(current))
-		slack = 0;
 
 	hrtimer_init_sleeper_on_stack(&t, clockid, mode);
-	hrtimer_set_expires_range_ns(&t.timer, rqtp, slack);
+	hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns);
 	ret = do_nanosleep(&t, mode);
 	if (ret != -ERESTART_RESTARTBLOCK)
 		goto out;
@@ -2278,7 +2273,7 @@ void __init hrtimers_init(void)
 /**
  * schedule_hrtimeout_range_clock - sleep until timeout
  * @expires:	timeout value (ktime_t)
- * @delta:	slack in expires timeout (ktime_t) for SCHED_OTHER tasks
+ * @delta:	slack in expires timeout (ktime_t)
  * @mode:	timer mode
  * @clock_id:	timer clock to be used
  */
@@ -2305,13 +2300,6 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
 		return -EINTR;
 	}
 
-	/*
-	 * Override any slack passed by the user if under
-	 * rt contraints.
-	 */
-	if (rt_task(current))
-		delta = 0;
-
 	hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
 	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
 	hrtimer_sleeper_start_expires(&t, mode);
@@ -2331,7 +2319,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock);
 /**
  * schedule_hrtimeout_range - sleep until timeout
  * @expires:	timeout value (ktime_t)
- * @delta:	slack in expires timeout (ktime_t) for SCHED_OTHER tasks
+ * @delta:	slack in expires timeout (ktime_t)
  * @mode:	timer mode
  *
  * Make the current task sleep until the given expiry time has
-- 
2.47.2


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH 6.1.y 1/1] hrtimer: Use and report correct timerslack values for realtime tasks
  2025-03-11 14:07 [PATCH 6.1.y 1/1] hrtimer: Use and report correct timerslack values for realtime tasks Felix Moessbauer
@ 2025-03-13  9:11 ` Sasha Levin
  0 siblings, 0 replies; 2+ messages in thread
From: Sasha Levin @ 2025-03-13  9:11 UTC (permalink / raw)
  To: stable; +Cc: Felix Moessbauer, Sasha Levin

[ Sasha's backport helper bot ]

Hi,

✅ All tests passed successfully. No issues detected.
No action required from the submitter.

The upstream commit SHA1 provided is correct: ed4fb6d7ef68111bb539283561953e5c6e9a6e38

Status in newer kernel trees:
6.13.y | Present (exact SHA1)
6.12.y | Present (exact SHA1)
6.6.y | Not found

Note: The patch differs from the upstream commit:
---
1:  ed4fb6d7ef681 ! 1:  a4d3916d07d4c hrtimer: Use and report correct timerslack values for realtime tasks
    @@ Metadata
      ## Commit message ##
         hrtimer: Use and report correct timerslack values for realtime tasks
     
    +    commit ed4fb6d7ef68111bb539283561953e5c6e9a6e38 upstream.
    +
         The timerslack_ns setting is used to specify how much the hardware
         timers should be delayed, to potentially dispatch multiple timers in a
         single interrupt. This is a performance optimization. Timers of
    @@ fs/select.c: u64 select_estimate_accuracy(struct timespec64 *tv)
      }
      
     
    - ## kernel/sched/syscalls.c ##
    -@@ kernel/sched/syscalls.c: static void __setscheduler_params(struct task_struct *p,
    + ## kernel/sched/core.c ##
    +@@ kernel/sched/core.c: static void __setscheduler_params(struct task_struct *p,
      	else if (fair_policy(policy))
      		p->static_prio = NICE_TO_PRIO(attr->sched_nice);
      
    @@ kernel/time/hrtimer.c: long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_m
     -	u64 slack;
     -
     -	slack = current->timer_slack_ns;
    --	if (rt_task(current))
    +-	if (dl_task(current) || rt_task(current))
     -		slack = 0;
      
      	hrtimer_init_sleeper_on_stack(&t, clockid, mode);
---

Results of testing on various branches:

| Branch                    | Patch Apply | Build Test |
|---------------------------|-------------|------------|
| stable/linux-6.1.y        |  Success    |  Success   |

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-03-13  9:11 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-03-11 14:07 [PATCH 6.1.y 1/1] hrtimer: Use and report correct timerslack values for realtime tasks Felix Moessbauer
2025-03-13  9:11 ` Sasha Levin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox