All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: John Stultz <jstultz@google.com>
Cc: Miroslav Lichvar <mlichvar@redhat.com>,
	LKML <linux-kernel@vger.kernel.org>,
	Stephen Boyd <sboyd@kernel.org>,
	Anna-Maria Behnsen <anna-maria@linutronix.de>,
	Frederic Weisbecker <frederic@kernel.org>,
	Shuah Khan <shuah@kernel.org>,
	linux-kselftest@vger.kernel.org, kernel-team@android.com,
	Lei Chen <lei.chen@smartx.com>
Subject: Re: [PATCH] timekeeping: Prevent coarse clocks going backwards
Date: Fri, 18 Apr 2025 09:00:34 +0200	[thread overview]
Message-ID: <87r01qq7hp.ffs@tglx> (raw)
In-Reply-To: <87tt6mq8jz.ffs@tglx>

On Fri, Apr 18 2025 at 08:37, Thomas Gleixner wrote:
> On Thu, Apr 17 2025 at 17:46, John Stultz wrote:
>> Instead it seems like we should just do:
>>   tk->coarse_nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
>
> You end up with the same problem again because xtime_nsec can move
> backwards when the multiplier is updated, no?

Something like the below should work.

Thanks,

        tglx
---
 include/linux/timekeeper_internal.h |   10 ++++-
 kernel/time/timekeeping.c           |   62 ++++++++++++++++++++++++++++++++----
 kernel/time/vsyscall.c              |    4 +-
 3 files changed, 65 insertions(+), 11 deletions(-)

--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -51,7 +51,7 @@ struct tk_read_base {
  * @offs_real:			Offset clock monotonic -> clock realtime
  * @offs_boot:			Offset clock monotonic -> clock boottime
  * @offs_tai:			Offset clock monotonic -> clock tai
- * @tai_offset:			The current UTC to TAI offset in seconds
+ * @coarse_nsec:		The nanoseconds part for coarse time getters
  * @tkr_raw:			The readout base structure for CLOCK_MONOTONIC_RAW
  * @raw_sec:			CLOCK_MONOTONIC_RAW  time in seconds
  * @clock_was_set_seq:		The sequence number of clock was set events
@@ -76,6 +76,8 @@ struct tk_read_base {
  *				ntp shifted nano seconds.
  * @ntp_err_mult:		Multiplication factor for scaled math conversion
  * @skip_second_overflow:	Flag used to avoid updating NTP twice with same second
+ * @tai_offset:			The current UTC to TAI offset in seconds
+ * @coarse_offset:		The offset of the coarse timekeeper in clock cycles
  *
  * Note: For timespec(64) based interfaces wall_to_monotonic is what
  * we need to add to xtime (or xtime corrected for sub jiffy times)
@@ -100,7 +102,7 @@ struct tk_read_base {
  * which results in the following cacheline layout:
  *
  * 0:	seqcount, tkr_mono
- * 1:	xtime_sec ... tai_offset
+ * 1:	xtime_sec ... coarse_nsec
  * 2:	tkr_raw, raw_sec
  * 3,4: Internal variables
  *
@@ -121,7 +123,7 @@ struct timekeeper {
 	ktime_t			offs_real;
 	ktime_t			offs_boot;
 	ktime_t			offs_tai;
-	s32			tai_offset;
+	u32			coarse_nsec;
 
 	/* Cacheline 2: */
 	struct tk_read_base	tkr_raw;
@@ -144,6 +146,8 @@ struct timekeeper {
 	u32			ntp_error_shift;
 	u32			ntp_err_mult;
 	u32			skip_second_overflow;
+	s32			tai_offset;
+	u32			coarse_offset;
 };
 
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -164,6 +164,15 @@ static inline struct timespec64 tk_xtime
 	return ts;
 }
 
+static inline struct timespec64 tk_xtime_coarse(const struct timekeeper *tk)
+{
+	struct timespec64 ts;
+
+	ts.tv_sec = tk->xtime_sec;
+	ts.tv_nsec = tk->coarse_nsec;
+	return ts;
+}
+
 static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
 {
 	tk->xtime_sec = ts->tv_sec;
@@ -252,6 +261,7 @@ static void tk_setup_internals(struct ti
 	tk->tkr_raw.clock = clock;
 	tk->tkr_raw.mask = clock->mask;
 	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
+	tk->coarse_offset = 0;
 
 	/* Do the ns -> cycle conversion first, using original mult */
 	tmp = NTP_INTERVAL_LENGTH;
@@ -636,6 +646,34 @@ static void timekeeping_restore_shadow(s
 	memcpy(&tkd->shadow_timekeeper, &tkd->timekeeper, sizeof(tkd->timekeeper));
 }
 
+/*
+ * Update the nanoseconds part for the coarse time keepers. They can't rely
+ * on xtime_nsec because xtime_nsec is adjusted when the multiplication
+ * factor of the clock is adjusted. See timekeeping_apply_adjustment().
+ *
+ * This is required because tk_read::cycle_last must be advanced by
+ * timekeeper::cycle_interval so that the accumulation happens with a
+ * periodic reference.
+ *
+ * But that adjustment of xtime_nsec can make it go backward to compensate
+ * for a larger multiplicator.
+ *
+ * timekeeper::offset contains the leftover cycles which were not accumulated.
+ * Therefore the nanoseconds portion of the time when the clocksource was
+ * read in timekeeping_advance() is:
+ *
+ *	nsec = (xtime_nsec + offset * mult) >> shift;
+ *
+ * Calculate that value and store it in timekeeper::coarse_nsec, from where
+ * the coarse time getters consume it.
+ */
+static inline void tk_update_coarse_nsecs(struct timekeeper *tk)
+{
+	u64 offset = (u64)tk->coarse_offset * tk->tkr_mono.mult;
+
+	tk->coarse_nsec = (tk->tkr_mono.xtime_nsec + offset) >> tk->tkr_mono.shift;
+}
+
 static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int action)
 {
 	struct timekeeper *tk = &tk_core.shadow_timekeeper;
@@ -658,6 +696,7 @@ static void timekeeping_update_from_shad
 
 	tk_update_leap_state(tk);
 	tk_update_ktime_data(tk);
+	tk_update_coarse_nsecs(tk);
 
 	update_vsyscall(tk);
 	update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
@@ -708,6 +747,12 @@ static void timekeeping_forward_now(stru
 		tk_normalize_xtime(tk);
 		delta -= incr;
 	}
+
+	/*
+	 * Clear the offset for the coarse time as the above forward
+	 * brought the offset down to zero.
+	 */
+	tk->coarse_offset = 0;
 }
 
 /**
@@ -804,8 +849,8 @@ EXPORT_SYMBOL_GPL(ktime_get_with_offset)
 ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	unsigned int seq;
 	ktime_t base, *offset = offsets[offs];
+	unsigned int seq;
 	u64 nsecs;
 
 	WARN_ON(timekeeping_suspended);
@@ -813,7 +858,7 @@ ktime_t ktime_get_coarse_with_offset(enu
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 		base = ktime_add(tk->tkr_mono.base, *offset);
-		nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+		nsecs = tk->coarse_nsec;
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -1831,6 +1876,8 @@ void timekeeping_resume(void)
 	/* Re-base the last cycle value */
 	tks->tkr_mono.cycle_last = cycle_now;
 	tks->tkr_raw.cycle_last  = cycle_now;
+	/* Reset the offset for the coarse time getters */
+	tks->coarse_offset = 0;
 
 	tks->ntp_error = 0;
 	timekeeping_suspended = 0;
@@ -2205,6 +2252,9 @@ static bool timekeeping_advance(enum tim
 	 */
 	clock_set |= accumulate_nsecs_to_secs(tk);
 
+	/* Compensate the coarse time getters xtime_nsec offset */
+	tk->coarse_offset = (u32)offset;
+
 	timekeeping_update_from_shadow(&tk_core, clock_set);
 
 	return !!clock_set;
@@ -2248,7 +2298,7 @@ void ktime_get_coarse_real_ts64(struct t
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 
-		*ts = tk_xtime(tk);
+		*ts = tk_xtime_coarse(tk);
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 }
 EXPORT_SYMBOL(ktime_get_coarse_real_ts64);
@@ -2271,7 +2321,7 @@ void ktime_get_coarse_real_ts64_mg(struc
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-		*ts = tk_xtime(tk);
+		*ts = tk_xtime_coarse(tk);
 		offset = tk_core.timekeeper.offs_real;
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -2350,12 +2400,12 @@ void ktime_get_coarse_ts64(struct timesp
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 
-		now = tk_xtime(tk);
+		now = tk_xtime_coarse(tk);
 		mono = tk->wall_to_monotonic;
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
 	set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec,
-				now.tv_nsec + mono.tv_nsec);
+				  now.tv_nsec + mono.tv_nsec);
 }
 EXPORT_SYMBOL(ktime_get_coarse_ts64);
 
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -98,12 +98,12 @@ void update_vsyscall(struct timekeeper *
 	/* CLOCK_REALTIME_COARSE */
 	vdso_ts		= &vc[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE];
 	vdso_ts->sec	= tk->xtime_sec;
-	vdso_ts->nsec	= tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+	vdso_ts->nsec	= tk->coarse_nsec;
 
 	/* CLOCK_MONOTONIC_COARSE */
 	vdso_ts		= &vc[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE];
 	vdso_ts->sec	= tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
-	nsec		= tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+	nsec		= tk->coarse_nsec;
 	nsec		= nsec + tk->wall_to_monotonic.tv_nsec;
 	vdso_ts->sec	+= __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec);
 

  reply	other threads:[~2025-04-18  7:00 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-20 20:03 [PATCH v2 1/2] time/timekeeping: Fix possible inconsistencies in _COARSE clockids John Stultz
2025-03-20 20:03 ` [PATCH v2 2/2] selftests/timers: Improve skew_consistency by testing with other clockids John Stultz
2025-03-21 18:35   ` [tip: timers/core] " tip-bot2 for John Stultz
2025-03-21 18:35 ` [tip: timers/core] timekeeping: Fix possible inconsistencies in _COARSE clockids tip-bot2 for John Stultz
2025-03-25 11:32 ` [PATCH v2 1/2] time/timekeeping: " Miroslav Lichvar
2025-03-27  9:22   ` Thomas Gleixner
2025-03-27 15:42     ` Miroslav Lichvar
2025-03-27 17:32       ` Thomas Gleixner
2025-03-31  7:53         ` Miroslav Lichvar
2025-04-17  2:55           ` John Stultz
2025-03-31 14:53       ` Miroslav Lichvar
2025-04-01  6:34         ` Thomas Gleixner
2025-04-01 11:19           ` Miroslav Lichvar
2025-04-01 18:29             ` Thomas Gleixner
2025-04-03  8:32               ` Miroslav Lichvar
2025-04-03 11:29                 ` Thomas Gleixner
2025-04-05 21:40                   ` [PATCH] timekeeping: Prevent coarse clocks going backwards Thomas Gleixner
2025-04-17  5:29                     ` John Stultz
2025-04-17 12:36                       ` Thomas Gleixner
2025-04-18  0:46                     ` John Stultz
2025-04-18  6:37                       ` Thomas Gleixner
2025-04-18  7:00                         ` Thomas Gleixner [this message]
2025-04-19  5:55                           ` John Stultz
2025-04-18 18:40                         ` John Stultz
2025-04-19  5:46                           ` [PATCH v3] " John Stultz
2025-04-24 16:02                             ` [tip: timers/urgent] " tip-bot2 for Thomas Gleixner
2025-04-28  9:28                             ` tip-bot2 for Thomas Gleixner
2025-04-04 17:22         ` [tip: timers/urgent] Revert "timekeeping: Fix possible inconsistencies in _COARSE clockids" tip-bot2 for Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87r01qq7hp.ffs@tglx \
    --to=tglx@linutronix.de \
    --cc=anna-maria@linutronix.de \
    --cc=frederic@kernel.org \
    --cc=jstultz@google.com \
    --cc=kernel-team@android.com \
    --cc=lei.chen@smartx.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=mlichvar@redhat.com \
    --cc=sboyd@kernel.org \
    --cc=shuah@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.