All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	patches@lists.linux.dev, Dexuan Cui <decui@microsoft.com>,
	Naman Jain <namjain@linux.microsoft.com>,
	Michael Kelley <mhklinux@outlook.com>,
	Wei Liu <wei.liu@kernel.org>
Subject: [PATCH 6.1 01/81] x86/hyperv: Fix hv tsc page based sched_clock for hibernation
Date: Mon,  6 Jan 2025 16:15:33 +0100	[thread overview]
Message-ID: <20250106151129.492910950@linuxfoundation.org> (raw)
In-Reply-To: <20250106151129.433047073@linuxfoundation.org>

6.1-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Naman Jain <namjain@linux.microsoft.com>

commit bcc80dec91ee745b3d66f3e48f0ec2efdea97149 upstream.

read_hv_sched_clock_tsc() assumes that the Hyper-V clock counter is
bigger than the variable hv_sched_clock_offset, which is cached during
early boot, but depending on the timing this assumption may be false
when a hibernated VM starts again (the clock counter starts from 0
again) and is resuming back (Note: hv_init_tsc_clocksource() is not
called during hibernation/resume); consequently,
read_hv_sched_clock_tsc() may return a negative integer (which is
interpreted as a huge positive integer since the return type is u64)
and new kernel messages are prefixed with huge timestamps before
read_hv_sched_clock_tsc() grows big enough (which typically takes
several seconds).

Fix the issue by saving the Hyper-V clock counter just before the
suspend, and using it to correct the hv_sched_clock_offset in
resume. This makes hv tsc page based sched_clock continuous and ensures
that post resume, it starts from where it left off during suspend.
Override x86_platform.save_sched_clock_state and
x86_platform.restore_sched_clock_state routines to correct this as soon
as possible.

Note: if Invariant TSC is available, the issue doesn't happen because
1) we don't register read_hv_sched_clock_tsc() for sched clock:
See commit e5313f1c5404 ("clocksource/drivers/hyper-v: Rework
clocksource and sched clock setup");
2) the common x86 code adjusts TSC similarly: see
__restore_processor_state() ->  tsc_verify_tsc_adjust(true) and
x86_platform.restore_sched_clock_state().

Cc: stable@vger.kernel.org
Fixes: 1349401ff1aa ("clocksource/drivers/hyper-v: Suspend/resume Hyper-V clocksource for hibernation")
Co-developed-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Link: https://lore.kernel.org/r/20240917053917.76787-1-namjain@linux.microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Message-ID: <20240917053917.76787-1-namjain@linux.microsoft.com>
Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/mshyperv.c     |   58 +++++++++++++++++++++++++++++++++++++
 drivers/clocksource/hyperv_timer.c |   14 ++++++++
 include/clocksource/hyperv_timer.h |    2 +
 3 files changed, 73 insertions(+), 1 deletion(-)

--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -152,6 +152,63 @@ static void hv_machine_crash_shutdown(st
 	hyperv_cleanup();
 }
 #endif /* CONFIG_KEXEC_CORE */
+
+static u64 hv_ref_counter_at_suspend;
+static void (*old_save_sched_clock_state)(void);
+static void (*old_restore_sched_clock_state)(void);
+
+/*
+ * Hyper-V clock counter resets during hibernation. Save and restore clock
+ * offset during suspend/resume, while also considering the time passed
+ * before suspend. This is to make sure that sched_clock using hv tsc page
+ * based clocksource, proceeds from where it left off during suspend and
+ * it shows correct time for the timestamps of kernel messages after resume.
+ */
+static void save_hv_clock_tsc_state(void)
+{
+	hv_ref_counter_at_suspend = hv_read_reference_counter();
+}
+
+static void restore_hv_clock_tsc_state(void)
+{
+	/*
+	 * Adjust the offsets used by hv tsc clocksource to
+	 * account for the time spent before hibernation.
+	 * adjusted value = reference counter (time) at suspend
+	 *                - reference counter (time) now.
+	 */
+	hv_adj_sched_clock_offset(hv_ref_counter_at_suspend - hv_read_reference_counter());
+}
+
+/*
+ * Functions to override save_sched_clock_state and restore_sched_clock_state
+ * functions of x86_platform. The Hyper-V clock counter is reset during
+ * suspend-resume and the offset used to measure time needs to be
+ * corrected, post resume.
+ */
+static void hv_save_sched_clock_state(void)
+{
+	old_save_sched_clock_state();
+	save_hv_clock_tsc_state();
+}
+
+static void hv_restore_sched_clock_state(void)
+{
+	restore_hv_clock_tsc_state();
+	old_restore_sched_clock_state();
+}
+
+static void __init x86_setup_ops_for_tsc_pg_clock(void)
+{
+	if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
+		return;
+
+	old_save_sched_clock_state = x86_platform.save_sched_clock_state;
+	x86_platform.save_sched_clock_state = hv_save_sched_clock_state;
+
+	old_restore_sched_clock_state = x86_platform.restore_sched_clock_state;
+	x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state;
+}
 #endif /* CONFIG_HYPERV */
 
 static uint32_t  __init ms_hyperv_platform(void)
@@ -454,6 +511,7 @@ static void __init ms_hyperv_init_platfo
 
 	/* Register Hyper-V specific clocksource */
 	hv_init_clocksource();
+	x86_setup_ops_for_tsc_pg_clock();
 #endif
 	/*
 	 * TSC should be marked as unstable only after Hyper-V
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -27,7 +27,8 @@
 #include <asm/mshyperv.h>
 
 static struct clock_event_device __percpu *hv_clock_event;
-static u64 hv_sched_clock_offset __ro_after_init;
+/* Note: offset can hold negative values after hibernation. */
+static u64 hv_sched_clock_offset __read_mostly;
 
 /*
  * If false, we're using the old mechanism for stimer0 interrupts
@@ -417,6 +418,17 @@ static void resume_hv_clock_tsc(struct c
 	hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr.as_uint64);
 }
 
+/*
+ * Called during resume from hibernation, from overridden
+ * x86_platform.restore_sched_clock_state routine. This is to adjust offsets
+ * used to calculate time for hv tsc page based sched_clock, to account for
+ * time spent before hibernation.
+ */
+void hv_adj_sched_clock_offset(u64 offset)
+{
+	hv_sched_clock_offset -= offset;
+}
+
 #ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK
 static int hv_cs_enable(struct clocksource *cs)
 {
--- a/include/clocksource/hyperv_timer.h
+++ b/include/clocksource/hyperv_timer.h
@@ -34,6 +34,8 @@ extern void hv_init_clocksource(void);
 
 extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
 
+extern void hv_adj_sched_clock_offset(u64 offset);
+
 static inline notrace u64
 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc)
 {



  reply	other threads:[~2025-01-06 15:18 UTC|newest]

Thread overview: 93+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-06 15:15 [PATCH 6.1 00/81] 6.1.124-rc1 review Greg Kroah-Hartman
2025-01-06 15:15 ` Greg Kroah-Hartman [this message]
2025-01-06 15:15 ` [PATCH 6.1 02/81] selinux: ignore unknown extended permissions Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 03/81] btrfs: fix use-after-free in btrfs_encoded_read_endio() Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 04/81] tracing: Have process_string() also allow arrays Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 05/81] thunderbolt: Add support for Intel Lunar Lake Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 06/81] thunderbolt: Add support for Intel Panther Lake-M/P Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 07/81] thunderbolt: Dont display nvm_version unless upgrade supported Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 08/81] xhci: retry Stop Endpoint on buggy NEC controllers Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 09/81] usb: xhci: Limit Stop Endpoint retries Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 10/81] xhci: Turn NEC specific quirk for handling Stop Endpoint errors generic Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 11/81] net: mctp: handle skb cleanup on sock_queue failures Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 12/81] RDMA/mlx5: Enforce same type port association for multiport RoCE Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 13/81] RDMA/bnxt_re: Add check for path mtu in modify_qp Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 14/81] RDMA/bnxt_re: Fix reporting hw_ver in query_device Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 15/81] RDMA/bnxt_re: Fix max_qp_wrs reported Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 16/81] RDMA/bnxt_re: Fix the locking while accessing the QP table Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 17/81] drm/bridge: adv7511_audio: Update Audio InfoFrame properly Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 18/81] net: dsa: microchip: Fix KSZ9477 set_ageing_time function Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 19/81] net: dsa: microchip: add ksz_rmw8() function Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 20/81] net: dsa: microchip: Fix LAN937X set_ageing_time function Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 21/81] RDMA/hns: Refactor mtr find Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 22/81] RDMA/hns: Remove unused parameters and variables Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 23/81] RDMA/hns: Fix mapping error of zero-hop WQE buffer Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 24/81] RDMA/hns: Fix warning storm caused by invalid input in IO path Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 25/81] RDMA/hns: Fix missing flush CQE for DWQE Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 26/81] net: stmmac: platform: provide devm_stmmac_probe_config_dt() Greg Kroah-Hartman
2025-01-06 15:15 ` [PATCH 6.1 27/81] net: stmmac: dont create a MDIO bus if unnecessary Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 28/81] net: stmmac: restructure the error path of stmmac_probe_config_dt() Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 29/81] net: fix memory leak in tcp_conn_request() Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 30/81] ipip,ip_tunnel,sit: Add FOU support for externally controlled ipip devices Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 31/81] ip_tunnel: annotate data-races around t->parms.link Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 32/81] ipv4: ip_tunnel: Unmask upper DSCP bits in ip_tunnel_bind_dev() Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 33/81] ipv4: ip_tunnel: Unmask upper DSCP bits in ip_md_tunnel_xmit() Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 34/81] ipv4: ip_tunnel: Unmask upper DSCP bits in ip_tunnel_xmit() Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 35/81] net: Fix netns for ip_tunnel_init_flow() Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 36/81] netrom: check buffer length before accessing it Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 37/81] drm/i915/dg1: Fix power gate sequence Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 38/81] netfilter: nft_set_hash: unaligned atomic read on struct nft_set_ext Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 39/81] net: llc: reset skb->transport_header Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 40/81] ALSA: usb-audio: US16x08: Initialize array before use Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 41/81] eth: bcmsysport: fix call balance of priv->clk handling routines Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 42/81] net: mv643xx_eth: fix an OF node reference leak Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 43/81] net: wwan: t7xx: Fix FSM command timeout issue Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 44/81] RDMA/rtrs: Ensure ib_sge list is accessible Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 45/81] net: reenable NETIF_F_IPV6_CSUM offload for BIG TCP packets Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 46/81] net: restrict SO_REUSEPORT to inet sockets Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 47/81] net: wwan: iosm: Properly check for valid exec stage in ipc_mmio_init() Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 48/81] af_packet: fix vlan_get_tci() vs MSG_PEEK Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 49/81] af_packet: fix vlan_get_protocol_dgram() " Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 50/81] ila: serialize calls to nf_register_net_hooks() Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 51/81] btrfs: rename and export __btrfs_cow_block() Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 52/81] btrfs: fix use-after-free when COWing tree bock and tracing is enabled Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 53/81] wifi: mac80211: wake the queues in case of failure in resume Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 54/81] drm/amdkfd: Correct the migration DMA map direction Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 55/81] btrfs: flush delalloc workers queue before stopping cleaner kthread during unmount Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 56/81] ALSA: hda/realtek: Add new alc2xx-fixup-headset-mic model Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 57/81] sound: usb: enable DSD output for ddHiFi TC44C Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 58/81] sound: usb: format: dont warn that raw DSD is unsupported Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 59/81] bpf: fix potential error return Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 60/81] ksmbd: retry iterate_dir in smb2_query_dir Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 61/81] net: usb: qmi_wwan: add Telit FE910C04 compositions Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 62/81] Bluetooth: hci_core: Fix sleeping function called from invalid context Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 63/81] irqchip/gic: Correct declaration of *percpu_base pointer in union gic_base Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 64/81] ARC: build: Try to guess GCC variant of cross compiler Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 65/81] usb: xhci: Avoid queuing redundant Stop Endpoint commands Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 66/81] modpost: fix input MODULE_DEVICE_TABLE() built for 64-bit on 32-bit host Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 67/81] modpost: fix the missed iteration for the max bit in do_input() Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 68/81] ALSA hda/realtek: Add quirk for Framework F111:000C Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 69/81] ALSA: seq: oss: Fix races at processing SysEx messages Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 70/81] kcov: mark in_softirq_really() as __always_inline Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 71/81] RDMA/uverbs: Prevent integer overflow issue Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 72/81] pinctrl: mcp23s08: Fix sleeping in atomic context due to regmap locking Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 73/81] sky2: Add device ID 11ab:4373 for Marvell 88E8075 Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 74/81] net/sctp: Prevent autoclose integer overflow in sctp_association_init() Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 75/81] drm: adv7511: Drop dsi single lane support Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 76/81] dt-bindings: display: adi,adv7533: Drop " Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 77/81] mm/readahead: fix large folio support in async readahead Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 78/81] mm: vmscan: account for free pages to prevent infinite Loop in throttle_direct_reclaim() Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 79/81] mptcp: fix TCP options overflow Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 80/81] mptcp: fix recvbuffer adjust on sleeping rcvmsg Greg Kroah-Hartman
2025-01-06 15:16 ` [PATCH 6.1 81/81] mptcp: dont always assume copied data in mptcp_cleanup_rbuf() Greg Kroah-Hartman
2025-01-06 18:22 ` [PATCH 6.1 00/81] 6.1.124-rc1 review Pavel Machek
2025-01-06 19:29 ` Florian Fainelli
2025-01-06 22:26 ` Peter Schneider
2025-01-07  0:22 ` SeongJae Park
2025-01-07  7:10 ` Ron Economos
2025-01-07 12:33 ` Mark Brown
2025-01-07 12:36 ` Naresh Kamboju
2025-01-07 12:44 ` Jon Hunter
2025-01-07 20:59 ` [PATCH 6.1] " Hardik Garg
2025-01-07 23:16 ` [PATCH 6.1 00/81] " Shuah Khan
2025-01-08 12:54 ` Muhammad Usama Anjum

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250106151129.492910950@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=decui@microsoft.com \
    --cc=mhklinux@outlook.com \
    --cc=namjain@linux.microsoft.com \
    --cc=patches@lists.linux.dev \
    --cc=stable@vger.kernel.org \
    --cc=wei.liu@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.