public inbox for linux-hyperv@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] kexec: Refuse kernel-unsafe Microsoft Hypervisor transitions
@ 2026-01-28 17:41 Stanislav Kinsburskii
  2026-01-28 17:42 ` [PATCH 1/2] kexec: Add permission notifier chain for kexec operations Stanislav Kinsburskii
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Stanislav Kinsburskii @ 2026-01-28 17:41 UTC (permalink / raw)
  To: rppt, akpm, bhe, kys, haiyangz, wei.liu, decui, longli
  Cc: kexec, linux-hyperv, linux-kernel

When Microsoft Hypervisor is active, the kernel may have memory “deposited”
to the hypervisor. Those pages are no longer safe for the kernel to touch,
and attempting to access them can trigger a GPF. The problem becomes acute
with kexec: the “deposited pages” state does not survive the transition,
and the next kernel has no reliable way to know which pages are still
owned/managed by the hypervisor.

Until there is a proper handoff mechanism to preserve that state across
kexec, the only safe behavior is to refuse kexec whenever there is shared
hypervisor state that cannot survive the transition—most notably deposited
pages, and also cases where VMs are still running.

This series adds the missing kexec integration point needed by MSHV: a
callback at the kexec “freeze” stage so the driver can make the transition
safe (or block it). With this hook, MSHV can refuse kexec while VMs are
running, attempt to withdraw deposited pages when possible (e.g. L1VH
host), and fail the transition if any pages remain deposited.

---

Stanislav Kinsburskii (2):
      kexec: Add permission notifier chain for kexec operations
      mshv: Add kexec blocking support


 drivers/hv/Makefile            |    1 +
 drivers/hv/hv_proc.c           |    4 ++
 drivers/hv/mshv_kexec.c        |   66 ++++++++++++++++++++++++++++++++++++++++
 drivers/hv/mshv_root.h         |   14 ++++++++
 drivers/hv/mshv_root_hv_call.c |    2 +
 drivers/hv/mshv_root_main.c    |    7 ++++
 include/linux/kexec.h          |    6 ++++
 kernel/kexec_core.c            |   24 +++++++++++++++
 8 files changed, 124 insertions(+)
 create mode 100644 drivers/hv/mshv_kexec.c


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/2] kexec: Add permission notifier chain for kexec operations
  2026-01-28 17:41 [PATCH 0/2] kexec: Refuse kernel-unsafe Microsoft Hypervisor transitions Stanislav Kinsburskii
@ 2026-01-28 17:42 ` Stanislav Kinsburskii
  2026-02-12 22:12   ` Mukesh R
  2026-01-28 17:42 ` [PATCH 2/2] mshv: Add kexec blocking support Stanislav Kinsburskii
  2026-02-11 23:30 ` [PATCH 0/2] kexec: Refuse kernel-unsafe Microsoft Hypervisor transitions Stanislav Kinsburskii
  2 siblings, 1 reply; 9+ messages in thread
From: Stanislav Kinsburskii @ 2026-01-28 17:42 UTC (permalink / raw)
  To: rppt, akpm, bhe, kys, haiyangz, wei.liu, decui, longli
  Cc: kexec, linux-hyperv, linux-kernel

Add a blocking notifier chain to allow subsystems to be notified
before kexec execution. This enables modules to perform necessary
cleanup or validation before the system transitions to a new kernel, or
to block kexec if the transition is not possible under current conditions.

Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
---
 include/linux/kexec.h |    6 ++++++
 kernel/kexec_core.c   |   24 ++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index ff7e231b0485..311037d30f9e 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -35,6 +35,7 @@ extern note_buf_t __percpu *crash_notes;
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/highmem.h>
+#include <linux/notifier.h>
 #include <asm/kexec.h>
 #include <linux/crash_core.h>
 
@@ -532,10 +533,13 @@ extern bool kexec_file_dbg_print;
 
 extern void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size);
 extern void kimage_unmap_segment(void *buffer);
+extern int kexec_block_notifier_register(struct notifier_block *nb);
+extern int kexec_block_notifier_unregister(struct notifier_block *nb);
 #else /* !CONFIG_KEXEC_CORE */
 struct pt_regs;
 struct task_struct;
 struct kimage;
+struct notifier_block;
 static inline void __crash_kexec(struct pt_regs *regs) { }
 static inline void crash_kexec(struct pt_regs *regs) { }
 static inline int kexec_should_crash(struct task_struct *p) { return 0; }
@@ -543,6 +547,8 @@ static inline int kexec_crash_loaded(void) { return 0; }
 static inline void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size)
 { return NULL; }
 static inline void kimage_unmap_segment(void *buffer) { }
+static inline int kexec_block_notifier_register(struct notifier_block *nb) { return 0; }
+static inline int kexec_block_notifier_unregister(struct notifier_block *nb) { return 0; }
 #define kexec_in_progress false
 #endif /* CONFIG_KEXEC_CORE */
 
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 0f92acdd354d..1e86a6f175f0 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -57,6 +57,20 @@ bool kexec_in_progress = false;
 
 bool kexec_file_dbg_print;
 
+static BLOCKING_NOTIFIER_HEAD(kexec_block_list);
+
+int kexec_block_notifier_register(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&kexec_block_list, nb);
+}
+EXPORT_SYMBOL_GPL(kexec_block_notifier_register);
+
+int kexec_block_notifier_unregister(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&kexec_block_list, nb);
+}
+EXPORT_SYMBOL_GPL(kexec_block_notifier_unregister);
+
 /*
  * When kexec transitions to the new kernel there is a one-to-one
  * mapping between physical and virtual addresses.  On processors
@@ -1124,6 +1138,12 @@ bool kexec_load_permitted(int kexec_image_type)
 	return true;
 }
 
+static int kexec_check_blockers(void)
+{
+	/* Notify subsystems of impending kexec */
+	return blocking_notifier_call_chain(&kexec_block_list, 0, NULL);
+}
+
 /*
  * Move into place and start executing a preloaded standalone
  * executable.  If nothing was preloaded return an error.
@@ -1139,6 +1159,10 @@ int kernel_kexec(void)
 		goto Unlock;
 	}
 
+	error = kexec_check_blockers();
+	if (error)
+		goto Unlock;
+
 	error = liveupdate_reboot();
 	if (error)
 		goto Unlock;



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] mshv: Add kexec blocking support
  2026-01-28 17:41 [PATCH 0/2] kexec: Refuse kernel-unsafe Microsoft Hypervisor transitions Stanislav Kinsburskii
  2026-01-28 17:42 ` [PATCH 1/2] kexec: Add permission notifier chain for kexec operations Stanislav Kinsburskii
@ 2026-01-28 17:42 ` Stanislav Kinsburskii
  2026-02-12 22:11   ` Mukesh R
  2026-02-11 23:30 ` [PATCH 0/2] kexec: Refuse kernel-unsafe Microsoft Hypervisor transitions Stanislav Kinsburskii
  2 siblings, 1 reply; 9+ messages in thread
From: Stanislav Kinsburskii @ 2026-01-28 17:42 UTC (permalink / raw)
  To: rppt, akpm, bhe, kys, haiyangz, wei.liu, decui, longli
  Cc: kexec, linux-hyperv, linux-kernel

Add kexec notifier to prevent kexec when VMs are active or memory
is deposited. The notifier blocks kexec operations if:
- Active VMs exist in the partition table
- Pages are still deposited to the hypervisor

The kernel cannot access hypervisor deposited pages: any access
triggers a GPF. Until the deposited page state can be handed over
to the next kernel, kexec must be blocked if there is any shared
state between kernel and hypervisor.

For L1 host virtualization, attempt to withdraw all deposited memory before
allowing kexec to proceed. If withdrawal fails or pages remain deposited,
block the kexec operation.

Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
---
 drivers/hv/Makefile            |    1 +
 drivers/hv/hv_proc.c           |    4 ++
 drivers/hv/mshv_kexec.c        |   66 ++++++++++++++++++++++++++++++++++++++++
 drivers/hv/mshv_root.h         |   14 ++++++++
 drivers/hv/mshv_root_hv_call.c |    2 +
 drivers/hv/mshv_root_main.c    |    7 ++++
 6 files changed, 94 insertions(+)
 create mode 100644 drivers/hv/mshv_kexec.c

diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index a49f93c2d245..bb72be5cc525 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -15,6 +15,7 @@ hv_vmbus-$(CONFIG_HYPERV_TESTING)	+= hv_debugfs.o
 hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o
 mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \
 	       mshv_root_hv_call.o mshv_portid_table.o mshv_regions.o
+mshv_root-$(CONFIG_KEXEC) += mshv_kexec.o
 mshv_vtl-y := mshv_vtl_main.o
 
 # Code that must be built-in
diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
index 89870c1b0087..39bbbedb0340 100644
--- a/drivers/hv/hv_proc.c
+++ b/drivers/hv/hv_proc.c
@@ -15,6 +15,8 @@
  */
 #define HV_DEPOSIT_MAX (HV_HYP_PAGE_SIZE / sizeof(u64) - 1)
 
+atomic_t hv_pages_deposited;
+
 /* Deposits exact number of pages. Must be called with interrupts enabled.  */
 int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
 {
@@ -93,6 +95,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
 		goto err_free_allocations;
 	}
 
+	atomic_add(page_count, &hv_pages_deposited);
+
 	ret = 0;
 	goto free_buf;
 
diff --git a/drivers/hv/mshv_kexec.c b/drivers/hv/mshv_kexec.c
new file mode 100644
index 000000000000..5222b2e4ff97
--- /dev/null
+++ b/drivers/hv/mshv_kexec.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2026, Microsoft Corporation.
+ *
+ * Kexec blocking support for mshv_root module.
+ *
+ * Author: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
+ */
+
+#include <linux/kexec.h>
+#include <linux/notifier.h>
+#include <asm/mshyperv.h>
+#include "mshv_root.h"
+
+/* Kexec blocker: return 0 to allow the transition, negative errno to veto. */
+static int mshv_block_kexec_notify(struct notifier_block *nb,
+				   unsigned long action, void *arg)
+{
+	if (!hash_empty(mshv_root.pt_htable)) {
+		pr_warn("mshv: Cannot perform kexec while VMs are active\n");
+		return -EBUSY;
+	}
+
+	if (hv_l1vh_partition()) {
+		int err;
+
+		/* Attempt to withdraw all the deposited pages */
+		err = hv_call_withdraw_memory(U64_MAX, NUMA_NO_NODE,
+					      hv_current_partition_id);
+		if (err) {
+			pr_err("mshv: Failed to withdraw memory from L1 virtualization: %d\n",
+			       err);
+			return err;
+		}
+	}
+
+	/* The next kernel cannot tell which pages are still deposited */
+	if (atomic_read(&hv_pages_deposited)) {
+		pr_warn("mshv: Cannot perform kexec while pages are deposited\n");
+		return -EBUSY;
+	}
+	return 0;
+}
+
+static struct notifier_block mshv_kexec_notifier = {
+	.notifier_call = mshv_block_kexec_notify,
+};
+
+int __init mshv_kexec_init(void)
+{
+	int err;
+
+	err = kexec_block_notifier_register(&mshv_kexec_notifier);
+	if (err) {
+		pr_err("mshv: Could not register kexec notifier: %pe\n",
+		       ERR_PTR(err));
+		return err;
+	}
+
+	return 0;
+}
+
+void __exit mshv_kexec_exit(void)
+{
+	(void)kexec_block_notifier_unregister(&mshv_kexec_notifier);
+}
diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h
index 3c1d88b36741..311f76262d10 100644
--- a/drivers/hv/mshv_root.h
+++ b/drivers/hv/mshv_root.h
@@ -17,6 +17,7 @@
 #include <linux/build_bug.h>
 #include <linux/mmu_notifier.h>
 #include <uapi/linux/mshv.h>
+#include <hyperv/hvhdk.h>
 
 /*
  * Hypervisor must be between these version numbers (inclusive)
@@ -319,6 +320,7 @@ int hv_call_get_partition_property_ex(u64 partition_id, u64 property_code, u64 a
 extern struct mshv_root mshv_root;
 extern enum hv_scheduler_type hv_scheduler_type;
 extern u8 * __percpu *hv_synic_eventring_tail;
+extern atomic_t hv_pages_deposited;
 
 struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
 					   u64 uaddr, u32 flags);
@@ -333,4 +335,16 @@ bool mshv_region_handle_gfn_fault(struct mshv_mem_region *region, u64 gfn);
 void mshv_region_movable_fini(struct mshv_mem_region *region);
 bool mshv_region_movable_init(struct mshv_mem_region *region);
 
+#if IS_ENABLED(CONFIG_KEXEC)
+int mshv_kexec_init(void);
+void mshv_kexec_exit(void);
+#else
+static inline int mshv_kexec_init(void)
+{
+	return 0;
+}
+
+static inline void mshv_kexec_exit(void) { }
+#endif
+
 #endif /* _MSHV_ROOT_H_ */
diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c
index 06f2bac8039d..4203af5190ee 100644
--- a/drivers/hv/mshv_root_hv_call.c
+++ b/drivers/hv/mshv_root_hv_call.c
@@ -73,6 +73,8 @@ int hv_call_withdraw_memory(u64 count, int node, u64 partition_id)
 		for (i = 0; i < completed; i++)
 			__free_page(pfn_to_page(output_page->gpa_page_list[i]));
 
+		atomic_sub(completed, &hv_pages_deposited);
+
 		if (!hv_result_success(status)) {
 			if (hv_result(status) == HV_STATUS_NO_RESOURCES)
 				status = HV_STATUS_SUCCESS;
diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
index 5fc572e31cd7..d55aa69d130c 100644
--- a/drivers/hv/mshv_root_main.c
+++ b/drivers/hv/mshv_root_main.c
@@ -2330,6 +2330,10 @@ static int __init mshv_parent_partition_init(void)
 	if (ret)
 		goto deinit_root_scheduler;
 
+	ret = mshv_kexec_init();
+	if (ret)
+		goto deinit_irqfd_wq;
+
 	spin_lock_init(&mshv_root.pt_ht_lock);
 	hash_init(mshv_root.pt_htable);
 
@@ -2337,6 +2341,8 @@ static int __init mshv_parent_partition_init(void)
 
 	return 0;
 
+deinit_irqfd_wq:
+	mshv_irqfd_wq_cleanup();
 deinit_root_scheduler:
 	root_scheduler_deinit();
 exit_partition:
@@ -2356,6 +2362,7 @@ static void __exit mshv_parent_partition_exit(void)
 	hv_setup_mshv_handler(NULL);
 	mshv_port_table_fini();
 	misc_deregister(&mshv_dev);
+	mshv_kexec_exit();
 	mshv_irqfd_wq_cleanup();
 	root_scheduler_deinit();
 	if (hv_root_partition())



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/2] kexec: Refuse kernel-unsafe Microsoft Hypervisor transitions
  2026-01-28 17:41 [PATCH 0/2] kexec: Refuse kernel-unsafe Microsoft Hypervisor transitions Stanislav Kinsburskii
  2026-01-28 17:42 ` [PATCH 1/2] kexec: Add permission notifier chain for kexec operations Stanislav Kinsburskii
  2026-01-28 17:42 ` [PATCH 2/2] mshv: Add kexec blocking support Stanislav Kinsburskii
@ 2026-02-11 23:30 ` Stanislav Kinsburskii
  2 siblings, 0 replies; 9+ messages in thread
From: Stanislav Kinsburskii @ 2026-02-11 23:30 UTC (permalink / raw)
  To: rppt, akpm, bhe, kys, haiyangz, wei.liu, decui, longli
  Cc: kexec, linux-hyperv, linux-kernel

On Wed, Jan 28, 2026 at 05:41:56PM +0000, Stanislav Kinsburskii wrote:
> When Microsoft Hypervisor is active, the kernel may have memory “deposited”
> to the hypervisor. Those pages are no longer safe for the kernel to touch,
> and attempting to access them can trigger a GPF. The problem becomes acute
> with kexec: the “deposited pages” state does not survive the transition,
> and the next kernel has no reliable way to know which pages are still
> owned/managed by the hypervisor.
> 
> Until there is a proper handoff mechanism to preserve that state across
> kexec, the only safe behavior is to refuse kexec whenever there is shared
> hypervisor state that cannot survive the transition—most notably deposited
> pages, and also cases where VMs are still running.
> 
> This series adds the missing kexec integration point needed by MSHV: a
> callback at the kexec “freeze” stage so the driver can make the transition
> safe (or block it). With this hook, MSHV can refuse kexec while VMs are
> running, attempt to withdraw deposited pages when possible (e.g. L1VH
> host), and fail the transition if any pages remain deposited.
> 
> ---
> 
> Stanislav Kinsburskii (2):
>       kexec: Add permission notifier chain for kexec operations
>       mshv: Add kexec blocking support
> 

Hi,

I’m sending a gentle follow‑up on the patch series below, which I posted
about two weeks ago. I wanted to check whether anyone has had a chance
to look at it, or if there are concerns I should address.

Any feedback would be appreciated.

Thanks for your time.

Best regards,
Stanislav

> 
>  drivers/hv/Makefile            |    1 +
>  drivers/hv/hv_proc.c           |    4 ++
>  drivers/hv/mshv_kexec.c        |   66 ++++++++++++++++++++++++++++++++++++++++
>  drivers/hv/mshv_root.h         |   14 ++++++++
>  drivers/hv/mshv_root_hv_call.c |    2 +
>  drivers/hv/mshv_root_main.c    |    7 ++++
>  include/linux/kexec.h          |    6 ++++
>  kernel/kexec_core.c            |   24 +++++++++++++++
>  8 files changed, 124 insertions(+)
>  create mode 100644 drivers/hv/mshv_kexec.c
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] mshv: Add kexec blocking support
  2026-01-28 17:42 ` [PATCH 2/2] mshv: Add kexec blocking support Stanislav Kinsburskii
@ 2026-02-12 22:11   ` Mukesh R
  2026-02-18  8:14     ` Wei Liu
  2026-02-19 22:16     ` Stanislav Kinsburskii
  0 siblings, 2 replies; 9+ messages in thread
From: Mukesh R @ 2026-02-12 22:11 UTC (permalink / raw)
  To: Stanislav Kinsburskii, rppt, akpm, bhe, kys, haiyangz, wei.liu,
	decui, longli
  Cc: kexec, linux-hyperv, linux-kernel

On 1/28/26 09:42, Stanislav Kinsburskii wrote:
> Add kexec notifier to prevent kexec when VMs are active or memory
> is deposited. The notifier blocks kexec operations if:
> - Active VMs exist in the partition table
> - Pages are still deposited to the hypervisor
> 
> The kernel cannot access hypervisor deposited pages: any access
> triggers a GPF. Until the deposited page state can be handed over
> to the next kernel, kexec must be blocked if there is any shared
> state between kernel and hypervisor.
> 
> For L1 host virtualization, attempt to withdraw all deposited memory before
> allowing kexec to proceed. If withdrawal fails or pages remain deposited
> block the kexec operation.
> 
> Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> ---
>   drivers/hv/Makefile            |    1 +
>   drivers/hv/hv_proc.c           |    4 ++
>   drivers/hv/mshv_kexec.c        |   66 ++++++++++++++++++++++++++++++++++++++++
>   drivers/hv/mshv_root.h         |   14 ++++++++
>   drivers/hv/mshv_root_hv_call.c |    2 +
>   drivers/hv/mshv_root_main.c    |    7 ++++
>   6 files changed, 94 insertions(+)
>   create mode 100644 drivers/hv/mshv_kexec.c
> 
> diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
> index a49f93c2d245..bb72be5cc525 100644
> --- a/drivers/hv/Makefile
> +++ b/drivers/hv/Makefile
> @@ -15,6 +15,7 @@ hv_vmbus-$(CONFIG_HYPERV_TESTING)	+= hv_debugfs.o
>   hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o
>   mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \
>   	       mshv_root_hv_call.o mshv_portid_table.o mshv_regions.o
> +mshv_root-$(CONFIG_KEXEC) += mshv_kexec.o
>   mshv_vtl-y := mshv_vtl_main.o
>   
>   # Code that must be built-in
> diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
> index 89870c1b0087..39bbbedb0340 100644
> --- a/drivers/hv/hv_proc.c
> +++ b/drivers/hv/hv_proc.c
> @@ -15,6 +15,8 @@
>    */
>   #define HV_DEPOSIT_MAX (HV_HYP_PAGE_SIZE / sizeof(u64) - 1)
>   
> +atomic_t hv_pages_deposited;
> +
>   /* Deposits exact number of pages. Must be called with interrupts enabled.  */
>   int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
>   {
> @@ -93,6 +95,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
>   		goto err_free_allocations;
>   	}
>   
> +	atomic_add(page_count, &hv_pages_deposited);
> +
>   	ret = 0;
>   	goto free_buf;
>   
> diff --git a/drivers/hv/mshv_kexec.c b/drivers/hv/mshv_kexec.c
> new file mode 100644
> index 000000000000..5222b2e4ff97
> --- /dev/null
> +++ b/drivers/hv/mshv_kexec.c
> @@ -0,0 +1,66 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2026, Microsoft Corporation.
> + *
> + * Live update orchestration management for mshv_root module.
> + *
> + * Author: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> + */
> +
> +#include <linux/kexec.h>
> +#include <linux/notifier.h>
> +#include <asm/mshyperv.h>
> +#include "mshv_root.h"
> +
> +static BLOCKING_NOTIFIER_HEAD(overlay_notify_chain);
> +
> +static int mshv_block_kexec_notify(struct notifier_block *nb,
> +				   unsigned long action, void *arg)
> +{
> +	if (!hash_empty(mshv_root.pt_htable)) {
> +		pr_warn("mshv: Cannot perform kexec while VMs are active\n");
> +		return -EBUSY;
> +	}
> +
> +	if (hv_l1vh_partition()) {
> +		int err;
> +
> +		/* Attempt to withdraw all the deposited pages */
> +		err = hv_call_withdraw_memory(U64_MAX, NUMA_NO_NODE,
> +					      hv_current_partition_id);
> +		if (err) {
> +			pr_err("mshv: Failed to withdraw memory from L1 virtualization: %d\n",
> +			       err);
> +			return err;
> +		}
> +	}
> +
> +	if (atomic_read(&hv_pages_deposited)) {
> +		pr_warn("mshv: Cannot perform kexec while pages are deposited\n");
> +		return -EBUSY;
> +	}
> +	return 0;
> +}
> +

What guarantees another deposit won't happen after this. Are all cpus
"locked" in kexec path and not doing anything at this point?

Thanks,
-Mukesh



> +static struct notifier_block mshv_kexec_notifier = {
> +	.notifier_call = mshv_block_kexec_notify,
> +};
> +
> +int __init mshv_kexec_init(void)
> +{
> +	int err;
> +
> +	err = kexec_block_notifier_register(&mshv_kexec_notifier);
> +	if (err) {
> +		pr_err("mshv: Could not register kexec notifier: %pe\n",
> +		       ERR_PTR(err));
> +		return err;
> +	}
> +
> +	return 0;
> +}
> +
> +void __exit mshv_kexec_exit(void)
> +{
> +	(void)kexec_block_notifier_unregister(&mshv_kexec_notifier);
> +}
> diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h
> index 3c1d88b36741..311f76262d10 100644
> --- a/drivers/hv/mshv_root.h
> +++ b/drivers/hv/mshv_root.h
> @@ -17,6 +17,7 @@
>   #include <linux/build_bug.h>
>   #include <linux/mmu_notifier.h>
>   #include <uapi/linux/mshv.h>
> +#include <hyperv/hvhdk.h>
>   
>   /*
>    * Hypervisor must be between these version numbers (inclusive)
> @@ -319,6 +320,7 @@ int hv_call_get_partition_property_ex(u64 partition_id, u64 property_code, u64 a
>   extern struct mshv_root mshv_root;
>   extern enum hv_scheduler_type hv_scheduler_type;
>   extern u8 * __percpu *hv_synic_eventring_tail;
> +extern atomic_t hv_pages_deposited;
>   
>   struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
>   					   u64 uaddr, u32 flags);
> @@ -333,4 +335,16 @@ bool mshv_region_handle_gfn_fault(struct mshv_mem_region *region, u64 gfn);
>   void mshv_region_movable_fini(struct mshv_mem_region *region);
>   bool mshv_region_movable_init(struct mshv_mem_region *region);
>   
> +#if IS_ENABLED(CONFIG_KEXEC)
> +int mshv_kexec_init(void);
> +void mshv_kexec_exit(void);
> +#else
> +static inline int mshv_kexec_init(void)
> +{
> +	return 0;
> +}
> +
> +static inline void mshv_kexec_exit(void) { }
> +#endif
> +
>   #endif /* _MSHV_ROOT_H_ */
> diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c
> index 06f2bac8039d..4203af5190ee 100644
> --- a/drivers/hv/mshv_root_hv_call.c
> +++ b/drivers/hv/mshv_root_hv_call.c
> @@ -73,6 +73,8 @@ int hv_call_withdraw_memory(u64 count, int node, u64 partition_id)
>   		for (i = 0; i < completed; i++)
>   			__free_page(pfn_to_page(output_page->gpa_page_list[i]));
>   
> +		atomic_sub(completed, &hv_pages_deposited);
> +
>   		if (!hv_result_success(status)) {
>   			if (hv_result(status) == HV_STATUS_NO_RESOURCES)
>   				status = HV_STATUS_SUCCESS;
> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> index 5fc572e31cd7..d55aa69d130c 100644
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -2330,6 +2330,10 @@ static int __init mshv_parent_partition_init(void)
>   	if (ret)
>   		goto deinit_root_scheduler;
>   
> +	ret = mshv_kexec_init();
> +	if (ret)
> +		goto deinit_irqfd_wq;
> +
>   	spin_lock_init(&mshv_root.pt_ht_lock);
>   	hash_init(mshv_root.pt_htable);
>   
> @@ -2337,6 +2341,8 @@ static int __init mshv_parent_partition_init(void)
>   
>   	return 0;
>   
> +deinit_irqfd_wq:
> +	mshv_irqfd_wq_cleanup();
>   deinit_root_scheduler:
>   	root_scheduler_deinit();
>   exit_partition:
> @@ -2356,6 +2362,7 @@ static void __exit mshv_parent_partition_exit(void)
>   	hv_setup_mshv_handler(NULL);
>   	mshv_port_table_fini();
>   	misc_deregister(&mshv_dev);
> +	mshv_kexec_exit();
>   	mshv_irqfd_wq_cleanup();
>   	root_scheduler_deinit();
>   	if (hv_root_partition())
> 
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] kexec: Add permission notifier chain for kexec operations
  2026-01-28 17:42 ` [PATCH 1/2] kexec: Add permission notifier chain for kexec operations Stanislav Kinsburskii
@ 2026-02-12 22:12   ` Mukesh R
  2026-02-19 22:13     ` Stanislav Kinsburskii
  0 siblings, 1 reply; 9+ messages in thread
From: Mukesh R @ 2026-02-12 22:12 UTC (permalink / raw)
  To: Stanislav Kinsburskii, rppt, akpm, bhe, kys, haiyangz, wei.liu,
	decui, longli
  Cc: kexec, linux-hyperv, linux-kernel

On 1/28/26 09:42, Stanislav Kinsburskii wrote:
> Add a blocking notifier chain to allow subsystems to be notified
> before kexec execution. This enables modules to perform necessary
> cleanup or validation before the system transitions to a new kernel or
> block kexec if not possible under current conditions.
> 
> Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> ---
>   include/linux/kexec.h |    6 ++++++
>   kernel/kexec_core.c   |   24 ++++++++++++++++++++++++
>   2 files changed, 30 insertions(+)
> 
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index ff7e231b0485..311037d30f9e 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -35,6 +35,7 @@ extern note_buf_t __percpu *crash_notes;
>   #include <linux/ioport.h>
>   #include <linux/module.h>
>   #include <linux/highmem.h>
> +#include <linux/notifier.h>
>   #include <asm/kexec.h>
>   #include <linux/crash_core.h>
>   
> @@ -532,10 +533,13 @@ extern bool kexec_file_dbg_print;
>   
>   extern void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size);
>   extern void kimage_unmap_segment(void *buffer);
> +extern int kexec_block_notifier_register(struct notifier_block *nb);
> +extern int kexec_block_notifier_unregister(struct notifier_block *nb);
>   #else /* !CONFIG_KEXEC_CORE */
>   struct pt_regs;
>   struct task_struct;
>   struct kimage;
> +struct notifier_block;
>   static inline void __crash_kexec(struct pt_regs *regs) { }
>   static inline void crash_kexec(struct pt_regs *regs) { }
>   static inline int kexec_should_crash(struct task_struct *p) { return 0; }
> @@ -543,6 +547,8 @@ static inline int kexec_crash_loaded(void) { return 0; }
>   static inline void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size)
>   { return NULL; }
>   static inline void kimage_unmap_segment(void *buffer) { }
> +static inline int kexec_block_notifier_register(struct notifier_block *nb) { }
> +static inline int kexec_block_notifier_unregister(struct notifier_block *nb) { }
>   #define kexec_in_progress false
>   #endif /* CONFIG_KEXEC_CORE */
>   
> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> index 0f92acdd354d..1e86a6f175f0 100644
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -57,6 +57,20 @@ bool kexec_in_progress = false;
>   
>   bool kexec_file_dbg_print;
>   
> +static BLOCKING_NOTIFIER_HEAD(kexec_block_list);
> +
> +int kexec_block_notifier_register(struct notifier_block *nb)
> +{
> +	return blocking_notifier_chain_register(&kexec_block_list, nb);
> +}
> +EXPORT_SYMBOL_GPL(kexec_block_notifier_register);
> +
> +int kexec_block_notifier_unregister(struct notifier_block *nb)
> +{
> +	return blocking_notifier_chain_unregister(&kexec_block_list, nb);
> +}
> +EXPORT_SYMBOL_GPL(kexec_block_notifier_unregister);
> +
>   /*
>    * When kexec transitions to the new kernel there is a one-to-one
>    * mapping between physical and virtual addresses.  On processors
> @@ -1124,6 +1138,12 @@ bool kexec_load_permitted(int kexec_image_type)
>   	return true;
>   }
>   
> +static int kexec_check_blockers(void)
> +{
> +	/* Notify subsystems of impending kexec */
> +	return blocking_notifier_call_chain(&kexec_block_list, 0, NULL);
> +}
> +
>   /*
>    * Move into place and start executing a preloaded standalone
>    * executable.  If nothing was preloaded return an error.
> @@ -1139,6 +1159,10 @@ int kernel_kexec(void)
>   		goto Unlock;
>   	}
>   
> +	error = kexec_check_blockers();

This could take a long time, and I am not sure if it's a good idea
to stall kexec with such dependencies.

Thanks,
-Mukesh


> +	if (error)
> +		goto Unlock;
> +
>   	error = liveupdate_reboot();
>   	if (error)
>   		goto Unlock;
> 
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] mshv: Add kexec blocking support
  2026-02-12 22:11   ` Mukesh R
@ 2026-02-18  8:14     ` Wei Liu
  2026-02-19 22:16     ` Stanislav Kinsburskii
  1 sibling, 0 replies; 9+ messages in thread
From: Wei Liu @ 2026-02-18  8:14 UTC (permalink / raw)
  To: Mukesh R
  Cc: Stanislav Kinsburskii, rppt, akpm, bhe, kys, haiyangz, wei.liu,
	decui, longli, kexec, linux-hyperv, linux-kernel

On Thu, Feb 12, 2026 at 02:11:13PM -0800, Mukesh R wrote:
> On 1/28/26 09:42, Stanislav Kinsburskii wrote:
> > Add kexec notifier to prevent kexec when VMs are active or memory
> > is deposited. The notifier blocks kexec operations if:
> > - Active VMs exist in the partition table
> > - Pages are still deposited to the hypervisor
> > 
> > The kernel cannot access hypervisor deposited pages: any access
> > triggers a GPF. Until the deposited page state can be handed over
> > to the next kernel, kexec must be blocked if there is any shared
> > state between kernel and hypervisor.
> > 
> > For L1 host virtualization, attempt to withdraw all deposited memory before
> > allowing kexec to proceed. If withdrawal fails or pages remain deposited
> > block the kexec operation.
> > 
> > Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> > ---
> >   drivers/hv/Makefile            |    1 +
> >   drivers/hv/hv_proc.c           |    4 ++
> >   drivers/hv/mshv_kexec.c        |   66 ++++++++++++++++++++++++++++++++++++++++
> >   drivers/hv/mshv_root.h         |   14 ++++++++
> >   drivers/hv/mshv_root_hv_call.c |    2 +
> >   drivers/hv/mshv_root_main.c    |    7 ++++
> >   6 files changed, 94 insertions(+)
> >   create mode 100644 drivers/hv/mshv_kexec.c
> > 
> > diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
> > index a49f93c2d245..bb72be5cc525 100644
> > --- a/drivers/hv/Makefile
> > +++ b/drivers/hv/Makefile
> > @@ -15,6 +15,7 @@ hv_vmbus-$(CONFIG_HYPERV_TESTING)	+= hv_debugfs.o
> >   hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o
> >   mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \
> >   	       mshv_root_hv_call.o mshv_portid_table.o mshv_regions.o
> > +mshv_root-$(CONFIG_KEXEC) += mshv_kexec.o
> >   mshv_vtl-y := mshv_vtl_main.o
> >   # Code that must be built-in
> > diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
> > index 89870c1b0087..39bbbedb0340 100644
> > --- a/drivers/hv/hv_proc.c
> > +++ b/drivers/hv/hv_proc.c
> > @@ -15,6 +15,8 @@
> >    */
> >   #define HV_DEPOSIT_MAX (HV_HYP_PAGE_SIZE / sizeof(u64) - 1)
> > +atomic_t hv_pages_deposited;
> > +
> >   /* Deposits exact number of pages. Must be called with interrupts enabled.  */
> >   int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
> >   {
> > @@ -93,6 +95,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
> >   		goto err_free_allocations;
> >   	}
> > +	atomic_add(page_count, &hv_pages_deposited);
> > +
> >   	ret = 0;
> >   	goto free_buf;
> > diff --git a/drivers/hv/mshv_kexec.c b/drivers/hv/mshv_kexec.c
> > new file mode 100644
> > index 000000000000..5222b2e4ff97
> > --- /dev/null
> > +++ b/drivers/hv/mshv_kexec.c
> > @@ -0,0 +1,66 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (c) 2026, Microsoft Corporation.
> > + *
> > + * Live update orchestration management for mshv_root module.
> > + *
> > + * Author: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> > + */
> > +
> > +#include <linux/kexec.h>
> > +#include <linux/notifier.h>
> > +#include <asm/mshyperv.h>
> > +#include "mshv_root.h"
> > +
> > +static BLOCKING_NOTIFIER_HEAD(overlay_notify_chain);
> > +
> > +static int mshv_block_kexec_notify(struct notifier_block *nb,
> > +				   unsigned long action, void *arg)
> > +{
> > +	if (!hash_empty(mshv_root.pt_htable)) {
> > +		pr_warn("mshv: Cannot perform kexec while VMs are active\n");
> > +		return -EBUSY;
> > +	}
> > +
> > +	if (hv_l1vh_partition()) {
> > +		int err;
> > +
> > +		/* Attempt to withdraw all the deposited pages */
> > +		err = hv_call_withdraw_memory(U64_MAX, NUMA_NO_NODE,
> > +					      hv_current_partition_id);
> > +		if (err) {
> > +			pr_err("mshv: Failed to withdraw memory from L1 virtualization: %d\n",
> > +			       err);
> > +			return err;
> > +		}
> > +	}
> > +
> > +	if (atomic_read(&hv_pages_deposited)) {
> > +		pr_warn("mshv: Cannot perform kexec while pages are deposited\n");
> > +		return -EBUSY;
> > +	}
> > +	return 0;
> > +}
> > +
> 
> What guarantees another deposit won't happen after this. Are all cpus
> "locked" in kexec path and not doing anything at this point?
> 

An alternative is to block kexec if any pages have ever been deposited.
This is a very heavy-handed approach.

Wei

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] kexec: Add permission notifier chain for kexec operations
  2026-02-12 22:12   ` Mukesh R
@ 2026-02-19 22:13     ` Stanislav Kinsburskii
  0 siblings, 0 replies; 9+ messages in thread
From: Stanislav Kinsburskii @ 2026-02-19 22:13 UTC (permalink / raw)
  To: Mukesh R
  Cc: rppt, akpm, bhe, kys, haiyangz, wei.liu, decui, longli, kexec,
	linux-hyperv, linux-kernel

On Thu, Feb 12, 2026 at 02:12:29PM -0800, Mukesh R wrote:
> On 1/28/26 09:42, Stanislav Kinsburskii wrote:
> > Add a blocking notifier chain to allow subsystems to be notified
> > before kexec execution. This enables modules to perform necessary
> > cleanup or validation before the system transitions to a new kernel, or
> > to block kexec if that is not possible under current conditions.
> > 
> > Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> > ---
> >   include/linux/kexec.h |    6 ++++++
> >   kernel/kexec_core.c   |   24 ++++++++++++++++++++++++
> >   2 files changed, 30 insertions(+)
> > 
> > diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> > index ff7e231b0485..311037d30f9e 100644
> > --- a/include/linux/kexec.h
> > +++ b/include/linux/kexec.h
> > @@ -35,6 +35,7 @@ extern note_buf_t __percpu *crash_notes;
> >   #include <linux/ioport.h>
> >   #include <linux/module.h>
> >   #include <linux/highmem.h>
> > +#include <linux/notifier.h>
> >   #include <asm/kexec.h>
> >   #include <linux/crash_core.h>
> > @@ -532,10 +533,13 @@ extern bool kexec_file_dbg_print;
> >   extern void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size);
> >   extern void kimage_unmap_segment(void *buffer);
> > +extern int kexec_block_notifier_register(struct notifier_block *nb);
> > +extern int kexec_block_notifier_unregister(struct notifier_block *nb);
> >   #else /* !CONFIG_KEXEC_CORE */
> >   struct pt_regs;
> >   struct task_struct;
> >   struct kimage;
> > +struct notifier_block;
> >   static inline void __crash_kexec(struct pt_regs *regs) { }
> >   static inline void crash_kexec(struct pt_regs *regs) { }
> >   static inline int kexec_should_crash(struct task_struct *p) { return 0; }
> > @@ -543,6 +547,8 @@ static inline int kexec_crash_loaded(void) { return 0; }
> >   static inline void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size)
> >   { return NULL; }
> >   static inline void kimage_unmap_segment(void *buffer) { }
> > +static inline int kexec_block_notifier_register(struct notifier_block *nb) { }
> > +static inline int kexec_block_notifier_unregister(struct notifier_block *nb) { }
> >   #define kexec_in_progress false
> >   #endif /* CONFIG_KEXEC_CORE */
> > diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> > index 0f92acdd354d..1e86a6f175f0 100644
> > --- a/kernel/kexec_core.c
> > +++ b/kernel/kexec_core.c
> > @@ -57,6 +57,20 @@ bool kexec_in_progress = false;
> >   bool kexec_file_dbg_print;
> > +static BLOCKING_NOTIFIER_HEAD(kexec_block_list);
> > +
> > +int kexec_block_notifier_register(struct notifier_block *nb)
> > +{
> > +	return blocking_notifier_chain_register(&kexec_block_list, nb);
> > +}
> > +EXPORT_SYMBOL_GPL(kexec_block_notifier_register);
> > +
> > +int kexec_block_notifier_unregister(struct notifier_block *nb)
> > +{
> > +	return blocking_notifier_chain_unregister(&kexec_block_list, nb);
> > +}
> > +EXPORT_SYMBOL_GPL(kexec_block_notifier_unregister);
> > +
> >   /*
> >    * When kexec transitions to the new kernel there is a one-to-one
> >    * mapping between physical and virtual addresses.  On processors
> > @@ -1124,6 +1138,12 @@ bool kexec_load_permitted(int kexec_image_type)
> >   	return true;
> >   }
> > +static int kexec_check_blockers(void)
> > +{
> > +	/* Notify subsystems of impending kexec */
> > +	return blocking_notifier_call_chain(&kexec_block_list, 0, NULL);
> > +}
> > +
> >   /*
> >    * Move into place and start executing a preloaded standalone
> >    * executable.  If nothing was preloaded return an error.
> > @@ -1139,6 +1159,10 @@ int kernel_kexec(void)
> >   		goto Unlock;
> >   	}
> > +	error = kexec_check_blockers();
> 
> This could take a long time, and I am not sure if it's a good idea
> to stall kexec with such dependencies.
> 

Whether the call takes time should not matter. liveupdate_reboot()
already introduced the same semantics below.

Thanks,
Stanislav

> Thanks,
> -Mukesh
> 
> 
> > +	if (error)
> > +		goto Unlock;
> > +
> >   	error = liveupdate_reboot();
> >   	if (error)
> >   		goto Unlock;
> > 
> > 
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] mshv: Add kexec blocking support
  2026-02-12 22:11   ` Mukesh R
  2026-02-18  8:14     ` Wei Liu
@ 2026-02-19 22:16     ` Stanislav Kinsburskii
  1 sibling, 0 replies; 9+ messages in thread
From: Stanislav Kinsburskii @ 2026-02-19 22:16 UTC (permalink / raw)
  To: Mukesh R
  Cc: rppt, akpm, bhe, kys, haiyangz, wei.liu, decui, longli, kexec,
	linux-hyperv, linux-kernel

On Thu, Feb 12, 2026 at 02:11:13PM -0800, Mukesh R wrote:
> On 1/28/26 09:42, Stanislav Kinsburskii wrote:
> > Add kexec notifier to prevent kexec when VMs are active or memory
> > is deposited. The notifier blocks kexec operations if:
> > - Active VMs exist in the partition table
> > - Pages are still deposited to the hypervisor
> > 
> > The kernel cannot access hypervisor deposited pages: any access
> > triggers a GPF. Until the deposited page state can be handed over
> > to the next kernel, kexec must be blocked if there is any shared
> > state between kernel and hypervisor.
> > 
> > For L1 host virtualization, attempt to withdraw all deposited memory before
> > allowing kexec to proceed. If withdrawal fails or pages remain deposited,
> > block the kexec operation.
> > 
> > Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> > ---
> >   drivers/hv/Makefile            |    1 +
> >   drivers/hv/hv_proc.c           |    4 ++
> >   drivers/hv/mshv_kexec.c        |   66 ++++++++++++++++++++++++++++++++++++++++
> >   drivers/hv/mshv_root.h         |   14 ++++++++
> >   drivers/hv/mshv_root_hv_call.c |    2 +
> >   drivers/hv/mshv_root_main.c    |    7 ++++
> >   6 files changed, 94 insertions(+)
> >   create mode 100644 drivers/hv/mshv_kexec.c
> > 
> > diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
> > index a49f93c2d245..bb72be5cc525 100644
> > --- a/drivers/hv/Makefile
> > +++ b/drivers/hv/Makefile
> > @@ -15,6 +15,7 @@ hv_vmbus-$(CONFIG_HYPERV_TESTING)	+= hv_debugfs.o
> >   hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o
> >   mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \
> >   	       mshv_root_hv_call.o mshv_portid_table.o mshv_regions.o
> > +mshv_root-$(CONFIG_KEXEC) += mshv_kexec.o
> >   mshv_vtl-y := mshv_vtl_main.o
> >   # Code that must be built-in
> > diff --git a/drivers/hv/hv_proc.c b/drivers/hv/hv_proc.c
> > index 89870c1b0087..39bbbedb0340 100644
> > --- a/drivers/hv/hv_proc.c
> > +++ b/drivers/hv/hv_proc.c
> > @@ -15,6 +15,8 @@
> >    */
> >   #define HV_DEPOSIT_MAX (HV_HYP_PAGE_SIZE / sizeof(u64) - 1)
> > +atomic_t hv_pages_deposited;
> > +
> >   /* Deposits exact number of pages. Must be called with interrupts enabled.  */
> >   int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
> >   {
> > @@ -93,6 +95,8 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
> >   		goto err_free_allocations;
> >   	}
> > +	atomic_add(page_count, &hv_pages_deposited);
> > +
> >   	ret = 0;
> >   	goto free_buf;
> > diff --git a/drivers/hv/mshv_kexec.c b/drivers/hv/mshv_kexec.c
> > new file mode 100644
> > index 000000000000..5222b2e4ff97
> > --- /dev/null
> > +++ b/drivers/hv/mshv_kexec.c
> > @@ -0,0 +1,66 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (c) 2026, Microsoft Corporation.
> > + *
> > + * Live update orchestration management for mshv_root module.
> > + *
> > + * Author: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> > + */
> > +
> > +#include <linux/kexec.h>
> > +#include <linux/notifier.h>
> > +#include <asm/mshyperv.h>
> > +#include "mshv_root.h"
> > +
> > +static BLOCKING_NOTIFIER_HEAD(overlay_notify_chain);
> > +
> > +static int mshv_block_kexec_notify(struct notifier_block *nb,
> > +				   unsigned long action, void *arg)
> > +{
> > +	if (!hash_empty(mshv_root.pt_htable)) {
> > +		pr_warn("mshv: Cannot perform kexec while VMs are active\n");
> > +		return -EBUSY;
> > +	}
> > +
> > +	if (hv_l1vh_partition()) {
> > +		int err;
> > +
> > +		/* Attempt to withdraw all the deposited pages */
> > +		err = hv_call_withdraw_memory(U64_MAX, NUMA_NO_NODE,
> > +					      hv_current_partition_id);
> > +		if (err) {
> > +			pr_err("mshv: Failed to withdraw memory from L1 virtualization: %d\n",
> > +			       err);
> > +			return err;
> > +		}
> > +	}
> > +
> > +	if (atomic_read(&hv_pages_deposited)) {
> > +		pr_warn("mshv: Cannot perform kexec while pages are deposited\n");
> > +		return -EBUSY;
> > +	}
> > +	return 0;
> > +}
> > +
> 
> What guarantees another deposit won't happen after this? Are all CPUs
> "locked" in kexec path and not doing anything at this point?
> 

Yeah, this should be guarded.

Thanks,
Stanislav

> Thanks,
> -Mukesh
> 
> 
> 
> > +static struct notifier_block mshv_kexec_notifier = {
> > +	.notifier_call = mshv_block_kexec_notify,
> > +};
> > +
> > +int __init mshv_kexec_init(void)
> > +{
> > +	int err;
> > +
> > +	err = kexec_block_notifier_register(&mshv_kexec_notifier);
> > +	if (err) {
> > +		pr_err("mshv: Could not register kexec notifier: %pe\n",
> > +		       ERR_PTR(err));
> > +		return err;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +void __exit mshv_kexec_exit(void)
> > +{
> > +	(void)kexec_block_notifier_unregister(&mshv_kexec_notifier);
> > +}
> > diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h
> > index 3c1d88b36741..311f76262d10 100644
> > --- a/drivers/hv/mshv_root.h
> > +++ b/drivers/hv/mshv_root.h
> > @@ -17,6 +17,7 @@
> >   #include <linux/build_bug.h>
> >   #include <linux/mmu_notifier.h>
> >   #include <uapi/linux/mshv.h>
> > +#include <hyperv/hvhdk.h>
> >   /*
> >    * Hypervisor must be between these version numbers (inclusive)
> > @@ -319,6 +320,7 @@ int hv_call_get_partition_property_ex(u64 partition_id, u64 property_code, u64 a
> >   extern struct mshv_root mshv_root;
> >   extern enum hv_scheduler_type hv_scheduler_type;
> >   extern u8 * __percpu *hv_synic_eventring_tail;
> > +extern atomic_t hv_pages_deposited;
> >   struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
> >   					   u64 uaddr, u32 flags);
> > @@ -333,4 +335,16 @@ bool mshv_region_handle_gfn_fault(struct mshv_mem_region *region, u64 gfn);
> >   void mshv_region_movable_fini(struct mshv_mem_region *region);
> >   bool mshv_region_movable_init(struct mshv_mem_region *region);
> > +#if IS_ENABLED(CONFIG_KEXEC)
> > +int mshv_kexec_init(void);
> > +void mshv_kexec_exit(void);
> > +#else
> > +static inline int mshv_kexec_init(void)
> > +{
> > +	return 0;
> > +}
> > +
> > +static inline void mshv_kexec_exit(void) { }
> > +#endif
> > +
> >   #endif /* _MSHV_ROOT_H_ */
> > diff --git a/drivers/hv/mshv_root_hv_call.c b/drivers/hv/mshv_root_hv_call.c
> > index 06f2bac8039d..4203af5190ee 100644
> > --- a/drivers/hv/mshv_root_hv_call.c
> > +++ b/drivers/hv/mshv_root_hv_call.c
> > @@ -73,6 +73,8 @@ int hv_call_withdraw_memory(u64 count, int node, u64 partition_id)
> >   		for (i = 0; i < completed; i++)
> >   			__free_page(pfn_to_page(output_page->gpa_page_list[i]));
> > +		atomic_sub(completed, &hv_pages_deposited);
> > +
> >   		if (!hv_result_success(status)) {
> >   			if (hv_result(status) == HV_STATUS_NO_RESOURCES)
> >   				status = HV_STATUS_SUCCESS;
> > diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> > index 5fc572e31cd7..d55aa69d130c 100644
> > --- a/drivers/hv/mshv_root_main.c
> > +++ b/drivers/hv/mshv_root_main.c
> > @@ -2330,6 +2330,10 @@ static int __init mshv_parent_partition_init(void)
> >   	if (ret)
> >   		goto deinit_root_scheduler;
> > +	ret = mshv_kexec_init();
> > +	if (ret)
> > +		goto deinit_irqfd_wq;
> > +
> >   	spin_lock_init(&mshv_root.pt_ht_lock);
> >   	hash_init(mshv_root.pt_htable);
> > @@ -2337,6 +2341,8 @@ static int __init mshv_parent_partition_init(void)
> >   	return 0;
> > +deinit_irqfd_wq:
> > +	mshv_irqfd_wq_cleanup();
> >   deinit_root_scheduler:
> >   	root_scheduler_deinit();
> >   exit_partition:
> > @@ -2356,6 +2362,7 @@ static void __exit mshv_parent_partition_exit(void)
> >   	hv_setup_mshv_handler(NULL);
> >   	mshv_port_table_fini();
> >   	misc_deregister(&mshv_dev);
> > +	mshv_kexec_exit();
> >   	mshv_irqfd_wq_cleanup();
> >   	root_scheduler_deinit();
> >   	if (hv_root_partition())
> > 
> > 
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2026-02-19 22:16 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-01-28 17:41 [PATCH 0/2] kexec: Refuse kernel-unsafe Microsoft Hypervisor transitions Stanislav Kinsburskii
2026-01-28 17:42 ` [PATCH 1/2] kexec: Add permission notifier chain for kexec operations Stanislav Kinsburskii
2026-02-12 22:12   ` Mukesh R
2026-02-19 22:13     ` Stanislav Kinsburskii
2026-01-28 17:42 ` [PATCH 2/2] mshv: Add kexec blocking support Stanislav Kinsburskii
2026-02-12 22:11   ` Mukesh R
2026-02-18  8:14     ` Wei Liu
2026-02-19 22:16     ` Stanislav Kinsburskii
2026-02-11 23:30 ` [PATCH 0/2] kexec: Refuse kernel-unsafe Microsoft Hypervisor transitions Stanislav Kinsburskii

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox