Linux-HyperV List

Linux-HyperV List
 help / color / mirror / Atom feed

* [PATCH 7/7] mshv: Allocate pfns array only for pinned regions
From: Stanislav Kinsburskii @ 2026-04-01 22:13 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, longli; +Cc: linux-hyperv, linux-kernel
In-Reply-To: <177508151446.215674.7844504277869257435.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>

Convert pfns to a pointer allocated only for pinned regions that
actually need it for share/unshare/evict operations. Unpinned
regions use NULL since HMM handles their mappings dynamically.

The pfns array was previously a flexible array member, forcing
allocation for all regions regardless of memory type. This wastes
significant memory for unpinned HMM-managed regions which don't
need persistent PFN tracking - a 1GB region wastes 2MB for an
unused array.

This also allows using kzalloc for the main structure instead of
vzalloc, improving allocation efficiency and cache locality.

Simplify unpinned region invalidation by calling the hypervisor
directly rather than tracking PFNs. The tradeoff of skipping huge
page optimization is acceptable since invalidation ranges are
typically small and not performance-critical.

Add NULL checks where pfns array is required and update cleanup
to handle conditional allocation.

Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
---
 drivers/hv/mshv_regions.c |   50 ++++++++++++++++++++++++++-------------------
 drivers/hv/mshv_root.h    |    6 ++++-
 2 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/drivers/hv/mshv_regions.c b/drivers/hv/mshv_regions.c
index 3c2a35f3bc31..e3a6ade4919e 100644
--- a/drivers/hv/mshv_regions.c
+++ b/drivers/hv/mshv_regions.c
@@ -243,7 +243,7 @@ struct mshv_region *mshv_region_create(enum mshv_region_type type,
 	int ret = 0;
 	u64 i;
 
-	region = vzalloc(sizeof(*region) + sizeof(unsigned long) * nr_pfns);
+	region = kzalloc_obj(*region);
 	if (!region)
 		return ERR_PTR(-ENOMEM);
 
@@ -255,6 +255,13 @@ struct mshv_region *mshv_region_create(enum mshv_region_type type,
 						   &mshv_region_mni_ops);
 		break;
 	case MSHV_REGION_TYPE_MEM_PINNED:
+		region->mreg_pfns = vzalloc(sizeof(unsigned long) * nr_pfns);
+		if (!region->mreg_pfns) {
+			ret = -ENOMEM;
+			break;
+		}
+		for (i = 0; i < nr_pfns; i++)
+			region->mreg_pfns[i] = MSHV_INVALID_PFN;
 		break;
 	case MSHV_REGION_TYPE_MMIO:
 		region->mreg_mmio_pfn = mmio_pfn;
@@ -276,16 +283,13 @@ struct mshv_region *mshv_region_create(enum mshv_region_type type,
 	if (flags & BIT(MSHV_SET_MEM_BIT_EXECUTABLE))
 		region->hv_map_flags |= HV_MAP_GPA_EXECUTABLE;
 
-	for (i = 0; i < nr_pfns; i++)
-		region->mreg_pfns[i] = MSHV_INVALID_PFN;
-
 	mutex_init(&region->mreg_mutex);
 	kref_init(&region->mreg_refcount);
 
 	return region;
 
 free_region:
-	vfree(region);
+	kfree(region);
 	return ERR_PTR(ret);
 }
 
@@ -312,6 +316,9 @@ static int mshv_region_share(struct mshv_region *region)
 {
 	u32 flags = HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_SHARED;
 
+	if (!region->mreg_pfns)
+		return -EINVAL;
+
 	return mshv_region_process_range(region, flags,
 					 0, region->nr_pfns,
 					 region->mreg_pfns,
@@ -340,6 +347,9 @@ static int mshv_region_unshare(struct mshv_region *region)
 {
 	u32 flags = HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_EXCLUSIVE;
 
+	if (!region->mreg_pfns)
+		return -EINVAL;
+
 	return mshv_region_process_range(region, flags,
 					 0, region->nr_pfns,
 					 region->mreg_pfns,
@@ -394,13 +404,11 @@ static void mshv_region_invalidate_pfns(struct mshv_region *region,
 {
 	u64 i;
 
-	for (i = pfn_offset; i < pfn_offset + pfn_count; i++) {
-		if (!pfn_valid(region->mreg_pfns[i]))
-			continue;
-
-		if (region->mreg_type == MSHV_REGION_TYPE_MEM_PINNED)
-			unpin_user_page(pfn_to_page(region->mreg_pfns[i]));
+	if (region->mreg_type != MSHV_REGION_TYPE_MEM_PINNED)
+		return;
 
+	for (i = pfn_offset; i < pfn_offset + pfn_count; i++) {
+		unpin_user_page(pfn_to_page(region->mreg_pfns[i]));
 		region->mreg_pfns[i] = MSHV_INVALID_PFN;
 	}
 }
@@ -506,7 +514,8 @@ static void mshv_region_destroy(struct kref *ref)
 
 	mshv_region_invalidate(region);
 
-	vfree(region);
+	vfree(region->mreg_pfns);
+	kfree(region);
 }
 
 void mshv_region_put(struct mshv_region *region)
@@ -616,10 +625,9 @@ static int mshv_region_hmm_fault_and_lock(struct mshv_region *region,
  *   leaving missing pages as invalid PFN markers.
  *   Used for initial region setup.
  *
- * Collected PFNs are stored in region->mreg_pfns[] with HMM bookkeeping
- * flags cleared, then the range is mapped into the hypervisor. Present
- * PFNs get mapped with region access permissions; missing PFNs (zero
- * entries) get mapped with no-access permissions.
+ * HMM bookkeeping flags are stripped from collected PFNs before mapping.
+ * Present PFNs get mapped with region access permissions; missing PFNs
+ * (marked as MSHV_INVALID_PFN) get mapped with no-access permissions.
  *
  * Return: 0 on success, negative errno on failure.
  */
@@ -648,15 +656,17 @@ static int mshv_region_collect_and_map(struct mshv_region *region,
 		goto out;
 
 	for (i = 0; i < pfn_count; i++) {
-		if (!(pfns[i] & HMM_PFN_VALID))
+		if (!(pfns[i] & HMM_PFN_VALID)) {
+			pfns[i] = MSHV_INVALID_PFN;
 			continue;
+		}
 		/* Drop HMM_PFN_* flags to ensure PFNs are valid. */
-		region->mreg_pfns[pfn_offset + i] = pfns[i] & ~HMM_PFN_FLAGS;
+		pfns[i] &= ~HMM_PFN_FLAGS;
 	}
 
 	ret = mshv_region_remap_pfns(region, region->hv_map_flags,
 				     pfn_offset, pfn_count,
-				     region->mreg_pfns + pfn_offset);
+				     pfns);
 
 	mutex_unlock(&region->mreg_mutex);
 out:
@@ -770,8 +780,6 @@ static bool mshv_region_interval_invalidate(struct mmu_interval_notifier *mni,
 	if (ret)
 		goto out_unlock;
 
-	mshv_region_invalidate_pfns(region, pfn_offset, pfn_count);
-
 	mutex_unlock(&region->mreg_mutex);
 
 	return true;
diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h
index 97659ba55418..e43bdbf1ada8 100644
--- a/drivers/hv/mshv_root.h
+++ b/drivers/hv/mshv_root.h
@@ -92,8 +92,10 @@ struct mshv_region {
 	enum mshv_region_type mreg_type;
 	struct mmu_interval_notifier mreg_mni;
 	struct mutex mreg_mutex;	/* protects region PFNs remapping */
-	u64 mreg_mmio_pfn;
-	unsigned long mreg_pfns[];
+	union {
+		unsigned long *mreg_pfns;
+		u64 mreg_mmio_pfn;
+	};
 };
 
 struct mshv_irq_ack_notifier {



^ permalink raw reply related

* [PATCH 6/7] mshv: Simplify pfn array handling in region processing
From: Stanislav Kinsburskii @ 2026-04-01 22:12 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, longli; +Cc: linux-hyperv, linux-kernel
In-Reply-To: <177508151446.215674.7844504277869257435.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>

The current code requires passing both the full pfn array and an offset
parameter to region processing functions, forcing callees to manually
index into arrays. This approach is inflexible and makes it difficult
to work with different sources of pfn arrays.

Upcoming changes will need to pass pfn arrays obtained from the HMM
framework directly to these functions. The HMM framework returns arrays
that represent specific ranges rather than full region arrays with
offsets, making the current offset-based indexing pattern incompatible.

Refactor by having callers pass pre-offset pointers to pfn arrays and
removing offset-based indexing from callees. This allows functions to
work with any pfn array starting at index 0, regardless of its source,
and prepares the code for HMM integration.

No functional change intended.

Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
---
 drivers/hv/mshv_regions.c |   38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/drivers/hv/mshv_regions.c b/drivers/hv/mshv_regions.c
index 120ce2588501..3c2a35f3bc31 100644
--- a/drivers/hv/mshv_regions.c
+++ b/drivers/hv/mshv_regions.c
@@ -92,7 +92,7 @@ static long mshv_region_process_pfns(struct mshv_region *region,
 	unsigned long pfn;
 	int stride, ret;
 
-	pfn = pfns[pfn_offset];
+	pfn = pfns[0];
 	if (!pfn_valid(pfn))
 		return -EINVAL;
 
@@ -102,7 +102,7 @@ static long mshv_region_process_pfns(struct mshv_region *region,
 
 	/* Start at stride since the first stride is validated */
 	for (count = stride; count < pfn_count ; count += stride) {
-		pfn = pfns[pfn_offset + count];
+		pfn = pfns[count];
 
 		/* Break if current pfn is invalid */
 		if (!pfn_valid(pfn))
@@ -157,7 +157,7 @@ static long mshv_region_process_chunk(struct mshv_region *region,
 				      unsigned long *pfns,
 				      pfn_handler_t handler)
 {
-	if (pfn_valid(pfns[pfn_offset]))
+	if (pfn_valid(pfns[0]))
 		return mshv_region_process_pfns(region, flags,
 				pfn_offset, pfn_count, pfns,
 				handler);
@@ -204,10 +204,7 @@ static int mshv_region_process_range(struct mshv_region *region,
 	if (end > region->nr_pfns)
 		return -EINVAL;
 
-	start = pfn_offset;
-	end = pfn_offset + 1;
-
-	while (end < pfn_offset + pfn_count) {
+	for (start = 0, end = 1; end < pfn_count; ) {
 		/*
 		 * Accumulate contiguous pfns with the same validity
 		 * (valid or not).
@@ -218,8 +215,9 @@ static int mshv_region_process_range(struct mshv_region *region,
 		}
 
 		ret = mshv_region_process_chunk(region, flags,
-						start, end - start,
-						pfns, handler);
+						pfn_offset + start,
+						end - start,
+						pfns + start, handler);
 		if (ret < 0)
 			return ret;
 
@@ -227,8 +225,9 @@ static int mshv_region_process_range(struct mshv_region *region,
 	}
 
 	ret = mshv_region_process_chunk(region, flags,
-					start, end - start,
-					pfns, handler);
+					pfn_offset + start,
+					end - start,
+					pfns + start, handler);
 	if (ret < 0)
 		return ret;
 
@@ -296,15 +295,14 @@ static int mshv_region_chunk_share(struct mshv_region *region,
 				   unsigned long *pfns,
 				   bool huge_page)
 {
-	if (!pfn_valid(pfns[pfn_offset]))
+	if (!pfn_valid(pfns[0]))
 		return -EINVAL;
 
 	if (huge_page)
 		flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE;
 
 	return hv_call_modify_spa_host_access(region->partition->pt_id,
-					      pfns + pfn_offset,
-					      pfn_count,
+					      pfns, pfn_count,
 					      HV_MAP_GPA_READABLE |
 					      HV_MAP_GPA_WRITABLE,
 					      flags, true);
@@ -326,15 +324,15 @@ static int mshv_region_chunk_unshare(struct mshv_region *region,
 				     unsigned long *pfns,
 				     bool huge_page)
 {
-	if (!pfn_valid(pfns[pfn_offset]))
+	if (!pfn_valid(pfns[0]))
 		return -EINVAL;
 
 	if (huge_page)
 		flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE;
 
 	return hv_call_modify_spa_host_access(region->partition->pt_id,
-					      pfns + pfn_offset,
-					      pfn_count, 0,
+					      pfns, pfn_count,
+					      0,
 					      flags, false);
 }
 
@@ -359,7 +357,7 @@ static int mshv_region_chunk_remap(struct mshv_region *region,
 	 * hypervisor track dirty pages, enabling precopy live
 	 * migration.
 	 */
-	if (!pfn_valid(pfns[pfn_offset]))
+	if (!pfn_valid(pfns[0]))
 		flags = HV_MAP_GPA_NO_ACCESS;
 
 	if (huge_page)
@@ -368,7 +366,7 @@ static int mshv_region_chunk_remap(struct mshv_region *region,
 	return hv_call_map_ram_pfns(region->partition->pt_id,
 				    region->start_gfn + pfn_offset,
 				    pfn_count, flags,
-				    pfns + pfn_offset);
+				    pfns);
 }
 
 static int mshv_region_remap_pfns(struct mshv_region *region,
@@ -658,7 +656,7 @@ static int mshv_region_collect_and_map(struct mshv_region *region,
 
 	ret = mshv_region_remap_pfns(region, region->hv_map_flags,
 				     pfn_offset, pfn_count,
-				     region->mreg_pfns);
+				     region->mreg_pfns + pfn_offset);
 
 	mutex_unlock(&region->mreg_mutex);
 out:



^ permalink raw reply related

* [PATCH 5/7] mshv: Pass pfns array explicitly through processing chain
From: Stanislav Kinsburskii @ 2026-04-01 22:12 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, longli; +Cc: linux-hyperv, linux-kernel
In-Reply-To: <177508151446.215674.7844504277869257435.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>

The current implementation relies on accessing region->pfns directly
within the pfn processing chain, making it difficult to use these
handlers with alternative pfn sources. This tight coupling limits
flexibility when processing pfns from different locations, such as
temporary arrays or external sources.

By threading the pfns pointer through the entire processing chain
(mshv_region_process_range, mshv_region_process_chunk, and all
handlers), we decouple the processing logic from the storage location.
This enables future enhancements like processing pfns from multiple
sources or implementing more sophisticated memory management strategies
without duplicating the core processing logic.

No functional change intended.

Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
---
 drivers/hv/mshv_regions.c |   61 +++++++++++++++++++++++++++------------------
 1 file changed, 37 insertions(+), 24 deletions(-)

diff --git a/drivers/hv/mshv_regions.c b/drivers/hv/mshv_regions.c
index a92381219758..120ce2588501 100644
--- a/drivers/hv/mshv_regions.c
+++ b/drivers/hv/mshv_regions.c
@@ -22,7 +22,7 @@
 
 typedef int (*pfn_handler_t)(struct mshv_region *region, u32 flags,
 			     u64 pfn_offset, u64 pfn_count,
-			     bool huge_page);
+			     unsigned long *pfns, bool huge_page);
 
 static const struct mmu_interval_notifier_ops mshv_region_mni_ops;
 
@@ -84,6 +84,7 @@ static int mshv_chunk_stride(struct page *page,
 static long mshv_region_process_pfns(struct mshv_region *region,
 				     u32 flags,
 				     u64 pfn_offset, u64 pfn_count,
+				     unsigned long *pfns,
 				     pfn_handler_t handler)
 {
 	u64 gfn = region->start_gfn + pfn_offset;
@@ -91,7 +92,7 @@ static long mshv_region_process_pfns(struct mshv_region *region,
 	unsigned long pfn;
 	int stride, ret;
 
-	pfn = region->mreg_pfns[pfn_offset];
+	pfn = pfns[pfn_offset];
 	if (!pfn_valid(pfn))
 		return -EINVAL;
 
@@ -101,7 +102,7 @@ static long mshv_region_process_pfns(struct mshv_region *region,
 
 	/* Start at stride since the first stride is validated */
 	for (count = stride; count < pfn_count ; count += stride) {
-		pfn = region->mreg_pfns[pfn_offset + count];
+		pfn = pfns[pfn_offset + count];
 
 		/* Break if current pfn is invalid */
 		if (!pfn_valid(pfn))
@@ -114,7 +115,7 @@ static long mshv_region_process_pfns(struct mshv_region *region,
 			break;
 	}
 
-	ret = handler(region, flags, pfn_offset, count, stride > 1);
+	ret = handler(region, flags, pfn_offset, count, pfns, stride > 1);
 	if (ret)
 		return ret;
 
@@ -138,11 +139,12 @@ static long mshv_region_process_pfns(struct mshv_region *region,
 static long mshv_region_process_hole(struct mshv_region *region,
 				     u32 flags,
 				     u64 pfn_offset, u64 pfn_count,
+				     unsigned long *pfns,
 				     pfn_handler_t handler)
 {
 	long ret;
 
-	ret = handler(region, flags, pfn_offset, pfn_count, 0);
+	ret = handler(region, flags, pfn_offset, pfn_count, pfns, 0);
 	if (ret)
 		return ret;
 
@@ -152,15 +154,16 @@ static long mshv_region_process_hole(struct mshv_region *region,
 static long mshv_region_process_chunk(struct mshv_region *region,
 				      u32 flags,
 				      u64 pfn_offset, u64 pfn_count,
+				      unsigned long *pfns,
 				      pfn_handler_t handler)
 {
-	if (pfn_valid(region->mreg_pfns[pfn_offset]))
+	if (pfn_valid(pfns[pfn_offset]))
 		return mshv_region_process_pfns(region, flags,
-				pfn_offset, pfn_count,
+				pfn_offset, pfn_count, pfns,
 				handler);
 	else
 		return mshv_region_process_hole(region, flags,
-				pfn_offset, pfn_count,
+				pfn_offset, pfn_count, pfns,
 				handler);
 }
 
@@ -170,12 +173,13 @@ static long mshv_region_process_chunk(struct mshv_region *region,
  * @flags     : Flags to pass to the handler.
  * @pfn_offset: Offset into the region's PFNs array to start processing.
  * @pfn_count : Number of PFNs to process.
+ * @pfns      : Pointer to an array of PFNs corresponding to the region.
  * @handler   : Callback function to handle each chunk of contiguous
  *              valid PFNs.
  *
- * Iterates over the specified range of PFNs in @region, skipping
- * invalid PFNs. For each contiguous chunk of valid PFNS, invokes
- * @handler via mshv_region_process_pfns.
+ * Iterates over the specified range of PFNs, skipping invalid PFNs.
+ * For each contiguous chunk of valid PFNS, invokes @handler via
+ * mshv_region_process_pfns.
  *
  * Note: The @handler callback must be able to handle PFNs backed by both
  * normal and huge pages.
@@ -185,6 +189,7 @@ static long mshv_region_process_chunk(struct mshv_region *region,
 static int mshv_region_process_range(struct mshv_region *region,
 				     u32 flags,
 				     u64 pfn_offset, u64 pfn_count,
+				     unsigned long *pfns,
 				     pfn_handler_t handler)
 {
 	u64 start, end;
@@ -207,15 +212,14 @@ static int mshv_region_process_range(struct mshv_region *region,
 		 * Accumulate contiguous pfns with the same validity
 		 * (valid or not).
 		 */
-		if (pfn_valid(region->mreg_pfns[start]) ==
-		    pfn_valid(region->mreg_pfns[end])) {
+		if (pfn_valid(pfns[start]) == pfn_valid(pfns[end])) {
 			end++;
 			continue;
 		}
 
 		ret = mshv_region_process_chunk(region, flags,
 						start, end - start,
-						handler);
+						pfns, handler);
 		if (ret < 0)
 			return ret;
 
@@ -224,7 +228,7 @@ static int mshv_region_process_range(struct mshv_region *region,
 
 	ret = mshv_region_process_chunk(region, flags,
 					start, end - start,
-					handler);
+					pfns, handler);
 	if (ret < 0)
 		return ret;
 
@@ -289,16 +293,17 @@ struct mshv_region *mshv_region_create(enum mshv_region_type type,
 static int mshv_region_chunk_share(struct mshv_region *region,
 				   u32 flags,
 				   u64 pfn_offset, u64 pfn_count,
+				   unsigned long *pfns,
 				   bool huge_page)
 {
-	if (!pfn_valid(region->mreg_pfns[pfn_offset]))
+	if (!pfn_valid(pfns[pfn_offset]))
 		return -EINVAL;
 
 	if (huge_page)
 		flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE;
 
 	return hv_call_modify_spa_host_access(region->partition->pt_id,
-					      region->mreg_pfns + pfn_offset,
+					      pfns + pfn_offset,
 					      pfn_count,
 					      HV_MAP_GPA_READABLE |
 					      HV_MAP_GPA_WRITABLE,
@@ -311,22 +316,24 @@ static int mshv_region_share(struct mshv_region *region)
 
 	return mshv_region_process_range(region, flags,
 					 0, region->nr_pfns,
+					 region->mreg_pfns,
 					 mshv_region_chunk_share);
 }
 
 static int mshv_region_chunk_unshare(struct mshv_region *region,
 				     u32 flags,
 				     u64 pfn_offset, u64 pfn_count,
+				     unsigned long *pfns,
 				     bool huge_page)
 {
-	if (!pfn_valid(region->mreg_pfns[pfn_offset]))
+	if (!pfn_valid(pfns[pfn_offset]))
 		return -EINVAL;
 
 	if (huge_page)
 		flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE;
 
 	return hv_call_modify_spa_host_access(region->partition->pt_id,
-					      region->mreg_pfns + pfn_offset,
+					      pfns + pfn_offset,
 					      pfn_count, 0,
 					      flags, false);
 }
@@ -337,12 +344,14 @@ static int mshv_region_unshare(struct mshv_region *region)
 
 	return mshv_region_process_range(region, flags,
 					 0, region->nr_pfns,
+					 region->mreg_pfns,
 					 mshv_region_chunk_unshare);
 }
 
 static int mshv_region_chunk_remap(struct mshv_region *region,
 				   u32 flags,
 				   u64 pfn_offset, u64 pfn_count,
+				   unsigned long *pfns,
 				   bool huge_page)
 {
 	/*
@@ -350,7 +359,7 @@ static int mshv_region_chunk_remap(struct mshv_region *region,
 	 * hypervisor track dirty pages, enabling precopy live
 	 * migration.
 	 */
-	if (!pfn_valid(region->mreg_pfns[pfn_offset]))
+	if (!pfn_valid(pfns[pfn_offset]))
 		flags = HV_MAP_GPA_NO_ACCESS;
 
 	if (huge_page)
@@ -359,15 +368,17 @@ static int mshv_region_chunk_remap(struct mshv_region *region,
 	return hv_call_map_ram_pfns(region->partition->pt_id,
 				    region->start_gfn + pfn_offset,
 				    pfn_count, flags,
-				    region->mreg_pfns + pfn_offset);
+				    pfns + pfn_offset);
 }
 
 static int mshv_region_remap_pfns(struct mshv_region *region,
 				  u32 map_flags,
-				  u64 pfn_offset, u64 pfn_count)
+				  u64 pfn_offset, u64 pfn_count,
+				  unsigned long *pfns)
 {
 	return mshv_region_process_range(region, map_flags,
 					 pfn_offset, pfn_count,
+					 pfns,
 					 mshv_region_chunk_remap);
 }
 
@@ -376,7 +387,8 @@ static int mshv_region_map(struct mshv_region *region)
 	u32 map_flags = region->hv_map_flags;
 
 	return mshv_region_remap_pfns(region, map_flags,
-				      0, region->nr_pfns);
+				      0, region->nr_pfns,
+				      region->mreg_pfns);
 }
 
 static void mshv_region_invalidate_pfns(struct mshv_region *region,
@@ -645,7 +657,8 @@ static int mshv_region_collect_and_map(struct mshv_region *region,
 	}
 
 	ret = mshv_region_remap_pfns(region, region->hv_map_flags,
-				     pfn_offset, pfn_count);
+				     pfn_offset, pfn_count,
+				     region->mreg_pfns);
 
 	mutex_unlock(&region->mreg_mutex);
 out:



^ permalink raw reply related

* [PATCH 4/7] mshv: Optimize memory region mapping operations
From: Stanislav Kinsburskii @ 2026-04-01 22:12 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, longli; +Cc: linux-hyperv, linux-kernel
In-Reply-To: <177508151446.215674.7844504277869257435.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>

Two specific operations don't require PFN iteration: region unmapping
and region remapping with no access. For unmapping, all frames in MSHV
memory regions are guaranteed to be mapped with page access, so we can
unmap them all without checking individual PFNs. For remapping with no
access, all frames are already mapped with page access, allowing us to
unmap them all in one pass.

Since neither operation needs PFN validation, iterating over PFNs is
redundant. Batch operations into large page-aligned chunks followed by
remaining pages. This eliminates PFN traversal for these operations,
requires no additional hypercalls compared to the PFN-checking approach,
and provides the simplest possible sequential execution path.

The optimization utilizes HV_MAP_GPA_LARGE_PAGE and
HV_UNMAP_GPA_LARGE_PAGE flags for aligned portions, processing only the
remainder with base page granularity. This removes mshv_region_chunk_unmap()
and mshv_region_process_range() helper functions, reducing code complexity.

Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
---
 drivers/hv/mshv_regions.c |   65 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 46 insertions(+), 19 deletions(-)

diff --git a/drivers/hv/mshv_regions.c b/drivers/hv/mshv_regions.c
index 2c4215381e0b..a92381219758 100644
--- a/drivers/hv/mshv_regions.c
+++ b/drivers/hv/mshv_regions.c
@@ -449,27 +449,27 @@ static int mshv_region_pin(struct mshv_region *region)
 	return ret < 0 ? ret : -ENOMEM;
 }
 
-static int mshv_region_chunk_unmap(struct mshv_region *region,
-				   u32 flags,
-				   u64 pfn_offset, u64 pfn_count,
-				   bool huge_page)
+static int mshv_region_unmap(struct mshv_region *region)
 {
-	if (!pfn_valid(region->mreg_pfns[pfn_offset]))
-		return 0;
+	u64 aligned_pages, remaining_pages;
+	int ret = 0;
 
-	if (huge_page)
-		flags |= HV_UNMAP_GPA_LARGE_PAGE;
+	aligned_pages = ALIGN_DOWN(region->nr_pfns, PTRS_PER_PMD);
+	remaining_pages = region->nr_pfns - aligned_pages;
 
-	return hv_call_unmap_pfns(region->partition->pt_id,
-				  region->start_gfn + pfn_offset,
-				  pfn_count, flags);
-}
+	if (aligned_pages)
+		ret = hv_call_unmap_pfns(region->partition->pt_id,
+					 region->start_gfn,
+					 aligned_pages,
+					 HV_UNMAP_GPA_LARGE_PAGE);
 
-static int mshv_region_unmap(struct mshv_region *region)
-{
-	return mshv_region_process_range(region, 0,
-					 0, region->nr_pfns,
-					 mshv_region_chunk_unmap);
+	if (!ret && remaining_pages)
+		ret = hv_call_unmap_pfns(region->partition->pt_id,
+					 region->start_gfn + aligned_pages,
+					 remaining_pages,
+					 0);
+
+	return ret;
 }
 
 static void mshv_region_destroy(struct kref *ref)
@@ -684,6 +684,34 @@ bool mshv_region_handle_gfn_fault(struct mshv_region *region, u64 gfn)
 	return !ret;
 }
 
+static int mshv_region_map_no_access(struct mshv_region *region,
+				     u64 pfn_offset, u64 pfn_count)
+{
+	u64 aligned_pages, remaining_pages;
+	int ret = 0;
+
+	aligned_pages = ALIGN_DOWN(pfn_count, PTRS_PER_PMD);
+	remaining_pages = pfn_count - aligned_pages;
+
+	if (aligned_pages)
+		ret = hv_call_map_ram_pfns(region->partition->pt_id,
+					   region->start_gfn + pfn_offset,
+					   aligned_pages,
+					   HV_MAP_GPA_NO_ACCESS |
+					   HV_MAP_GPA_LARGE_PAGE,
+					   NULL);
+
+	if (!ret && remaining_pages)
+		ret = hv_call_map_ram_pfns(region->partition->pt_id,
+					   region->start_gfn +
+					   aligned_pages + pfn_offset,
+					   remaining_pages,
+					   HV_MAP_GPA_NO_ACCESS,
+					   NULL);
+
+	return ret;
+}
+
 /**
  * mshv_region_interval_invalidate - Invalidate a range of memory region
  * @mni: Pointer to the mmu_interval_notifier structure
@@ -727,8 +755,7 @@ static bool mshv_region_interval_invalidate(struct mmu_interval_notifier *mni,
 
 	mmu_interval_set_seq(mni, cur_seq);
 
-	ret = mshv_region_remap_pfns(region, HV_MAP_GPA_NO_ACCESS,
-				     pfn_offset, pfn_count);
+	ret = mshv_region_map_no_access(region, pfn_offset, pfn_count);
 	if (ret)
 		goto out_unlock;
 



^ permalink raw reply related

* [PATCH 3/7] mshv: Rename mshv_mem_region to mshv_region
From: Stanislav Kinsburskii @ 2026-04-01 22:12 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, longli; +Cc: linux-hyperv, linux-kernel
In-Reply-To: <177508151446.215674.7844504277869257435.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>

The mshv_mem_region structure represents guest address space regions,
which can be either RAM-backed memory or memory-mapped IO regions
without physical backing. The "mem_" prefix incorrectly suggests the
structure only handles memory regions, creating confusion about its
actual purpose.

Remove the "mem_" prefix to align with existing function naming
(mshv_region_map, mshv_region_pin, etc.) and accurately reflect that
this structure manages arbitrary guest address space mappings
regardless of their backing type.

Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
---
 drivers/hv/mshv_regions.c   |   74 ++++++++++++++++++++++---------------------
 drivers/hv/mshv_root.h      |   18 +++++-----
 drivers/hv/mshv_root_main.c |   20 ++++++------
 3 files changed, 56 insertions(+), 56 deletions(-)

diff --git a/drivers/hv/mshv_regions.c b/drivers/hv/mshv_regions.c
index 70cd0857a28e..2c4215381e0b 100644
--- a/drivers/hv/mshv_regions.c
+++ b/drivers/hv/mshv_regions.c
@@ -20,7 +20,7 @@
 #define MSHV_MAP_FAULT_IN_PAGES				PTRS_PER_PMD
 #define MSHV_INVALID_PFN				ULONG_MAX
 
-typedef int (*pfn_handler_t)(struct mshv_mem_region *region, u32 flags,
+typedef int (*pfn_handler_t)(struct mshv_region *region, u32 flags,
 			     u64 pfn_offset, u64 pfn_count,
 			     bool huge_page);
 
@@ -81,7 +81,7 @@ static int mshv_chunk_stride(struct page *page,
  *
  * Return: Number of pages handled, or negative error code.
  */
-static long mshv_region_process_pfns(struct mshv_mem_region *region,
+static long mshv_region_process_pfns(struct mshv_region *region,
 				     u32 flags,
 				     u64 pfn_offset, u64 pfn_count,
 				     pfn_handler_t handler)
@@ -135,7 +135,7 @@ static long mshv_region_process_pfns(struct mshv_mem_region *region,
  *
  * Return: Number of PFNs handled, or negative error code.
  */
-static long mshv_region_process_hole(struct mshv_mem_region *region,
+static long mshv_region_process_hole(struct mshv_region *region,
 				     u32 flags,
 				     u64 pfn_offset, u64 pfn_count,
 				     pfn_handler_t handler)
@@ -149,7 +149,7 @@ static long mshv_region_process_hole(struct mshv_mem_region *region,
 	return pfn_count;
 }
 
-static long mshv_region_process_chunk(struct mshv_mem_region *region,
+static long mshv_region_process_chunk(struct mshv_region *region,
 				      u32 flags,
 				      u64 pfn_offset, u64 pfn_count,
 				      pfn_handler_t handler)
@@ -182,7 +182,7 @@ static long mshv_region_process_chunk(struct mshv_mem_region *region,
  *
  * Returns 0 on success, or a negative error code on failure.
  */
-static int mshv_region_process_range(struct mshv_mem_region *region,
+static int mshv_region_process_range(struct mshv_region *region,
 				     u32 flags,
 				     u64 pfn_offset, u64 pfn_count,
 				     pfn_handler_t handler)
@@ -231,12 +231,12 @@ static int mshv_region_process_range(struct mshv_mem_region *region,
 	return 0;
 }
 
-struct mshv_mem_region *mshv_region_create(enum mshv_region_type type,
-					   u64 guest_pfn, u64 nr_pfns,
-					   u64 uaddr, u32 flags,
-					   ulong mmio_pfn)
+struct mshv_region *mshv_region_create(enum mshv_region_type type,
+				       u64 guest_pfn, u64 nr_pfns,
+				       u64 uaddr, u32 flags,
+				       ulong mmio_pfn)
 {
-	struct mshv_mem_region *region;
+	struct mshv_region *region;
 	int ret = 0;
 	u64 i;
 
@@ -286,7 +286,7 @@ struct mshv_mem_region *mshv_region_create(enum mshv_region_type type,
 	return ERR_PTR(ret);
 }
 
-static int mshv_region_chunk_share(struct mshv_mem_region *region,
+static int mshv_region_chunk_share(struct mshv_region *region,
 				   u32 flags,
 				   u64 pfn_offset, u64 pfn_count,
 				   bool huge_page)
@@ -305,7 +305,7 @@ static int mshv_region_chunk_share(struct mshv_mem_region *region,
 					      flags, true);
 }
 
-static int mshv_region_share(struct mshv_mem_region *region)
+static int mshv_region_share(struct mshv_region *region)
 {
 	u32 flags = HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_SHARED;
 
@@ -314,7 +314,7 @@ static int mshv_region_share(struct mshv_mem_region *region)
 					 mshv_region_chunk_share);
 }
 
-static int mshv_region_chunk_unshare(struct mshv_mem_region *region,
+static int mshv_region_chunk_unshare(struct mshv_region *region,
 				     u32 flags,
 				     u64 pfn_offset, u64 pfn_count,
 				     bool huge_page)
@@ -331,7 +331,7 @@ static int mshv_region_chunk_unshare(struct mshv_mem_region *region,
 					      flags, false);
 }
 
-static int mshv_region_unshare(struct mshv_mem_region *region)
+static int mshv_region_unshare(struct mshv_region *region)
 {
 	u32 flags = HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_EXCLUSIVE;
 
@@ -340,7 +340,7 @@ static int mshv_region_unshare(struct mshv_mem_region *region)
 					 mshv_region_chunk_unshare);
 }
 
-static int mshv_region_chunk_remap(struct mshv_mem_region *region,
+static int mshv_region_chunk_remap(struct mshv_region *region,
 				   u32 flags,
 				   u64 pfn_offset, u64 pfn_count,
 				   bool huge_page)
@@ -362,7 +362,7 @@ static int mshv_region_chunk_remap(struct mshv_mem_region *region,
 				    region->mreg_pfns + pfn_offset);
 }
 
-static int mshv_region_remap_pfns(struct mshv_mem_region *region,
+static int mshv_region_remap_pfns(struct mshv_region *region,
 				  u32 map_flags,
 				  u64 pfn_offset, u64 pfn_count)
 {
@@ -371,7 +371,7 @@ static int mshv_region_remap_pfns(struct mshv_mem_region *region,
 					 mshv_region_chunk_remap);
 }
 
-static int mshv_region_map(struct mshv_mem_region *region)
+static int mshv_region_map(struct mshv_region *region)
 {
 	u32 map_flags = region->hv_map_flags;
 
@@ -379,7 +379,7 @@ static int mshv_region_map(struct mshv_mem_region *region)
 				      0, region->nr_pfns);
 }
 
-static void mshv_region_invalidate_pfns(struct mshv_mem_region *region,
+static void mshv_region_invalidate_pfns(struct mshv_region *region,
 					u64 pfn_offset, u64 pfn_count)
 {
 	u64 i;
@@ -395,12 +395,12 @@ static void mshv_region_invalidate_pfns(struct mshv_mem_region *region,
 	}
 }
 
-static void mshv_region_invalidate(struct mshv_mem_region *region)
+static void mshv_region_invalidate(struct mshv_region *region)
 {
 	mshv_region_invalidate_pfns(region, 0, region->nr_pfns);
 }
 
-static int mshv_region_pin(struct mshv_mem_region *region)
+static int mshv_region_pin(struct mshv_region *region)
 {
 	u64 done_count, nr_pfns, i;
 	unsigned long *pfns;
@@ -449,7 +449,7 @@ static int mshv_region_pin(struct mshv_mem_region *region)
 	return ret < 0 ? ret : -ENOMEM;
 }
 
-static int mshv_region_chunk_unmap(struct mshv_mem_region *region,
+static int mshv_region_chunk_unmap(struct mshv_region *region,
 				   u32 flags,
 				   u64 pfn_offset, u64 pfn_count,
 				   bool huge_page)
@@ -465,7 +465,7 @@ static int mshv_region_chunk_unmap(struct mshv_mem_region *region,
 				  pfn_count, flags);
 }
 
-static int mshv_region_unmap(struct mshv_mem_region *region)
+static int mshv_region_unmap(struct mshv_region *region)
 {
 	return mshv_region_process_range(region, 0,
 					 0, region->nr_pfns,
@@ -474,8 +474,8 @@ static int mshv_region_unmap(struct mshv_mem_region *region)
 
 static void mshv_region_destroy(struct kref *ref)
 {
-	struct mshv_mem_region *region =
-		container_of(ref, struct mshv_mem_region, mreg_refcount);
+	struct mshv_region *region =
+		container_of(ref, struct mshv_region, mreg_refcount);
 	struct mshv_partition *partition = region->partition;
 	int ret;
 
@@ -499,12 +499,12 @@ static void mshv_region_destroy(struct kref *ref)
 	vfree(region);
 }
 
-void mshv_region_put(struct mshv_mem_region *region)
+void mshv_region_put(struct mshv_region *region)
 {
 	kref_put(&region->mreg_refcount, mshv_region_destroy);
 }
 
-int mshv_region_get(struct mshv_mem_region *region)
+int mshv_region_get(struct mshv_region *region)
 {
 	return kref_get_unless_zero(&region->mreg_refcount);
 }
@@ -534,7 +534,7 @@ int mshv_region_get(struct mshv_mem_region *region)
  *
  * Return: 0 on success, a negative error code otherwise.
  */
-static int mshv_region_hmm_fault_and_lock(struct mshv_mem_region *region,
+static int mshv_region_hmm_fault_and_lock(struct mshv_region *region,
 					  unsigned long start,
 					  unsigned long end,
 					  unsigned long *pfns,
@@ -613,7 +613,7 @@ static int mshv_region_hmm_fault_and_lock(struct mshv_mem_region *region,
  *
  * Return: 0 on success, negative errno on failure.
  */
-static int mshv_region_collect_and_map(struct mshv_mem_region *region,
+static int mshv_region_collect_and_map(struct mshv_region *region,
 				       u64 pfn_offset, u64 pfn_count,
 				       bool do_fault)
 {
@@ -653,14 +653,14 @@ static int mshv_region_collect_and_map(struct mshv_mem_region *region,
 	return ret;
 }
 
-static int mshv_region_range_fault(struct mshv_mem_region *region,
+static int mshv_region_range_fault(struct mshv_region *region,
 				   u64 pfn_offset, u64 pfn_count)
 {
 	return mshv_region_collect_and_map(region, pfn_offset, pfn_count,
 					   true);
 }
 
-bool mshv_region_handle_gfn_fault(struct mshv_mem_region *region, u64 gfn)
+bool mshv_region_handle_gfn_fault(struct mshv_region *region, u64 gfn)
 {
 	u64 pfn_offset, pfn_count;
 	int ret;
@@ -706,9 +706,9 @@ static bool mshv_region_interval_invalidate(struct mmu_interval_notifier *mni,
 					    const struct mmu_notifier_range *range,
 					    unsigned long cur_seq)
 {
-	struct mshv_mem_region *region = container_of(mni,
-						      struct mshv_mem_region,
-						      mreg_mni);
+	struct mshv_region *region = container_of(mni,
+						  struct mshv_region,
+						  mreg_mni);
 	u64 pfn_offset, pfn_count;
 	unsigned long mstart, mend;
 	int ret = -EPERM;
@@ -767,7 +767,7 @@ static const struct mmu_interval_notifier_ops mshv_region_mni_ops = {
  *
  * Return: 0 on success, negative error code on failure.
  */
-static int mshv_map_pinned_region(struct mshv_mem_region *region)
+static int mshv_map_pinned_region(struct mshv_region *region)
 {
 	struct mshv_partition *partition = region->partition;
 	int ret;
@@ -823,13 +823,13 @@ static int mshv_map_pinned_region(struct mshv_mem_region *region)
 	return ret;
 }
 
-static int mshv_map_movable_region(struct mshv_mem_region *region)
+static int mshv_map_movable_region(struct mshv_region *region)
 {
 	return mshv_region_collect_and_map(region, 0, region->nr_pfns,
 					   false);
 }
 
-static int mshv_map_mmio_region(struct mshv_mem_region *region)
+static int mshv_map_mmio_region(struct mshv_region *region)
 {
 	struct mshv_partition *partition = region->partition;
 
@@ -838,7 +838,7 @@ static int mshv_map_mmio_region(struct mshv_mem_region *region)
 				     region->nr_pfns);
 }
 
-int mshv_map_region(struct mshv_mem_region *region)
+int mshv_map_region(struct mshv_region *region)
 {
 	switch (region->mreg_type) {
 	case MSHV_REGION_TYPE_MEM_PINNED:
diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h
index 2bcdfa070517..97659ba55418 100644
--- a/drivers/hv/mshv_root.h
+++ b/drivers/hv/mshv_root.h
@@ -81,7 +81,7 @@ enum mshv_region_type {
 	MSHV_REGION_TYPE_MMIO
 };
 
-struct mshv_mem_region {
+struct mshv_region {
 	struct hlist_node hnode;
 	struct kref mreg_refcount;
 	u64 nr_pfns;
@@ -367,13 +367,13 @@ extern struct mshv_root mshv_root;
 extern enum hv_scheduler_type hv_scheduler_type;
 extern u8 * __percpu *hv_synic_eventring_tail;
 
-struct mshv_mem_region *mshv_region_create(enum mshv_region_type type,
-					   u64 guest_pfn, u64 nr_pfns,
-					   u64 uaddr, u32 flags,
-					   ulong mmio_pfn);
-void mshv_region_put(struct mshv_mem_region *region);
-int mshv_region_get(struct mshv_mem_region *region);
-bool mshv_region_handle_gfn_fault(struct mshv_mem_region *region, u64 gfn);
-int mshv_map_region(struct mshv_mem_region *region);
+struct mshv_region *mshv_region_create(enum mshv_region_type type,
+				       u64 guest_pfn, u64 nr_pfns,
+				       u64 uaddr, u32 flags,
+				       ulong mmio_pfn);
+void mshv_region_put(struct mshv_region *region);
+int mshv_region_get(struct mshv_region *region);
+bool mshv_region_handle_gfn_fault(struct mshv_region *region, u64 gfn);
+int mshv_map_region(struct mshv_region *region);
 
 #endif /* _MSHV_ROOT_H_ */
diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
index 3bfa9e9c575f..9d83a2348655 100644
--- a/drivers/hv/mshv_root_main.c
+++ b/drivers/hv/mshv_root_main.c
@@ -612,10 +612,10 @@ static long mshv_run_vp_with_root_scheduler(struct mshv_vp *vp)
 static_assert(sizeof(struct hv_message) <= MSHV_RUN_VP_BUF_SZ,
 	      "sizeof(struct hv_message) must not exceed MSHV_RUN_VP_BUF_SZ");
 
-static struct mshv_mem_region *
+static struct mshv_region *
 mshv_partition_region_by_gfn(struct mshv_partition *partition, u64 gfn)
 {
-	struct mshv_mem_region *region;
+	struct mshv_region *region;
 
 	hlist_for_each_entry(region, &partition->pt_mem_regions, hnode) {
 		if (gfn >= region->start_gfn &&
@@ -626,10 +626,10 @@ mshv_partition_region_by_gfn(struct mshv_partition *partition, u64 gfn)
 	return NULL;
 }
 
-static struct mshv_mem_region *
+static struct mshv_region *
 mshv_partition_region_by_gfn_get(struct mshv_partition *p, u64 gfn)
 {
-	struct mshv_mem_region *region;
+	struct mshv_region *region;
 
 	spin_lock(&p->pt_mem_regions_lock);
 	region = mshv_partition_region_by_gfn(p, gfn);
@@ -656,7 +656,7 @@ mshv_partition_region_by_gfn_get(struct mshv_partition *p, u64 gfn)
 static bool mshv_handle_gpa_intercept(struct mshv_vp *vp)
 {
 	struct mshv_partition *p = vp->vp_partition;
-	struct mshv_mem_region *region;
+	struct mshv_region *region;
 	bool ret = false;
 	u64 gfn;
 #if defined(CONFIG_X86_64)
@@ -1217,9 +1217,9 @@ static void mshv_async_hvcall_handler(void *data, u64 *status)
  */
 static int mshv_partition_create_region(struct mshv_partition *partition,
 					struct mshv_user_mem_region *mem,
-					struct mshv_mem_region **regionpp)
+					struct mshv_region **regionpp)
 {
-	struct mshv_mem_region *rg;
+	struct mshv_region *rg;
 	enum mshv_region_type type;
 	u64 nr_pfns = HVPFN_DOWN(mem->size);
 	struct vm_area_struct *vma;
@@ -1282,7 +1282,7 @@ static long
 mshv_map_user_memory(struct mshv_partition *partition,
 		     struct mshv_user_mem_region mem)
 {
-	struct mshv_mem_region *region;
+	struct mshv_region *region;
 	long ret;
 
 	if (mem.flags & BIT(MSHV_SET_MEM_BIT_UNMAP) ||
@@ -1318,7 +1318,7 @@ static long
 mshv_unmap_user_memory(struct mshv_partition *partition,
 		       struct mshv_user_mem_region mem)
 {
-	struct mshv_mem_region *region;
+	struct mshv_region *region;
 
 	if (!(mem.flags & BIT(MSHV_SET_MEM_BIT_UNMAP)))
 		return -EINVAL;
@@ -1690,7 +1690,7 @@ remove_partition(struct mshv_partition *partition)
 static void destroy_partition(struct mshv_partition *partition)
 {
 	struct mshv_vp *vp;
-	struct mshv_mem_region *region;
+	struct mshv_region *region;
 	struct hlist_node *n;
 	int i;
 



^ permalink raw reply related

* [PATCH 2/7] mshv: Improve code readability with handler function typedef
From: Stanislav Kinsburskii @ 2026-04-01 22:12 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, longli; +Cc: linux-hyperv, linux-kernel
In-Reply-To: <177508151446.215674.7844504277869257435.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>

The inline function pointer declarations in mshv_region_process_*
functions make the code harder to read and maintain. Each function
signature repeats the same lengthy callback parameter definition,
adding visual noise and making the actual logic less clear.

Introduce pfn_handler_t typedef to replace the repeated inline
function pointer declarations. This simplifies function signatures,
makes the code more maintainable, and follows common kernel
patterns for callback handling.

Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
---
 drivers/hv/mshv_regions.c |   28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/drivers/hv/mshv_regions.c b/drivers/hv/mshv_regions.c
index a85d18e2c279..70cd0857a28e 100644
--- a/drivers/hv/mshv_regions.c
+++ b/drivers/hv/mshv_regions.c
@@ -20,6 +20,10 @@
 #define MSHV_MAP_FAULT_IN_PAGES				PTRS_PER_PMD
 #define MSHV_INVALID_PFN				ULONG_MAX
 
+typedef int (*pfn_handler_t)(struct mshv_mem_region *region, u32 flags,
+			     u64 pfn_offset, u64 pfn_count,
+			     bool huge_page);
+
 static const struct mmu_interval_notifier_ops mshv_region_mni_ops;
 
 /**
@@ -80,11 +84,7 @@ static int mshv_chunk_stride(struct page *page,
 static long mshv_region_process_pfns(struct mshv_mem_region *region,
 				     u32 flags,
 				     u64 pfn_offset, u64 pfn_count,
-				     int (*handler)(struct mshv_mem_region *region,
-						    u32 flags,
-						    u64 pfn_offset,
-						    u64 pfn_count,
-						    bool huge_page))
+				     pfn_handler_t handler)
 {
 	u64 gfn = region->start_gfn + pfn_offset;
 	u64 count;
@@ -138,11 +138,7 @@ static long mshv_region_process_pfns(struct mshv_mem_region *region,
 static long mshv_region_process_hole(struct mshv_mem_region *region,
 				     u32 flags,
 				     u64 pfn_offset, u64 pfn_count,
-				     int (*handler)(struct mshv_mem_region *region,
-						    u32 flags,
-						    u64 pfn_offset,
-						    u64 pfn_count,
-						    bool huge_page))
+				     pfn_handler_t handler)
 {
 	long ret;
 
@@ -156,11 +152,7 @@ static long mshv_region_process_hole(struct mshv_mem_region *region,
 static long mshv_region_process_chunk(struct mshv_mem_region *region,
 				      u32 flags,
 				      u64 pfn_offset, u64 pfn_count,
-				      int (*handler)(struct mshv_mem_region *region,
-						     u32 flags,
-						     u64 pfn_offset,
-						     u64 pfn_count,
-						     bool huge_page))
+				      pfn_handler_t handler)
 {
 	if (pfn_valid(region->mreg_pfns[pfn_offset]))
 		return mshv_region_process_pfns(region, flags,
@@ -193,11 +185,7 @@ static long mshv_region_process_chunk(struct mshv_mem_region *region,
 static int mshv_region_process_range(struct mshv_mem_region *region,
 				     u32 flags,
 				     u64 pfn_offset, u64 pfn_count,
-				     int (*handler)(struct mshv_mem_region *region,
-						    u32 flags,
-						    u64 pfn_offset,
-						    u64 pfn_count,
-						    bool huge_page))
+				     pfn_handler_t handler)
 {
 	u64 start, end;
 	long ret;



^ permalink raw reply related

* [PATCH 1/7] mshv: Consolidate region creation and mapping
From: Stanislav Kinsburskii @ 2026-04-01 22:12 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, longli; +Cc: linux-hyperv, linux-kernel
In-Reply-To: <177508151446.215674.7844504277869257435.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>

Consolidate region type detection and initialization into
mshv_region_create() to simplify the region creation flow. Move type
determination logic (MMIO/pinned/movable) earlier in the process and
initialize type-specific fields during creation rather than after.

This eliminates the need for mshv_region_movable_init/fini() by
handling MMU interval notifier setup directly in the constructor and
teardown in the destructor. Region mapping is also unified through a
single mshv_map_region() dispatcher that routes to the appropriate
type-specific handler.

Changes improve code organization by:
- Reducing API surface (4 fewer exported functions)
- Centralizing type determination and validation
- Making region lifecycle more explicit and easier to follow
- Removing post-construction initialization steps

The refactoring maintains existing functionality while making the
codebase more maintainable and less error-prone.

Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
---
 drivers/hv/mshv_regions.c   |   81 ++++++++++++++++++++++++++++---------------
 drivers/hv/mshv_root.h      |   14 +++----
 drivers/hv/mshv_root_main.c |   61 +++++++++++++-------------------
 3 files changed, 83 insertions(+), 73 deletions(-)

diff --git a/drivers/hv/mshv_regions.c b/drivers/hv/mshv_regions.c
index 6b703b269a4f..a85d18e2c279 100644
--- a/drivers/hv/mshv_regions.c
+++ b/drivers/hv/mshv_regions.c
@@ -20,6 +20,8 @@
 #define MSHV_MAP_FAULT_IN_PAGES				PTRS_PER_PMD
 #define MSHV_INVALID_PFN				ULONG_MAX
 
+static const struct mmu_interval_notifier_ops mshv_region_mni_ops;
+
 /**
  * mshv_chunk_stride - Compute stride for mapping guest memory
  * @page      : The page to check for huge page backing
@@ -241,16 +243,39 @@ static int mshv_region_process_range(struct mshv_mem_region *region,
 	return 0;
 }
 
-struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pfns,
-					   u64 uaddr, u32 flags)
+struct mshv_mem_region *mshv_region_create(enum mshv_region_type type,
+					   u64 guest_pfn, u64 nr_pfns,
+					   u64 uaddr, u32 flags,
+					   ulong mmio_pfn)
 {
 	struct mshv_mem_region *region;
+	int ret = 0;
 	u64 i;
 
 	region = vzalloc(sizeof(*region) + sizeof(unsigned long) * nr_pfns);
 	if (!region)
 		return ERR_PTR(-ENOMEM);
 
+	switch (type) {
+	case MSHV_REGION_TYPE_MEM_MOVABLE:
+		ret = mmu_interval_notifier_insert(&region->mreg_mni,
+						   current->mm, uaddr,
+						   nr_pfns << HV_HYP_PAGE_SHIFT,
+						   &mshv_region_mni_ops);
+		break;
+	case MSHV_REGION_TYPE_MEM_PINNED:
+		break;
+	case MSHV_REGION_TYPE_MMIO:
+		region->mreg_mmio_pfn = mmio_pfn;
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (ret)
+		goto free_region;
+
+	region->mreg_type = type;
 	region->nr_pfns = nr_pfns;
 	region->start_gfn = guest_pfn;
 	region->start_uaddr = uaddr;
@@ -263,9 +288,14 @@ struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pfns,
 	for (i = 0; i < nr_pfns; i++)
 		region->mreg_pfns[i] = MSHV_INVALID_PFN;
 
+	mutex_init(&region->mreg_mutex);
 	kref_init(&region->mreg_refcount);
 
 	return region;
+
+free_region:
+	vfree(region);
+	return ERR_PTR(ret);
 }
 
 static int mshv_region_chunk_share(struct mshv_mem_region *region,
@@ -462,7 +492,7 @@ static void mshv_region_destroy(struct kref *ref)
 	int ret;
 
 	if (region->mreg_type == MSHV_REGION_TYPE_MEM_MOVABLE)
-		mshv_region_movable_fini(region);
+		mmu_interval_notifier_remove(&region->mreg_mni);
 
 	if (mshv_partition_encrypted(partition)) {
 		ret = mshv_region_share(region);
@@ -736,27 +766,6 @@ static const struct mmu_interval_notifier_ops mshv_region_mni_ops = {
 	.invalidate = mshv_region_interval_invalidate,
 };
 
-void mshv_region_movable_fini(struct mshv_mem_region *region)
-{
-	mmu_interval_notifier_remove(&region->mreg_mni);
-}
-
-bool mshv_region_movable_init(struct mshv_mem_region *region)
-{
-	int ret;
-
-	ret = mmu_interval_notifier_insert(&region->mreg_mni, current->mm,
-					   region->start_uaddr,
-					   region->nr_pfns << HV_HYP_PAGE_SHIFT,
-					   &mshv_region_mni_ops);
-	if (ret)
-		return false;
-
-	mutex_init(&region->mreg_mutex);
-
-	return true;
-}
-
 /**
  * mshv_map_pinned_region - Pin and map memory regions
  * @region: Pointer to the memory region structure
@@ -770,7 +779,7 @@ bool mshv_region_movable_init(struct mshv_mem_region *region)
  *
  * Return: 0 on success, negative error code on failure.
  */
-int mshv_map_pinned_region(struct mshv_mem_region *region)
+static int mshv_map_pinned_region(struct mshv_mem_region *region)
 {
 	struct mshv_partition *partition = region->partition;
 	int ret;
@@ -826,17 +835,31 @@ int mshv_map_pinned_region(struct mshv_mem_region *region)
 	return ret;
 }
 
-int mshv_map_movable_region(struct mshv_mem_region *region)
+static int mshv_map_movable_region(struct mshv_mem_region *region)
 {
 	return mshv_region_collect_and_map(region, 0, region->nr_pfns,
 					   false);
 }
 
-int mshv_map_mmio_region(struct mshv_mem_region *region,
-			 unsigned long mmio_pfn)
+static int mshv_map_mmio_region(struct mshv_mem_region *region)
 {
 	struct mshv_partition *partition = region->partition;
 
 	return hv_call_map_mmio_pfns(partition->pt_id, region->start_gfn,
-				     mmio_pfn, region->nr_pfns);
+				     region->mreg_mmio_pfn,
+				     region->nr_pfns);
+}
+
+int mshv_map_region(struct mshv_mem_region *region)
+{
+	switch (region->mreg_type) {
+	case MSHV_REGION_TYPE_MEM_PINNED:
+		return mshv_map_pinned_region(region);
+	case MSHV_REGION_TYPE_MEM_MOVABLE:
+		return mshv_map_movable_region(region);
+	case MSHV_REGION_TYPE_MMIO:
+		return mshv_map_mmio_region(region);
+	}
+
+	return -EINVAL;
 }
diff --git a/drivers/hv/mshv_root.h b/drivers/hv/mshv_root.h
index 1f92b9f85b60..2bcdfa070517 100644
--- a/drivers/hv/mshv_root.h
+++ b/drivers/hv/mshv_root.h
@@ -92,6 +92,7 @@ struct mshv_mem_region {
 	enum mshv_region_type mreg_type;
 	struct mmu_interval_notifier mreg_mni;
 	struct mutex mreg_mutex;	/* protects region PFNs remapping */
+	u64 mreg_mmio_pfn;
 	unsigned long mreg_pfns[];
 };
 
@@ -366,16 +367,13 @@ extern struct mshv_root mshv_root;
 extern enum hv_scheduler_type hv_scheduler_type;
 extern u8 * __percpu *hv_synic_eventring_tail;
 
-struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages,
-					   u64 uaddr, u32 flags);
+struct mshv_mem_region *mshv_region_create(enum mshv_region_type type,
+					   u64 guest_pfn, u64 nr_pfns,
+					   u64 uaddr, u32 flags,
+					   ulong mmio_pfn);
 void mshv_region_put(struct mshv_mem_region *region);
 int mshv_region_get(struct mshv_mem_region *region);
 bool mshv_region_handle_gfn_fault(struct mshv_mem_region *region, u64 gfn);
-void mshv_region_movable_fini(struct mshv_mem_region *region);
-bool mshv_region_movable_init(struct mshv_mem_region *region);
-int mshv_map_pinned_region(struct mshv_mem_region *region);
-int mshv_map_movable_region(struct mshv_mem_region *region);
-int mshv_map_mmio_region(struct mshv_mem_region *region,
-			 unsigned long mmio_pfn);
+int mshv_map_region(struct mshv_mem_region *region);
 
 #endif /* _MSHV_ROOT_H_ */
diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
index adb09350205a..3bfa9e9c575f 100644
--- a/drivers/hv/mshv_root_main.c
+++ b/drivers/hv/mshv_root_main.c
@@ -1217,11 +1217,14 @@ static void mshv_async_hvcall_handler(void *data, u64 *status)
  */
 static int mshv_partition_create_region(struct mshv_partition *partition,
 					struct mshv_user_mem_region *mem,
-					struct mshv_mem_region **regionpp,
-					bool is_mmio)
+					struct mshv_mem_region **regionpp)
 {
 	struct mshv_mem_region *rg;
+	enum mshv_region_type type;
 	u64 nr_pfns = HVPFN_DOWN(mem->size);
+	struct vm_area_struct *vma;
+	ulong mmio_pfn;
+	bool is_mmio;
 
 	/* Reject overlapping regions */
 	spin_lock(&partition->pt_mem_regions_lock);
@@ -1234,18 +1237,27 @@ static int mshv_partition_create_region(struct mshv_partition *partition,
 	}
 	spin_unlock(&partition->pt_mem_regions_lock);
 
-	rg = mshv_region_create(mem->guest_pfn, nr_pfns,
-				mem->userspace_addr, mem->flags);
-	if (IS_ERR(rg))
-		return PTR_ERR(rg);
+	mmap_read_lock(current->mm);
+	vma = vma_lookup(current->mm, mem->userspace_addr);
+	is_mmio = vma ? !!(vma->vm_flags & (VM_IO | VM_PFNMAP)) : 0;
+	mmio_pfn = is_mmio ? vma->vm_pgoff : 0;
+	mmap_read_unlock(current->mm);
+
+	if (!vma)
+		return -EINVAL;
 
 	if (is_mmio)
-		rg->mreg_type = MSHV_REGION_TYPE_MMIO;
-	else if (mshv_partition_encrypted(partition) ||
-		 !mshv_region_movable_init(rg))
-		rg->mreg_type = MSHV_REGION_TYPE_MEM_PINNED;
+		type = MSHV_REGION_TYPE_MMIO;
+	else if (mshv_partition_encrypted(partition))
+		type = MSHV_REGION_TYPE_MEM_PINNED;
 	else
-		rg->mreg_type = MSHV_REGION_TYPE_MEM_MOVABLE;
+		type = MSHV_REGION_TYPE_MEM_MOVABLE;
+
+	rg = mshv_region_create(type, mem->guest_pfn, nr_pfns,
+				mem->userspace_addr, mem->flags,
+				mmio_pfn);
+	if (IS_ERR(rg))
+		return PTR_ERR(rg);
 
 	rg->partition = partition;
 
@@ -1271,40 +1283,17 @@ mshv_map_user_memory(struct mshv_partition *partition,
 		     struct mshv_user_mem_region mem)
 {
 	struct mshv_mem_region *region;
-	struct vm_area_struct *vma;
-	bool is_mmio;
-	ulong mmio_pfn;
 	long ret;
 
 	if (mem.flags & BIT(MSHV_SET_MEM_BIT_UNMAP) ||
 	    !access_ok((const void __user *)mem.userspace_addr, mem.size))
 		return -EINVAL;
 
-	mmap_read_lock(current->mm);
-	vma = vma_lookup(current->mm, mem.userspace_addr);
-	is_mmio = vma ? !!(vma->vm_flags & (VM_IO | VM_PFNMAP)) : 0;
-	mmio_pfn = is_mmio ? vma->vm_pgoff : 0;
-	mmap_read_unlock(current->mm);
-
-	if (!vma)
-		return -EINVAL;
-
-	ret = mshv_partition_create_region(partition, &mem, &region,
-					   is_mmio);
+	ret = mshv_partition_create_region(partition, &mem, &region);
 	if (ret)
 		return ret;
 
-	switch (region->mreg_type) {
-	case MSHV_REGION_TYPE_MEM_PINNED:
-		ret = mshv_map_pinned_region(region);
-		break;
-	case MSHV_REGION_TYPE_MEM_MOVABLE:
-		ret = mshv_map_movable_region(region);
-		break;
-	case MSHV_REGION_TYPE_MMIO:
-		ret = mshv_map_mmio_region(region, mmio_pfn);
-		break;
-	}
+	ret = mshv_map_region(region);
 
 	trace_mshv_map_user_memory(partition->pt_id, region->start_uaddr,
 				   region->start_gfn, region->nr_pfns,



^ permalink raw reply related

* [PATCH 0/7] mshv: Reduce memory consumption for unpinned regions
From: Stanislav Kinsburskii @ 2026-04-01 22:12 UTC (permalink / raw)
  To: kys, haiyangz, wei.liu, decui, longli; +Cc: linux-hyperv, linux-kernel

This series reduces memory consumption for unpinned regions by avoiding
PFN array allocation. A 1GB unpinned region currently wastes 2MB for an
unused PFN array that HMM-managed regions don't need.

The first three patches are preparatory refactoring. Patch 1 consolidates
region creation and mapping logic, reducing API surface by 4 functions.
Patch 2 introduces a typedef for PFN handler callbacks to simplify
function signatures. Patch 3 renames mshv_mem_region to mshv_region to
align with existing function naming conventions.

Patch 4 optimizes unmap and no-access remap operations by eliminating
redundant PFN iteration when all frames are guaranteed to be mapped.
This uses large page flags for aligned chunks and removes unnecessary
helper functions.

Patches 5-6 decouple PFN processing from the region->pfns storage.
Patch 5 threads the pfns pointer explicitly through the processing
chain. Patch 6 removes offset-based indexing by having callers pass
pre-offset pointers.

Patch 7 converts the pfns array from a flexible array member to a
conditional pointer, allocated only for pinned regions that need it
for share/unshare/evict operations. This eliminates the memory waste
for unpinned regions and allows using kzalloc instead of vzalloc.

---

Stanislav Kinsburskii (7):
      mshv: Consolidate region creation and mapping
      mshv: Improve code readability with handler function typedef
      mshv: Rename mshv_mem_region to mshv_region
      mshv: Optimize memory region mapping operations
      mshv: Pass pfns array explicitly through processing chain
      mshv: Simplify pfn array handling in region processing
      mshv: Allocate pfns array only for pinned regions

 drivers/hv/mshv_regions.c   |  345 +++++++++++++++++++++++++------------------
 drivers/hv/mshv_root.h      |   26 ++-
 drivers/hv/mshv_root_main.c |   79 ++++------
 3 files changed, 248 insertions(+), 202 deletions(-)

^ permalink raw reply

* Re: [PATCH 6/6] mshv: unmap debugfs stats pages on kexec
From: Jork Loeser @ 2026-04-01 17:47 UTC (permalink / raw)
  To: Stanislav Kinsburskii
  Cc: linux-hyperv, x86, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, H . Peter Anvin, Arnd Bergmann,
	Roman Kisel, Michael Kelley, linux-kernel, linux-arch
In-Reply-To: <acrxBOLaHdku6kdZ@skinsburskii.localdomain>

On Mon, 30 Mar 2026, Stanislav Kinsburskii wrote:

>> diff --git a/drivers/hv/mshv_debugfs.c b/drivers/hv/mshv_debugfs.c
[...]
>> +++ b/drivers/hv/mshv_debugfs.c
>> @@ -676,8 +676,10 @@ int __init mshv_debugfs_init(void)
> nit: this should allow to avoid setting mshv_debugfs to NULL in the
> error path of mshv_debugfs_init():
>
> if (!IS_ERR_OR_NULL(mshv_debugfs))

Yes, of course one could. Though a permanent ERR_PTR in a global variable 
to indicate init-problems feels off to me. NULL for "not there" seems more 
canonical, no?

Best,
Jork

^ permalink raw reply

* Re: [PATCH 4/6] mshv: limit SynIC management to MSHV-owned resources
From: Jork Loeser @ 2026-04-01 17:43 UTC (permalink / raw)
  To: Stanislav Kinsburskii
  Cc: linux-hyperv, x86, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, H . Peter Anvin, Arnd Bergmann,
	Roman Kisel, Michael Kelley, linux-kernel, linux-arch
In-Reply-To: <acrwYf50SJnDwN3e@skinsburskii.localdomain>

On Mon, 30 Mar 2026, Stanislav Kinsburskii wrote:

>> ---
>>  drivers/hv/mshv_synic.c | 109 ++++++++++++++++++++++++----------------
>>  1 file changed, 67 insertions(+), 42 deletions(-)
>>
>> diff --git a/drivers/hv/mshv_synic.c b/drivers/hv/mshv_synic.c
[...]
>> @@ -454,7 +454,6 @@ int mshv_synic_init(unsigned int cpu)
[...]
> Is it possible to split out the root partition logic to a separate
> function(s) instead of weawing it into this function?
>
> Ideally, there should be a generic function called by VMBUS and a
> root partition-specific function called by MSHV if needed.

There are three cases/components: VMBus (L1VH/client), MSHV(root), 
MSHV(L1VH). VMBus can exist with and without MSHV, and should be
self-standing. MSHV can have root or L1VH beahvior. One could split
the MSHV behavior across two functions, though keeping them in sync in the 
future seems more fragile. Let alone awkward semantic comparisons 
in reviews and bug-searches. So I think a single funtion for both MSHV 
cases is just way easier to maintain.

Best, Jork

^ permalink raw reply

* RE: [PATCH 11/11] Drivers: hv: Kconfig: Add ARM64 support for MSHV_VTL
From: Michael Kelley @ 2026-04-01 16:58 UTC (permalink / raw)
  To: Naman Jain, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Catalin Marinas, Will Deacon,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Arnd Bergmann, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Ghiti
  Cc: Marc Zyngier, Timothy Hayes, Lorenzo Pieralisi, mrigendrachaubey,
	ssengar@linux.microsoft.com, Michael Kelley,
	linux-hyperv@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-riscv@lists.infradead.org
In-Reply-To: <20260316121241.910764-12-namjain@linux.microsoft.com>

From: Naman Jain <namjain@linux.microsoft.com> Sent: Monday, March 16, 2026 5:13 AM
> 

Nit: In keeping with past practice, the "Subject" prefix for this patch could
just be "Drivers: hv:"

> Enable ARM64 support in MSHV_VTL Kconfig now that all the necessary
> support is present.
> 
> Signed-off-by: Roman Kisel <romank@linux.microsoft.com>
> Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
> ---
>  drivers/hv/Kconfig | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
> index 7937ac0cbd0f..393cef272590 100644
> --- a/drivers/hv/Kconfig
> +++ b/drivers/hv/Kconfig
> @@ -87,7 +87,7 @@ config MSHV_ROOT
> 
>  config MSHV_VTL
>  	tristate "Microsoft Hyper-V VTL driver"
> -	depends on X86_64 && HYPERV_VTL_MODE
> +	depends on (X86_64 || ARM64) && HYPERV_VTL_MODE
>  	depends on HYPERV_VMBUS
>  	# Mapping VTL0 memory to a userspace process in VTL2 is supported in OpenHCL.
>  	# VTL2 for OpenHCL makes use of Huge Pages to improve performance on VMs,
> --
> 2.43.0
> 

The nit notwithstanding,

Reviewed-by: Michael Kelley <mhklinux@outlook.com>

^ permalink raw reply

* RE: [PATCH 10/11] Drivers: hv: Add support for arm64 in MSHV_VTL
From: Michael Kelley @ 2026-04-01 16:58 UTC (permalink / raw)
  To: Naman Jain, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Catalin Marinas, Will Deacon,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Arnd Bergmann, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Ghiti
  Cc: Marc Zyngier, Timothy Hayes, Lorenzo Pieralisi, mrigendrachaubey,
	ssengar@linux.microsoft.com, Michael Kelley,
	linux-hyperv@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-riscv@lists.infradead.org
In-Reply-To: <20260316121241.910764-11-namjain@linux.microsoft.com>

From: Naman Jain <namjain@linux.microsoft.com> Sent: Monday, March 16, 2026 5:13 AM
> 
> Add necessary support to make MSHV_VTL work for arm64 architecture.
> * Add stub implementation for mshv_vtl_return_call_init(): not required
>   for arm64
> * Remove fpu/legacy.h header inclusion, as this is not required
> * handle HV_REGISTER_VSM_CODE_PAGE_OFFSETS register: not supported
>   in arm64
> * Configure custom percpu_vmbus_handler by using
>   hv_setup_percpu_vmbus_handler()
> * Handle hugepage functions by config checks
> 
> Signed-off-by: Roman Kisel <romank@linux.microsoft.com>
> Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
> ---
>  arch/arm64/include/asm/mshyperv.h |  2 ++
>  drivers/hv/mshv_vtl_main.c        | 21 ++++++++++++++-------
>  2 files changed, 16 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/mshyperv.h
> b/arch/arm64/include/asm/mshyperv.h
> index 36803f0386cc..027a7f062d70 100644
> --- a/arch/arm64/include/asm/mshyperv.h
> +++ b/arch/arm64/include/asm/mshyperv.h
> @@ -83,6 +83,8 @@ static inline int hv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set, u
>  	return 1;
>  }
> 
> +static inline void mshv_vtl_return_call_init(u64 vtl_return_offset) {}
> +
>  void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0);
>  bool hv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu);
>  #endif
> diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c
> index 4c9ae65ad3e8..5702fe258500 100644
> --- a/drivers/hv/mshv_vtl_main.c
> +++ b/drivers/hv/mshv_vtl_main.c
> @@ -23,8 +23,6 @@
>  #include <trace/events/ipi.h>
>  #include <uapi/linux/mshv.h>
>  #include <hyperv/hvhdk.h>
> -
> -#include "../../kernel/fpu/legacy.h"

Was there a particular code change that made this unnecessary? Or was it
unnecessary from the start of this source code file? Just curious ....

>  #include "mshv.h"
>  #include "mshv_vtl.h"
>  #include "hyperv_vmbus.h"
> @@ -206,18 +204,21 @@ static void mshv_vtl_synic_enable_regs(unsigned int cpu)
>  static int mshv_vtl_get_vsm_regs(void)
>  {
>  	struct hv_register_assoc registers[2];
> -	int ret, count = 2;
> +	int ret, count = 0;
> 
> -	registers[0].name = HV_REGISTER_VSM_CODE_PAGE_OFFSETS;
> -	registers[1].name = HV_REGISTER_VSM_CAPABILITIES;
> +	registers[count++].name = HV_REGISTER_VSM_CAPABILITIES;
> +	/* Code page offset register is not supported on ARM */
> +	if (IS_ENABLED(CONFIG_X86_64))
> +		registers[count++].name = HV_REGISTER_VSM_CODE_PAGE_OFFSETS;
> 
>  	ret = hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
>  				       count, input_vtl_zero, registers);
>  	if (ret)
>  		return ret;
> 
> -	mshv_vsm_page_offsets.as_uint64 = registers[0].value.reg64;
> -	mshv_vsm_capabilities.as_uint64 = registers[1].value.reg64;
> +	mshv_vsm_capabilities.as_uint64 = registers[0].value.reg64;
> +	if (IS_ENABLED(CONFIG_X86_64))
> +		mshv_vsm_page_offsets.as_uint64 = registers[1].value.reg64;
> 
>  	return ret;
>  }

This function has gotten somewhat messy to handle the x86 and arm64
differences. Let me suggest a different approach. Have this function only
get the VSM capabilities register, as that is generic across x86 and
arm64. Then, update x86 mshv_vtl_return_call_init() to get the
PAGE_OFFSETS register and then immediately use the value to update
the static call. The global variable mshv_vms_page_offsets is no longer
necessary.

My suggestion might be little more code because hv_call_get_vp_registers()
is invoked in two different places. But it cleanly separates the two use
cases, and keeps the x86 hackery under arch/x86.

> @@ -280,10 +281,13 @@ static int hv_vtl_setup_synic(void)
> 
>  	/* Use our isr to first filter out packets destined for userspace */
>  	hv_setup_vmbus_handler(mshv_vtl_vmbus_isr);
> +	/* hv_setup_vmbus_handler() is stubbed for ARM64, add per-cpu VMBus handlers instead */
> +	hv_setup_percpu_vmbus_handler(mshv_vtl_vmbus_isr);
> 
>  	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vtl:online",
>  				mshv_vtl_alloc_context, NULL);
>  	if (ret < 0) {
> +		hv_setup_percpu_vmbus_handler(vmbus_isr);
>  		hv_setup_vmbus_handler(vmbus_isr);
>  		return ret;
>  	}
> @@ -296,6 +300,7 @@ static int hv_vtl_setup_synic(void)
>  static void hv_vtl_remove_synic(void)
>  {
>  	cpuhp_remove_state(mshv_vtl_cpuhp_online);
> +	hv_setup_percpu_vmbus_handler(vmbus_isr);
>  	hv_setup_vmbus_handler(vmbus_isr);
>  }
> 
> @@ -1080,10 +1085,12 @@ static vm_fault_t mshv_vtl_low_huge_fault(struct vm_fault *vmf, unsigned int ord
>  			ret = vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
>  		return ret;
> 
> +#if defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
>  	case PUD_ORDER:
>  		if (can_fault(vmf, PUD_SIZE, &pfn))
>  			ret = vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
>  		return ret;
> +#endif
> 
>  	default:
>  		return VM_FAULT_SIGBUS;
> --
> 2.43.0
> 


^ permalink raw reply

* RE: [PATCH 09/11] Drivers: hv: mshv_vtl: Let userspace do VSM configuration
From: Michael Kelley @ 2026-04-01 16:58 UTC (permalink / raw)
  To: Naman Jain, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Catalin Marinas, Will Deacon,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Arnd Bergmann, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Ghiti
  Cc: Marc Zyngier, Timothy Hayes, Lorenzo Pieralisi, mrigendrachaubey,
	ssengar@linux.microsoft.com, Michael Kelley,
	linux-hyperv@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-riscv@lists.infradead.org
In-Reply-To: <20260316121241.910764-10-namjain@linux.microsoft.com>

From: Naman Jain <namjain@linux.microsoft.com> Sent: Monday, March 16, 2026 5:13 AM
> 
> The kernel currently sets the VSM configuration register, thereby
> imposing certain VSM configuration on the userspace (OpenVMM).
> 
> The userspace (OpenVMM) has the capability to configure this register,
> and it is already doing it using the generic hypercall interface.
> The configuration can vary based on the use case or architectures, so
> let userspace take care of configuring it and remove this logic in the
> kernel driver.
> 
> Signed-off-by: Roman Kisel <romank@linux.microsoft.com>
> Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
> ---
>  drivers/hv/mshv_vtl_main.c | 29 -----------------------------
>  1 file changed, 29 deletions(-)
> 
> diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c
> index c79d24317b8e..4c9ae65ad3e8 100644
> --- a/drivers/hv/mshv_vtl_main.c
> +++ b/drivers/hv/mshv_vtl_main.c
> @@ -222,30 +222,6 @@ static int mshv_vtl_get_vsm_regs(void)
>  	return ret;
>  }
> 
> -static int mshv_vtl_configure_vsm_partition(struct device *dev)
> -{
> -	union hv_register_vsm_partition_config config;
> -	struct hv_register_assoc reg_assoc;
> -
> -	config.as_uint64 = 0;
> -	config.default_vtl_protection_mask = HV_MAP_GPA_PERMISSIONS_MASK;
> -	config.enable_vtl_protection = 1;
> -	config.zero_memory_on_reset = 1;
> -	config.intercept_vp_startup = 1;
> -	config.intercept_cpuid_unimplemented = 1;
> -
> -	if (mshv_vsm_capabilities.intercept_page_available) {
> -		dev_dbg(dev, "using intercept page\n");
> -		config.intercept_page = 1;
> -	}
> -
> -	reg_assoc.name = HV_REGISTER_VSM_PARTITION_CONFIG;
> -	reg_assoc.value.reg64 = config.as_uint64;
> -
> -	return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
> -				       1, input_vtl_zero, &reg_assoc);
> -}
> -
>  static void mshv_vtl_vmbus_isr(void)
>  {
>  	struct hv_per_cpu_context *per_cpu;
> @@ -1168,11 +1144,6 @@ static int __init mshv_vtl_init(void)
>  		ret = -ENODEV;
>  		goto free_dev;
>  	}
> -	if (mshv_vtl_configure_vsm_partition(dev)) {
> -		dev_emerg(dev, "VSM configuration failed !!\n");
> -		ret = -ENODEV;
> -		goto free_dev;
> -	}
> 
>  	mshv_vtl_return_call_init(mshv_vsm_page_offsets.vtl_return_offset);
>  	ret = hv_vtl_setup_synic();
> --
> 2.43.0
> 

Reviewed-by: Michael Kelley <mhklinux@outlook.com>


^ permalink raw reply

* RE: [PATCH 08/11] Drivers: hv: mshv_vtl: Move register page config to arch-specific files
From: Michael Kelley @ 2026-04-01 16:58 UTC (permalink / raw)
  To: Naman Jain, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Catalin Marinas, Will Deacon,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Arnd Bergmann, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Ghiti
  Cc: Marc Zyngier, Timothy Hayes, Lorenzo Pieralisi, mrigendrachaubey,
	ssengar@linux.microsoft.com, Michael Kelley,
	linux-hyperv@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-riscv@lists.infradead.org
In-Reply-To: <20260316121241.910764-9-namjain@linux.microsoft.com>

From: Naman Jain <namjain@linux.microsoft.com> Sent: Monday, March 16, 2026 5:13 AM
> 
> Move mshv_vtl_configure_reg_page() implementation from
> drivers/hv/mshv_vtl_main.c to arch-specific files:
> - arch/x86/hyperv/hv_vtl.c: full implementation with register page setup
> - arch/arm64/hyperv/hv_vtl.c: stub implementation (unsupported)
> 
> Move common type definitions to include/asm-generic/mshyperv.h:
> - struct mshv_vtl_per_cpu
> - union hv_synic_overlay_page_msr
> 
> Move hv_call_get_vp_registers() and hv_call_set_vp_registers()
> declarations to include/asm-generic/mshyperv.h since these functions
> are used by multiple modules.
> 
> While at it, remove the unnecessary stub implementations in #else
> case for mshv_vtl_return* functions in arch/x86/include/asm/mshyperv.h.

Seems like this patch is doing multiple things. The reg page configuration
changes are more substantial and should probably be in a patch by
themselves. The other changes are more trivial and maybe are OK
grouped into a single patch, but you could also consider breaking them
out.

> 
> This is essential for adding support for ARM64 in MSHV_VTL.
> 
> Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
> ---
>  arch/arm64/hyperv/hv_vtl.c        |  8 +++++
>  arch/arm64/include/asm/mshyperv.h |  3 ++
>  arch/x86/hyperv/hv_vtl.c          | 32 ++++++++++++++++++++
>  arch/x86/include/asm/mshyperv.h   |  7 ++---
>  drivers/hv/mshv.h                 |  8 -----
>  drivers/hv/mshv_vtl_main.c        | 49 +++----------------------------
>  include/asm-generic/mshyperv.h    | 42 ++++++++++++++++++++++++++
>  7 files changed, 92 insertions(+), 57 deletions(-)
> 
> diff --git a/arch/arm64/hyperv/hv_vtl.c b/arch/arm64/hyperv/hv_vtl.c
> index 66318672c242..d699138427c1 100644
> --- a/arch/arm64/hyperv/hv_vtl.c
> +++ b/arch/arm64/hyperv/hv_vtl.c
> @@ -10,6 +10,7 @@
>  #include <asm/boot.h>
>  #include <asm/mshyperv.h>
>  #include <asm/cpu_ops.h>
> +#include <linux/export.h>
> 
>  void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0)
>  {
> @@ -142,3 +143,10 @@ void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0)
>  		"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
>  }
>  EXPORT_SYMBOL(mshv_vtl_return_call);
> +
> +bool hv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu)
> +{
> +	pr_debug("Register page not supported on ARM64\n");
> +	return false;
> +}
> +EXPORT_SYMBOL_GPL(hv_vtl_configure_reg_page);
> diff --git a/arch/arm64/include/asm/mshyperv.h
> b/arch/arm64/include/asm/mshyperv.h
> index de7f3a41a8ea..36803f0386cc 100644
> --- a/arch/arm64/include/asm/mshyperv.h
> +++ b/arch/arm64/include/asm/mshyperv.h
> @@ -61,6 +61,8 @@ static inline u64 hv_get_non_nested_msr(unsigned int reg)
>  				ARM_SMCCC_OWNER_VENDOR_HYP,	\
>  				HV_SMCCC_FUNC_NUMBER)
> 
> +struct mshv_vtl_per_cpu;
> +
>  struct mshv_vtl_cpu_context {
>  /*
>   * NOTE: x18 is managed by the hypervisor. It won't be reloaded from this array.
> @@ -82,6 +84,7 @@ static inline int hv_vtl_get_set_reg(struct hv_register_assoc *regs,
> bool set, u
>  }
> 
>  void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0);
> +bool hv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu);

I think this declaration could be added in asm-generic/mshyperv.h so that it
is shared by x86 and arm64. That also obviates the need for the forward
ref to struct mshv_vtl_per_cpu that you've added here.

>  #endif
> 
>  #include <asm-generic/mshyperv.h>
> diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
> index 72a0bb4ae0c7..ede290985d41 100644
> --- a/arch/x86/hyperv/hv_vtl.c
> +++ b/arch/x86/hyperv/hv_vtl.c
> @@ -20,6 +20,7 @@
>  #include <uapi/asm/mtrr.h>
>  #include <asm/debugreg.h>
>  #include <linux/export.h>
> +#include <linux/hyperv.h>
>  #include <../kernel/smpboot.h>
>  #include "../../kernel/fpu/legacy.h"
> 
> @@ -259,6 +260,37 @@ int __init hv_vtl_early_init(void)
>  	return 0;
>  }
> 
> +static const union hv_input_vtl input_vtl_zero;
> +
> +bool hv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu)
> +{
> +	struct hv_register_assoc reg_assoc = {};
> +	union hv_synic_overlay_page_msr overlay = {};
> +	struct page *reg_page;
> +
> +	reg_page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL);
> +	if (!reg_page) {
> +		WARN(1, "failed to allocate register page\n");
> +		return false;
> +	}
> +
> +	overlay.enabled = 1;
> +	overlay.pfn = page_to_hvpfn(reg_page);
> +	reg_assoc.name = HV_X64_REGISTER_REG_PAGE;
> +	reg_assoc.value.reg64 = overlay.as_uint64;
> +
> +	if (hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
> +				     1, input_vtl_zero, &reg_assoc)) {
> +		WARN(1, "failed to setup register page\n");
> +		__free_page(reg_page);
> +		return false;
> +	}
> +
> +	per_cpu->reg_page = reg_page;
> +	return true;

As Sashiko AI noted, the memory allocated for the reg_page never gets freed.

> +}
> +EXPORT_SYMBOL_GPL(hv_vtl_configure_reg_page);
> +
>  DEFINE_STATIC_CALL_NULL(__mshv_vtl_return_hypercall, void (*)(void));
> 
>  void mshv_vtl_return_call_init(u64 vtl_return_offset)
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index d5355a5b7517..d592fea49cdb 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -271,6 +271,8 @@ static inline u64 hv_get_non_nested_msr(unsigned int reg) {
> return 0; }
>  static inline int hv_apicid_to_vp_index(u32 apic_id) { return -EINVAL; }
>  #endif /* CONFIG_HYPERV */
> 
> +struct mshv_vtl_per_cpu;
> +
>  struct mshv_vtl_cpu_context {
>  	union {
>  		struct {
> @@ -305,13 +307,10 @@ void mshv_vtl_return_call_init(u64 vtl_return_offset);
>  void mshv_vtl_return_hypercall(void);
>  void __mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0);
>  int hv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set, u64 shared);
> +bool hv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu);

Same as for arm64. Add a shared declaration in asm-generic/mshyperv.h.

>  #else
>  static inline void __init hv_vtl_init_platform(void) {}
>  static inline int __init hv_vtl_early_init(void) { return 0; }
> -static inline void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0) {}
> -static inline void mshv_vtl_return_call_init(u64 vtl_return_offset) {}
> -static inline void mshv_vtl_return_hypercall(void) {}
> -static inline void __mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0) {}
>  #endif
> 
>  #include <asm-generic/mshyperv.h>
> diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h
> index d4813df92b9c..0fcb7f9ba6a9 100644
> --- a/drivers/hv/mshv.h
> +++ b/drivers/hv/mshv.h
> @@ -14,14 +14,6 @@
>  	memchr_inv(&((STRUCT).MEMBER), \
>  		   0, sizeof_field(typeof(STRUCT), MEMBER))
> 
> -int hv_call_get_vp_registers(u32 vp_index, u64 partition_id, u16 count,
> -			     union hv_input_vtl input_vtl,
> -			     struct hv_register_assoc *registers);
> -
> -int hv_call_set_vp_registers(u32 vp_index, u64 partition_id, u16 count,
> -			     union hv_input_vtl input_vtl,
> -			     struct hv_register_assoc *registers);
> -
>  int hv_call_get_partition_property(u64 partition_id, u64 property_code,
>  				   u64 *property_value);
> 
> diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c
> index 91517b45d526..c79d24317b8e 100644
> --- a/drivers/hv/mshv_vtl_main.c
> +++ b/drivers/hv/mshv_vtl_main.c
> @@ -78,21 +78,6 @@ struct mshv_vtl {
>  	u64 id;
>  };
> 
> -struct mshv_vtl_per_cpu {
> -	struct mshv_vtl_run *run;
> -	struct page *reg_page;
> -};
> -
> -/* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */
> -union hv_synic_overlay_page_msr {
> -	u64 as_uint64;
> -	struct {
> -		u64 enabled: 1;
> -		u64 reserved: 11;
> -		u64 pfn: 52;
> -	} __packed;
> -};
> -
>  static struct mutex mshv_vtl_poll_file_lock;
>  static union hv_register_vsm_page_offsets mshv_vsm_page_offsets;
>  static union hv_register_vsm_capabilities mshv_vsm_capabilities;
> @@ -201,34 +186,6 @@ static struct page *mshv_vtl_cpu_reg_page(int cpu)
>  	return *per_cpu_ptr(&mshv_vtl_per_cpu.reg_page, cpu);
>  }
> 
> -static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu)
> -{
> -	struct hv_register_assoc reg_assoc = {};
> -	union hv_synic_overlay_page_msr overlay = {};
> -	struct page *reg_page;
> -
> -	reg_page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL);
> -	if (!reg_page) {
> -		WARN(1, "failed to allocate register page\n");
> -		return;
> -	}
> -
> -	overlay.enabled = 1;
> -	overlay.pfn = page_to_hvpfn(reg_page);
> -	reg_assoc.name = HV_X64_REGISTER_REG_PAGE;
> -	reg_assoc.value.reg64 = overlay.as_uint64;
> -
> -	if (hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
> -				     1, input_vtl_zero, &reg_assoc)) {
> -		WARN(1, "failed to setup register page\n");
> -		__free_page(reg_page);
> -		return;
> -	}
> -
> -	per_cpu->reg_page = reg_page;
> -	mshv_has_reg_page = true;
> -}
> -
>  static void mshv_vtl_synic_enable_regs(unsigned int cpu)
>  {
>  	union hv_synic_sint sint;
> @@ -329,8 +286,10 @@ static int mshv_vtl_alloc_context(unsigned int cpu)
>  	if (!per_cpu->run)
>  		return -ENOMEM;
> 
> -	if (mshv_vsm_capabilities.intercept_page_available)
> -		mshv_vtl_configure_reg_page(per_cpu);
> +	if (mshv_vsm_capabilities.intercept_page_available) {
> +		if (hv_vtl_configure_reg_page(per_cpu))
> +			mshv_has_reg_page = true;

As Sashiko AI noted, it doesn't work to use the global mshv_has_reg_page
to indicate the success of configuring the reg page, which is a per-cpu
operation. But this bug existed before this patch set, so maybe it should
be fixed as a preliminary patch.

> +	}
> 
>  	mshv_vtl_synic_enable_regs(cpu);
> 
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index b147a12085e4..b53fcc071596 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -383,8 +383,50 @@ static inline int hv_deposit_memory(u64 partition_id, u64 status)
>  	return hv_deposit_memory_node(NUMA_NO_NODE, partition_id, status);
>  }
> 
> +#if IS_ENABLED(CONFIG_MSHV_ROOT) || IS_ENABLED(CONFIG_MSHV_VTL)
> +int hv_call_get_vp_registers(u32 vp_index, u64 partition_id, u16 count,
> +			     union hv_input_vtl input_vtl,
> +			     struct hv_register_assoc *registers);
> +
> +int hv_call_set_vp_registers(u32 vp_index, u64 partition_id, u16 count,
> +			     union hv_input_vtl input_vtl,
> +			     struct hv_register_assoc *registers);
> +#else
> +static inline int hv_call_get_vp_registers(u32 vp_index, u64 partition_id,
> +					   u16 count,
> +					   union hv_input_vtl input_vtl,
> +					   struct hv_register_assoc *registers)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
> +static inline int hv_call_set_vp_registers(u32 vp_index, u64 partition_id,
> +					   u16 count,
> +					   union hv_input_vtl input_vtl,
> +					   struct hv_register_assoc *registers)
> +{
> +	return -EOPNOTSUPP;
> +}
> +#endif /* CONFIG_MSHV_ROOT || CONFIG_MSHV_VTL */
> +
>  #define HV_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
> +
>  #if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)
> +struct mshv_vtl_per_cpu {
> +	struct mshv_vtl_run *run;
> +	struct page *reg_page;
> +};
> +
> +/* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */
> +union hv_synic_overlay_page_msr {
> +	u64 as_uint64;
> +	struct {
> +		u64 enabled: 1;
> +		u64 reserved: 11;
> +		u64 pfn: 52;
> +	} __packed;
> +};
> +
>  u8 __init get_vtl(void);
>  #else
>  static inline u8 get_vtl(void) { return 0; }
> --
> 2.43.0
> 

Sashiko AI noted another existing bug in mshv_vtl_init(), which is that
the error path does kfree(mem_dev) when it should do
put_device(mem_dev).  See the comment in the header of
device_initialize().


^ permalink raw reply

* RE: [PATCH 07/11] arch: arm64: Add support for mshv_vtl_return_call
From: Michael Kelley @ 2026-04-01 16:57 UTC (permalink / raw)
  To: Naman Jain, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Catalin Marinas, Will Deacon,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Arnd Bergmann, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Ghiti
  Cc: Marc Zyngier, Timothy Hayes, Lorenzo Pieralisi, mrigendrachaubey,
	ssengar@linux.microsoft.com, Michael Kelley,
	linux-hyperv@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-riscv@lists.infradead.org
In-Reply-To: <20260316121241.910764-8-namjain@linux.microsoft.com>

From: Naman Jain <namjain@linux.microsoft.com> Sent: Monday, March 16, 2026 5:13 AM
> 

Nit: For historical consistency, use "arm64: hyperv:" as the prefix in the patch Subject.

> Add support for arm64 specific variant of mshv_vtl_return_call function
> to be able to add support for arm64 in MSHV_VTL driver. This would
> help enable the transition between Virtual Trust Levels (VTL) in
> MSHV_VTL when the kernel acts as a paravisor.

This commit message has a fair number of "filler" words. Suggest simplifying to:

Add the arm64 variant of mshv_vtl_return_call() to support the MSHV_VTL
driver on arm64.  This function enables the transition between Virtual Trust
Levels (VTLs) in MSHV_VTL when the kernel acts as a paravisor.

> 
> Signed-off-by: Roman Kisel <romank@linux.microsoft.com>
> Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
> ---
>  arch/arm64/hyperv/Makefile        |   1 +
>  arch/arm64/hyperv/hv_vtl.c        | 144 ++++++++++++++++++++++++++++++
>  arch/arm64/include/asm/mshyperv.h |  13 +++
>  3 files changed, 158 insertions(+)
>  create mode 100644 arch/arm64/hyperv/hv_vtl.c
> 
> diff --git a/arch/arm64/hyperv/Makefile b/arch/arm64/hyperv/Makefile
> index 87c31c001da9..9701a837a6e1 100644
> --- a/arch/arm64/hyperv/Makefile
> +++ b/arch/arm64/hyperv/Makefile
> @@ -1,2 +1,3 @@
>  # SPDX-License-Identifier: GPL-2.0
>  obj-y		:= hv_core.o mshyperv.o
> +obj-$(CONFIG_HYPERV_VTL_MODE)	+= hv_vtl.o
> diff --git a/arch/arm64/hyperv/hv_vtl.c b/arch/arm64/hyperv/hv_vtl.c
> new file mode 100644
> index 000000000000..66318672c242
> --- /dev/null
> +++ b/arch/arm64/hyperv/hv_vtl.c
> @@ -0,0 +1,144 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2026, Microsoft, Inc.
> + *
> + * Authors:
> + *     Roman Kisel <romank@linux.microsoft.com>
> + *     Naman Jain <namjain@linux.microsoft.com>
> + */
> +
> +#include <asm/boot.h>
> +#include <asm/mshyperv.h>
> +#include <asm/cpu_ops.h>
> +
> +void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0)
> +{
> +	u64 base_ptr = (u64)vtl0->x;
> +
> +	/*
> +	 * VTL switch for ARM64 platform - managing VTL0's CPU context.
> +	 * We explicitly use the stack to save the base pointer, and use x16
> +	 * as our working register for accessing the context structure.
> +	 *
> +	 * Register Handling:
> +	 * - X0-X17: Saved/restored (general-purpose, shared for VTL communication)
> +	 * - X18: NOT touched - hypervisor-managed per-VTL (platform register)
> +	 * - X19-X30: Saved/restored (part of VTL0's execution context)
> +	 * - Q0-Q31: Saved/restored (128-bit NEON/floating-point registers, shared)
> +	 * - SP: Not in structure, hypervisor-managed per-VTL
> +	 *
> +	 * Note: X29 (FP) and X30 (LR) are in the structure and must be saved/restored
> +	 * as part of VTL0's complete execution state.

Could drop "Note:".  That's what comments are. :-)


> +	 */
> +	asm __volatile__ (
> +		/* Save base pointer to stack explicitly, then load into x16 */
> +		"str %0, [sp, #-16]!\n\t"     /* Push base pointer onto stack */
> +		"mov x16, %0\n\t"             /* Load base pointer into x16 */
> +		/* Volatile registers (Windows ARM64 ABI: x0-x15) */
> +		"ldp x0, x1, [x16]\n\t"
> +		"ldp x2, x3, [x16, #(2*8)]\n\t"

On the x86 side, there's machinery to generate a series of constants that are
the offsets of the individual fields in struct mshv_vtl_cpu_context. The x86
asm code then uses these constants. Here on the arm64 side, you've calculated
the offsets directly in the asm code. Any reason for the difference? I can see
it's fairly easy to eyeball the offsets here and compare against the registers
that are being loaded, where it's not so easy the way registers are named
on x86. So maybe the additional machinery that's helpful on the x86 side
is less necessary here. Just wondering ....

> +		"ldp x4, x5, [x16, #(4*8)]\n\t"
> +		"ldp x6, x7, [x16, #(6*8)]\n\t"
> +		"ldp x8, x9, [x16, #(8*8)]\n\t"
> +		"ldp x10, x11, [x16, #(10*8)]\n\t"
> +		"ldp x12, x13, [x16, #(12*8)]\n\t"
> +		"ldp x14, x15, [x16, #(14*8)]\n\t"
> +		/* x16 will be loaded last, after saving base pointer */
> +		"ldr x17, [x16, #(17*8)]\n\t"
> +		/* x18 is hypervisor-managed per-VTL - DO NOT LOAD */
> +
> +		/* General-purpose registers: x19-x30 */
> +		"ldp x19, x20, [x16, #(19*8)]\n\t"
> +		"ldp x21, x22, [x16, #(21*8)]\n\t"
> +		"ldp x23, x24, [x16, #(23*8)]\n\t"
> +		"ldp x25, x26, [x16, #(25*8)]\n\t"
> +		"ldp x27, x28, [x16, #(27*8)]\n\t"
> +
> +		/* Frame pointer and link register */
> +		"ldp x29, x30, [x16, #(29*8)]\n\t"
> +
> +		/* Shared NEON/FP registers: Q0-Q31 (128-bit) */
> +		"ldp q0, q1, [x16, #(32*8)]\n\t"
> +		"ldp q2, q3, [x16, #(32*8 + 2*16)]\n\t"
> +		"ldp q4, q5, [x16, #(32*8 + 4*16)]\n\t"
> +		"ldp q6, q7, [x16, #(32*8 + 6*16)]\n\t"
> +		"ldp q8, q9, [x16, #(32*8 + 8*16)]\n\t"
> +		"ldp q10, q11, [x16, #(32*8 + 10*16)]\n\t"
> +		"ldp q12, q13, [x16, #(32*8 + 12*16)]\n\t"
> +		"ldp q14, q15, [x16, #(32*8 + 14*16)]\n\t"
> +		"ldp q16, q17, [x16, #(32*8 + 16*16)]\n\t"
> +		"ldp q18, q19, [x16, #(32*8 + 18*16)]\n\t"
> +		"ldp q20, q21, [x16, #(32*8 + 20*16)]\n\t"
> +		"ldp q22, q23, [x16, #(32*8 + 22*16)]\n\t"
> +		"ldp q24, q25, [x16, #(32*8 + 24*16)]\n\t"
> +		"ldp q26, q27, [x16, #(32*8 + 26*16)]\n\t"
> +		"ldp q28, q29, [x16, #(32*8 + 28*16)]\n\t"
> +		"ldp q30, q31, [x16, #(32*8 + 30*16)]\n\t"
> +
> +		/* Now load x16 itself */
> +		"ldr x16, [x16, #(16*8)]\n\t"
> +
> +		/* Return to the lower VTL */
> +		"hvc #3\n\t"
> +
> +		/* Save context after return - reload base pointer from stack */
> +		"stp x16, x17, [sp, #-16]!\n\t" /* Save x16, x17 temporarily */
> +		"ldr x16, [sp, #16]\n\t"        /* Reload base pointer (skip saved x16,x17) */
> +
> +		/* Volatile registers */
> +		"stp x0, x1, [x16]\n\t"
> +		"stp x2, x3, [x16, #(2*8)]\n\t"
> +		"stp x4, x5, [x16, #(4*8)]\n\t"
> +		"stp x6, x7, [x16, #(6*8)]\n\t"
> +		"stp x8, x9, [x16, #(8*8)]\n\t"
> +		"stp x10, x11, [x16, #(10*8)]\n\t"
> +		"stp x12, x13, [x16, #(12*8)]\n\t"
> +		"stp x14, x15, [x16, #(14*8)]\n\t"
> +		"ldp x0, x1, [sp], #16\n\t"      /* Recover saved x16, x17 */
> +		"stp x0, x1, [x16, #(16*8)]\n\t"
> +		/* x18 is hypervisor-managed - DO NOT SAVE */
> +
> +		/* General-purpose registers: x19-x30 */
> +		"stp x19, x20, [x16, #(19*8)]\n\t"
> +		"stp x21, x22, [x16, #(21*8)]\n\t"
> +		"stp x23, x24, [x16, #(23*8)]\n\t"
> +		"stp x25, x26, [x16, #(25*8)]\n\t"
> +		"stp x27, x28, [x16, #(27*8)]\n\t"
> +		"stp x29, x30, [x16, #(29*8)]\n\t"  /* Frame pointer and link register */
> +
> +		/* Shared NEON/FP registers: Q0-Q31 (128-bit) */
> +		"stp q0, q1, [x16, #(32*8)]\n\t"
> +		"stp q2, q3, [x16, #(32*8 + 2*16)]\n\t"
> +		"stp q4, q5, [x16, #(32*8 + 4*16)]\n\t"
> +		"stp q6, q7, [x16, #(32*8 + 6*16)]\n\t"
> +		"stp q8, q9, [x16, #(32*8 + 8*16)]\n\t"
> +		"stp q10, q11, [x16, #(32*8 + 10*16)]\n\t"
> +		"stp q12, q13, [x16, #(32*8 + 12*16)]\n\t"
> +		"stp q14, q15, [x16, #(32*8 + 14*16)]\n\t"
> +		"stp q16, q17, [x16, #(32*8 + 16*16)]\n\t"
> +		"stp q18, q19, [x16, #(32*8 + 18*16)]\n\t"
> +		"stp q20, q21, [x16, #(32*8 + 20*16)]\n\t"
> +		"stp q22, q23, [x16, #(32*8 + 22*16)]\n\t"
> +		"stp q24, q25, [x16, #(32*8 + 24*16)]\n\t"
> +		"stp q26, q27, [x16, #(32*8 + 26*16)]\n\t"
> +		"stp q28, q29, [x16, #(32*8 + 28*16)]\n\t"
> +		"stp q30, q31, [x16, #(32*8 + 30*16)]\n\t"
> +
> +		/* Clean up stack - pop base pointer */
> +		"add sp, sp, #16\n\t"
> +
> +		: /* No outputs */
> +		: /* Input */ "r"(base_ptr)
> +		: /* Clobber list - x16 used as base, x18 is hypervisor-managed (not touched) */
> +		"memory", "cc",
> +		"x0", "x1", "x2", "x3", "x4", "x5",
> +		"x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13",
> +		"x14", "x15", "x16", "x17", "x19", "x20", "x21",
> +		"x22", "x23", "x24", "x25", "x26", "x27", "x28",
> +		"x29", "x30",
> +		"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
> +		"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
> +		"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
> +		"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
> +}
> +EXPORT_SYMBOL(mshv_vtl_return_call);
> diff --git a/arch/arm64/include/asm/mshyperv.h
> b/arch/arm64/include/asm/mshyperv.h
> index 804068e0941b..de7f3a41a8ea 100644
> --- a/arch/arm64/include/asm/mshyperv.h
> +++ b/arch/arm64/include/asm/mshyperv.h
> @@ -60,6 +60,17 @@ static inline u64 hv_get_non_nested_msr(unsigned int reg)
>  				ARM_SMCCC_SMC_64,		\
>  				ARM_SMCCC_OWNER_VENDOR_HYP,	\
>  				HV_SMCCC_FUNC_NUMBER)
> +
> +struct mshv_vtl_cpu_context {
> +/*
> + * NOTE: x18 is managed by the hypervisor. It won't be reloaded from this array.
> + * It is included here for convenience in the common case.

I'm not getting your point in this last sentence. What is the "common case"?

You could also drop the "NOTE: " prefix.

> + */
> +	__u64 x[31];
> +	__u64 rsvd;
> +	__uint128_t q[32];
> +};

struct mshv_vtl_run reserves 1024 bytes for cpu_context. It would be nice to
have a compile-time check that the size of struct mshv_vtl_cpu_context fits in
that 1024 bytes. That check might be better added where struct mshv_vtl_run
is defined so that it works for both x86 and arm64.

> +
>  #ifdef CONFIG_HYPERV_VTL_MODE
>  /*
>   * Get/Set the register. If the function returns `1`, that must be done via
> @@ -69,6 +80,8 @@ static inline int hv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set, u
>  {
>  	return 1;
>  }
> +
> +void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0);

This declaration now duplicated in mshyperv.h under arch/arm64 and under
arch/x86.  Instead, it should be added to asm-generic/mshyperv.h, and
removed from the arch/x86 mshyperv.h, so that there's only a single
instance of the declaration.

>  #endif
> 
>  #include <asm-generic/mshyperv.h>
> --
> 2.43.0
> 


^ permalink raw reply

* RE: [PATCH 06/11] Drivers: hv: Make sint vector architecture neutral in MSHV_VTL
From: Michael Kelley @ 2026-04-01 16:57 UTC (permalink / raw)
  To: Naman Jain, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Catalin Marinas, Will Deacon,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Arnd Bergmann, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Ghiti
  Cc: Marc Zyngier, Timothy Hayes, Lorenzo Pieralisi, mrigendrachaubey,
	ssengar@linux.microsoft.com, Michael Kelley,
	linux-hyperv@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-riscv@lists.infradead.org
In-Reply-To: <20260316121241.910764-7-namjain@linux.microsoft.com>

From: Naman Jain <namjain@linux.microsoft.com> Sent: Monday, March 16, 2026 5:13 AM
> 
> Generalize Synthetic interrupt source vector (sint) to use
> vmbus_interrupt variable instead, which automatically takes care of
> architectures where HYPERVISOR_CALLBACK_VECTOR is not present (arm64).

Sashiko AI raised an interesting question about the startup timing --
whether the vmbus_platform_driver_probe() is guaranteed to have
set vmbus_interrupt before the VTL functions below run and use it.
What causes the mshv_vtl.ko module to be loaded, and hence run
mshv_vtl_init()?

> 
> Signed-off-by: Roman Kisel <romank@linux.microsoft.com>
> Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
> ---
>  drivers/hv/mshv_vtl_main.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c
> index b607b6e7e121..91517b45d526 100644
> --- a/drivers/hv/mshv_vtl_main.c
> +++ b/drivers/hv/mshv_vtl_main.c
> @@ -234,7 +234,7 @@ static void mshv_vtl_synic_enable_regs(unsigned int cpu)
>  	union hv_synic_sint sint;
> 
>  	sint.as_uint64 = 0;
> -	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
> +	sint.vector = vmbus_interrupt;
>  	sint.masked = false;
>  	sint.auto_eoi = hv_recommend_using_aeoi();
> 
> @@ -753,7 +753,7 @@ static void mshv_vtl_synic_mask_vmbus_sint(void *info)
>  	const u8 *mask = info;
> 
>  	sint.as_uint64 = 0;
> -	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
> +	sint.vector = vmbus_interrupt;
>  	sint.masked = (*mask != 0);
>  	sint.auto_eoi = hv_recommend_using_aeoi();
> 
> --
> 2.43.0
> 

Assuming there's no timing problem vs. the VMBus driver,

Reviewed-by: Michael Kelley <mhklinux@outlook.com>

^ permalink raw reply

* RE: [PATCH 05/11] drivers: hv: Export vmbus_interrupt for mshv_vtl module
From: Michael Kelley @ 2026-04-01 16:56 UTC (permalink / raw)
  To: Naman Jain, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Catalin Marinas, Will Deacon,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Arnd Bergmann, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Ghiti
  Cc: Marc Zyngier, Timothy Hayes, Lorenzo Pieralisi, mrigendrachaubey,
	ssengar@linux.microsoft.com, Michael Kelley,
	linux-hyperv@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-riscv@lists.infradead.org
In-Reply-To: <20260316121241.910764-6-namjain@linux.microsoft.com>

From: Naman Jain <namjain@linux.microsoft.com> Sent: Monday, March 16, 2026 5:13 AM
> 

Nit:  For the patch Subject, capitalize "Drivers:" in the prefix.

> vmbus_interrupt is used in mshv_vtl_main.c to set the SINT vector.
> When CONFIG_MSHV_VTL=m and CONFIG_HYPERV_VMBUS=y (built-in), the module
> cannot access vmbus_interrupt at load time since it is not exported.
> 
> Export it using EXPORT_SYMBOL_FOR_MODULES consistent with the existing
> pattern used for vmbus_isr.
> 
> Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
> ---
>  drivers/hv/vmbus_drv.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index f99d4f2d3862..de191799a8f6 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -57,6 +57,7 @@ static DEFINE_PER_CPU(long, vmbus_evt);
>  /* Values parsed from ACPI DSDT */
>  int vmbus_irq;
>  int vmbus_interrupt;
> +EXPORT_SYMBOL_FOR_MODULES(vmbus_interrupt, "mshv_vtl");
> 
>  /*
>   * If the Confidential VMBus is used, the data on the "wire" is not
> --
> 2.43.0
> 

Reviewed-by: Michael Kelley <mhklinux@outlook.com>


^ permalink raw reply

* RE: [PATCH 04/11] Drivers: hv: Refactor mshv_vtl for ARM64 support to be added
From: Michael Kelley @ 2026-04-01 16:56 UTC (permalink / raw)
  To: Naman Jain, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Catalin Marinas, Will Deacon,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Arnd Bergmann, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Ghiti
  Cc: Marc Zyngier, Timothy Hayes, Lorenzo Pieralisi, mrigendrachaubey,
	ssengar@linux.microsoft.com, Michael Kelley,
	linux-hyperv@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-riscv@lists.infradead.org
In-Reply-To: <20260316121241.910764-5-namjain@linux.microsoft.com>

From: Naman Jain <namjain@linux.microsoft.com> Sent: Monday, March 16, 2026 5:13 AM
> 
> Refactor MSHV_VTL driver to move some of the x86 specific code to arch
> specific files, and add corresponding functions for arm64.
> 
> Signed-off-by: Roman Kisel <romank@linux.microsoft.com>
> Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
> ---
>  arch/arm64/include/asm/mshyperv.h |  10 +++
>  arch/x86/hyperv/hv_vtl.c          |  98 ++++++++++++++++++++++++++++
>  arch/x86/include/asm/mshyperv.h   |   1 +
>  drivers/hv/mshv_vtl_main.c        | 102 +-----------------------------
>  4 files changed, 111 insertions(+), 100 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/mshyperv.h
> b/arch/arm64/include/asm/mshyperv.h
> index b721d3134ab6..804068e0941b 100644
> --- a/arch/arm64/include/asm/mshyperv.h
> +++ b/arch/arm64/include/asm/mshyperv.h
> @@ -60,6 +60,16 @@ static inline u64 hv_get_non_nested_msr(unsigned int reg)
>  				ARM_SMCCC_SMC_64,		\
>  				ARM_SMCCC_OWNER_VENDOR_HYP,	\
>  				HV_SMCCC_FUNC_NUMBER)
> +#ifdef CONFIG_HYPERV_VTL_MODE
> +/*
> + * Get/Set the register. If the function returns `1`, that must be done via
> + * a hypercall. Returning `0` means success.
> + */
> +static inline int hv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set, u64 shared)
> +{
> +	return 1;
> +}
> +#endif
> 
>  #include <asm-generic/mshyperv.h>
> 
> diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
> index 9b6a9bc4ab76..72a0bb4ae0c7 100644
> --- a/arch/x86/hyperv/hv_vtl.c
> +++ b/arch/x86/hyperv/hv_vtl.c
> @@ -17,6 +17,8 @@
>  #include <asm/realmode.h>
>  #include <asm/reboot.h>
>  #include <asm/smap.h>
> +#include <uapi/asm/mtrr.h>
> +#include <asm/debugreg.h>
>  #include <linux/export.h>
>  #include <../kernel/smpboot.h>
>  #include "../../kernel/fpu/legacy.h"
> @@ -281,3 +283,99 @@ void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0)
>  	kernel_fpu_end();
>  }
>  EXPORT_SYMBOL(mshv_vtl_return_call);
> +
> +/* Static table mapping register names to their corresponding actions */
> +static const struct {
> +	enum hv_register_name reg_name;
> +	int debug_reg_num;  /* -1 if not a debug register */
> +	u32 msr_addr;       /* 0 if not an MSR */
> +} reg_table[] = {
> +	/* Debug registers */
> +	{HV_X64_REGISTER_DR0, 0, 0},
> +	{HV_X64_REGISTER_DR1, 1, 0},
> +	{HV_X64_REGISTER_DR2, 2, 0},
> +	{HV_X64_REGISTER_DR3, 3, 0},
> +	{HV_X64_REGISTER_DR6, 6, 0},
> +	/* MTRR MSRs */
> +	{HV_X64_REGISTER_MSR_MTRR_CAP, -1, MSR_MTRRcap},
> +	{HV_X64_REGISTER_MSR_MTRR_DEF_TYPE, -1, MSR_MTRRdefType},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0, -1, MTRRphysBase_MSR(0)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1, -1, MTRRphysBase_MSR(1)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2, -1, MTRRphysBase_MSR(2)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3, -1, MTRRphysBase_MSR(3)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4, -1, MTRRphysBase_MSR(4)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5, -1, MTRRphysBase_MSR(5)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6, -1, MTRRphysBase_MSR(6)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7, -1, MTRRphysBase_MSR(7)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE8, -1, MTRRphysBase_MSR(8)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE9, -1, MTRRphysBase_MSR(9)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEA, -1, MTRRphysBase_MSR(0xa)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEB, -1, MTRRphysBase_MSR(0xb)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEC, -1, MTRRphysBase_MSR(0xc)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASED, -1, MTRRphysBase_MSR(0xd)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEE, -1, MTRRphysBase_MSR(0xe)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEF, -1, MTRRphysBase_MSR(0xf)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0, -1, MTRRphysMask_MSR(0)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1, -1, MTRRphysMask_MSR(1)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2, -1, MTRRphysMask_MSR(2)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3, -1, MTRRphysMask_MSR(3)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4, -1, MTRRphysMask_MSR(4)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5, -1, MTRRphysMask_MSR(5)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6, -1, MTRRphysMask_MSR(6)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7, -1, MTRRphysMask_MSR(7)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK8, -1, MTRRphysMask_MSR(8)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK9, -1, MTRRphysMask_MSR(9)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKA, -1, MTRRphysMask_MSR(0xa)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKB, -1, MTRRphysMask_MSR(0xb)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKC, -1, MTRRphysMask_MSR(0xc)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKD, -1, MTRRphysMask_MSR(0xd)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKE, -1, MTRRphysMask_MSR(0xe)},
> +	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKF, -1, MTRRphysMask_MSR(0xf)},
> +	{HV_X64_REGISTER_MSR_MTRR_FIX64K00000, -1, MSR_MTRRfix64K_00000},
> +	{HV_X64_REGISTER_MSR_MTRR_FIX16K80000, -1, MSR_MTRRfix16K_80000},
> +	{HV_X64_REGISTER_MSR_MTRR_FIX16KA0000, -1, MSR_MTRRfix16K_A0000},
> +	{HV_X64_REGISTER_MSR_MTRR_FIX4KC0000, -1, MSR_MTRRfix4K_C0000},
> +	{HV_X64_REGISTER_MSR_MTRR_FIX4KC8000, -1, MSR_MTRRfix4K_C8000},
> +	{HV_X64_REGISTER_MSR_MTRR_FIX4KD0000, -1, MSR_MTRRfix4K_D0000},
> +	{HV_X64_REGISTER_MSR_MTRR_FIX4KD8000, -1, MSR_MTRRfix4K_D8000},
> +	{HV_X64_REGISTER_MSR_MTRR_FIX4KE0000, -1, MSR_MTRRfix4K_E0000},
> +	{HV_X64_REGISTER_MSR_MTRR_FIX4KE8000, -1, MSR_MTRRfix4K_E8000},
> +	{HV_X64_REGISTER_MSR_MTRR_FIX4KF0000, -1, MSR_MTRRfix4K_F0000},
> +	{HV_X64_REGISTER_MSR_MTRR_FIX4KF8000, -1, MSR_MTRRfix4K_F8000},
> +};
> +
> +int hv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set, u64 shared)
> +{
> +	u64 *reg64;
> +	enum hv_register_name gpr_name;
> +	int i;
> +
> +	gpr_name = regs->name;
> +	reg64 = &regs->value.reg64;
> +
> +	/* Search for the register in the table */
> +	for (i = 0; i < ARRAY_SIZE(reg_table); i++) {
> +		if (reg_table[i].reg_name != gpr_name)
> +			continue;
> +		if (reg_table[i].debug_reg_num != -1) {
> +			/* Handle debug registers */
> +			if (gpr_name == HV_X64_REGISTER_DR6 && !shared)
> +				goto hypercall;
> +			if (set)
> +				native_set_debugreg(reg_table[i].debug_reg_num, *reg64);
> +			else
> +				*reg64 = native_get_debugreg(reg_table[i].debug_reg_num);
> +		} else {
> +			/* Handle MSRs */
> +			if (set)
> +				wrmsrl(reg_table[i].msr_addr, *reg64);
> +			else
> +				rdmsrl(reg_table[i].msr_addr, *reg64);
> +		}
> +		return 0;
> +	}
> +
> +hypercall:
> +	return 1;
> +}
> +EXPORT_SYMBOL(hv_vtl_get_set_reg);
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index f64393e853ee..d5355a5b7517 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -304,6 +304,7 @@ void mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0);
>  void mshv_vtl_return_call_init(u64 vtl_return_offset);
>  void mshv_vtl_return_hypercall(void);
>  void __mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0);
> +int hv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set, u64 shared);

Can this move to asm-generic/mshyperv.h?  The function is no longer specific
to x86/x64, so one would want to not declare it in the arch/x86 version
of mshyperv.h. But maybe moving it to asm-generic/mshyperv.h breaks
compilation on arm64 because there's also the static inline stub there.

>  #else
>  static inline void __init hv_vtl_init_platform(void) {}
>  static inline int __init hv_vtl_early_init(void) { return 0; }
> diff --git a/drivers/hv/mshv_vtl_main.c b/drivers/hv/mshv_vtl_main.c
> index 5856975f32e1..b607b6e7e121 100644
> --- a/drivers/hv/mshv_vtl_main.c
> +++ b/drivers/hv/mshv_vtl_main.c
> @@ -19,10 +19,8 @@
>  #include <linux/poll.h>
>  #include <linux/file.h>
>  #include <linux/vmalloc.h>
> -#include <asm/debugreg.h>
>  #include <asm/mshyperv.h>
>  #include <trace/events/ipi.h>
> -#include <uapi/asm/mtrr.h>
>  #include <uapi/linux/mshv.h>
>  #include <hyperv/hvhdk.h>
> 
> @@ -505,102 +503,6 @@ static int mshv_vtl_ioctl_set_poll_file(struct mshv_vtl_set_poll_file __user *us
>  	return 0;
>  }
> 
> -/* Static table mapping register names to their corresponding actions */
> -static const struct {
> -	enum hv_register_name reg_name;
> -	int debug_reg_num;  /* -1 if not a debug register */
> -	u32 msr_addr;       /* 0 if not an MSR */
> -} reg_table[] = {
> -	/* Debug registers */
> -	{HV_X64_REGISTER_DR0, 0, 0},
> -	{HV_X64_REGISTER_DR1, 1, 0},
> -	{HV_X64_REGISTER_DR2, 2, 0},
> -	{HV_X64_REGISTER_DR3, 3, 0},
> -	{HV_X64_REGISTER_DR6, 6, 0},
> -	/* MTRR MSRs */
> -	{HV_X64_REGISTER_MSR_MTRR_CAP, -1, MSR_MTRRcap},
> -	{HV_X64_REGISTER_MSR_MTRR_DEF_TYPE, -1, MSR_MTRRdefType},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0, -1, MTRRphysBase_MSR(0)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1, -1, MTRRphysBase_MSR(1)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2, -1, MTRRphysBase_MSR(2)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3, -1, MTRRphysBase_MSR(3)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4, -1, MTRRphysBase_MSR(4)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5, -1, MTRRphysBase_MSR(5)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6, -1, MTRRphysBase_MSR(6)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7, -1, MTRRphysBase_MSR(7)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE8, -1, MTRRphysBase_MSR(8)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE9, -1, MTRRphysBase_MSR(9)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEA, -1, MTRRphysBase_MSR(0xa)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEB, -1, MTRRphysBase_MSR(0xb)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEC, -1, MTRRphysBase_MSR(0xc)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASED, -1, MTRRphysBase_MSR(0xd)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEE, -1, MTRRphysBase_MSR(0xe)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEF, -1, MTRRphysBase_MSR(0xf)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0, -1, MTRRphysMask_MSR(0)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1, -1, MTRRphysMask_MSR(1)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2, -1, MTRRphysMask_MSR(2)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3, -1, MTRRphysMask_MSR(3)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4, -1, MTRRphysMask_MSR(4)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5, -1, MTRRphysMask_MSR(5)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6, -1, MTRRphysMask_MSR(6)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7, -1, MTRRphysMask_MSR(7)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK8, -1, MTRRphysMask_MSR(8)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK9, -1, MTRRphysMask_MSR(9)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKA, -1, MTRRphysMask_MSR(0xa)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKB, -1, MTRRphysMask_MSR(0xb)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKC, -1, MTRRphysMask_MSR(0xc)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKD, -1, MTRRphysMask_MSR(0xd)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKE, -1, MTRRphysMask_MSR(0xe)},
> -	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKF, -1, MTRRphysMask_MSR(0xf)},
> -	{HV_X64_REGISTER_MSR_MTRR_FIX64K00000, -1, MSR_MTRRfix64K_00000},
> -	{HV_X64_REGISTER_MSR_MTRR_FIX16K80000, -1, MSR_MTRRfix16K_80000},
> -	{HV_X64_REGISTER_MSR_MTRR_FIX16KA0000, -1, MSR_MTRRfix16K_A0000},
> -	{HV_X64_REGISTER_MSR_MTRR_FIX4KC0000, -1, MSR_MTRRfix4K_C0000},
> -	{HV_X64_REGISTER_MSR_MTRR_FIX4KC8000, -1, MSR_MTRRfix4K_C8000},
> -	{HV_X64_REGISTER_MSR_MTRR_FIX4KD0000, -1, MSR_MTRRfix4K_D0000},
> -	{HV_X64_REGISTER_MSR_MTRR_FIX4KD8000, -1, MSR_MTRRfix4K_D8000},
> -	{HV_X64_REGISTER_MSR_MTRR_FIX4KE0000, -1, MSR_MTRRfix4K_E0000},
> -	{HV_X64_REGISTER_MSR_MTRR_FIX4KE8000, -1, MSR_MTRRfix4K_E8000},
> -	{HV_X64_REGISTER_MSR_MTRR_FIX4KF0000, -1, MSR_MTRRfix4K_F0000},
> -	{HV_X64_REGISTER_MSR_MTRR_FIX4KF8000, -1, MSR_MTRRfix4K_F8000},
> -};
> -
> -static int mshv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set)
> -{
> -	u64 *reg64;
> -	enum hv_register_name gpr_name;
> -	int i;
> -
> -	gpr_name = regs->name;
> -	reg64 = &regs->value.reg64;
> -
> -	/* Search for the register in the table */
> -	for (i = 0; i < ARRAY_SIZE(reg_table); i++) {
> -		if (reg_table[i].reg_name != gpr_name)
> -			continue;
> -		if (reg_table[i].debug_reg_num != -1) {
> -			/* Handle debug registers */
> -			if (gpr_name == HV_X64_REGISTER_DR6 &&
> -			    !mshv_vsm_capabilities.dr6_shared)
> -				goto hypercall;
> -			if (set)
> -				native_set_debugreg(reg_table[i].debug_reg_num, *reg64);
> -			else
> -				*reg64 = native_get_debugreg(reg_table[i].debug_reg_num);
> -		} else {
> -			/* Handle MSRs */
> -			if (set)
> -				wrmsrl(reg_table[i].msr_addr, *reg64);
> -			else
> -				rdmsrl(reg_table[i].msr_addr, *reg64);
> -		}
> -		return 0;
> -	}
> -
> -hypercall:
> -	return 1;
> -}
> -
>  static void mshv_vtl_return(struct mshv_vtl_cpu_context *vtl0)
>  {
>  	struct hv_vp_assist_page *hvp;
> @@ -720,7 +622,7 @@ mshv_vtl_ioctl_get_regs(void __user *user_args)
>  			   sizeof(reg)))
>  		return -EFAULT;
> 
> -	ret = mshv_vtl_get_set_reg(&reg, false);
> +	ret = hv_vtl_get_set_reg(&reg, false, mshv_vsm_capabilities.dr6_shared);
>  	if (!ret)
>  		goto copy_args; /* No need of hypercall */
>  	ret = vtl_get_vp_register(&reg);
> @@ -751,7 +653,7 @@ mshv_vtl_ioctl_set_regs(void __user *user_args)
>  	if (copy_from_user(&reg, (void __user *)args.regs_ptr, sizeof(reg)))
>  		return -EFAULT;
> 
> -	ret = mshv_vtl_get_set_reg(&reg, true);
> +	ret = hv_vtl_get_set_reg(&reg, true, mshv_vsm_capabilities.dr6_shared);
>  	if (!ret)
>  		return ret; /* No need of hypercall */
>  	ret = vtl_set_vp_register(&reg);
> --
> 2.43.0
> 


^ permalink raw reply

* RE: [PATCH 03/11] Drivers: hv: Add support to setup percpu vmbus handler
From: Michael Kelley @ 2026-04-01 16:55 UTC (permalink / raw)
  To: Naman Jain, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Catalin Marinas, Will Deacon,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Arnd Bergmann, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Ghiti
  Cc: Marc Zyngier, Timothy Hayes, Lorenzo Pieralisi, mrigendrachaubey,
	ssengar@linux.microsoft.com, Michael Kelley,
	linux-hyperv@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-riscv@lists.infradead.org
In-Reply-To: <20260316121241.910764-4-namjain@linux.microsoft.com>

From: Naman Jain <namjain@linux.microsoft.com> Sent: Monday, March 16, 2026 5:13 AM
> 
> Add a wrapper function - hv_setup_percpu_vmbus_handler(), similar to
> hv_setup_vmbus_handler() to allow setting up custom per-cpu VMBus
> interrupt handler. This is required for arm64 support, to be added
> in MSHV_VTL driver, where per-cpu VMBus interrupt handler will be
> set to mshv_vtl_vmbus_isr() for VTL2 (Virtual Trust Level 2).

Needing both hv_setup_vmbus_handler() and
hv_setup_percpu_vmbus_handler() seems unfortunate. Here's an
alternate approach to consider:

1. I think the x86 VMBus sysvec handler and the vmbus_percpu_isr()
functions could both use the same vmbus_handler global variable.
Looking at your changes in this patch set, hv_setup_vmbus_handler()
and hv_setup_percpu_vmbus_handler() are used together and always
set the same value.

2. So move the global variable vmbus_handler out from arch/x86
and into hv_common.c, and export it. The x86 sysvec handler can
still reference it, and vmbus_percpu_isr() in vmbus_drv.c can
also reference it.  No need to have vmbus_percpu_isr() under
arch/arm64 or have a stub in hv_common.c.

3. hv_setup_vmbus_handler() and hv_remove_vmbus_handler()
also move to hv_common.c.  The __weak stubs go away.

With these changes, only hv_setup_vmbus_handler() needs to
be called, and it works for both x86 with the sysvec handler and
for arm64 with vmbus_percpu_isr().

I haven't coded this up, so maybe there's some problematic detail,
but the idea seems like it would work. If it does work, some of my
comments below are no longer applicable.

> 
> Signed-off-by: Saurabh Sengar <ssengar@linux.microsoft.com>
> Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
> ---
>  arch/arm64/hyperv/mshyperv.c   | 13 +++++++++++++
>  drivers/hv/hv_common.c         | 11 +++++++++++
>  drivers/hv/vmbus_drv.c         |  7 +------
>  include/asm-generic/mshyperv.h |  3 +++
>  4 files changed, 28 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c
> index 4fdc26ade1d7..d4494ceeaad0 100644
> --- a/arch/arm64/hyperv/mshyperv.c
> +++ b/arch/arm64/hyperv/mshyperv.c
> @@ -134,3 +134,16 @@ bool hv_is_hyperv_initialized(void)
>  	return hyperv_initialized;
>  }
>  EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized);
> +
> +static void (*vmbus_percpu_handler)(void);
> +void hv_setup_percpu_vmbus_handler(void (*handler)(void))
> +{
> +	vmbus_percpu_handler = handler;
> +}
> +
> +irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
> +{
> +	if (vmbus_percpu_handler)
> +		vmbus_percpu_handler();
> +	return IRQ_HANDLED;
> +}
> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> index d1ebc0ebd08f..a5064f558bf6 100644
> --- a/drivers/hv/hv_common.c
> +++ b/drivers/hv/hv_common.c
> @@ -759,6 +759,17 @@ void __weak hv_setup_vmbus_handler(void (*handler)(void))
>  }
>  EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler);
> 
> +irqreturn_t __weak vmbus_percpu_isr(int irq, void *dev_id)
> +{
> +	return IRQ_HANDLED;
> +}
> +EXPORT_SYMBOL_GPL(vmbus_percpu_isr);
> +
> +void __weak hv_setup_percpu_vmbus_handler(void (*handler)(void))
> +{
> +}
> +EXPORT_SYMBOL_GPL(hv_setup_percpu_vmbus_handler);

You've implemented hv_setup_percpu_vmbus_handler() following
the pattern of hv_setup_vmbus_handler(), which is reasonable.
But that turns out to be unnecessarily complicated. The existing
hv_setup_vmbus_handler() has a portion in
arch/x86/kernel/cpu/mshyperv.c as a special case because it uses a
hard-coded interrupt vector on x86/x64, and has its own custom
sysvec code. And there's a need for a __weak stub in hv_common.c
so that vmbus_drv.c will compile on arm64.

But hv_setup_percpu_vmbus_handler() does not have the same
requirements. It could be implemented entirely in vmbus_drv.c,
with no code under arch/x86 or arch/arm64, and no __weak stub
in hv_common.c.  vmbus_drv.c would just need to
EXPORT_SYMBOL_FOR_MODULES, like it already does with vmbus_isr.
I didn't code it up, but I think that approach would be simpler with
fewer piece-parts scattered all over. If so, it would be worth
breaking the symmetry with hv_setup_vmbus_handler().

> +
>  void __weak hv_remove_vmbus_handler(void)
>  {
>  }
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index bc4fc1951ae1..f99d4f2d3862 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -1413,12 +1413,6 @@ void vmbus_isr(void)
>  }
>  EXPORT_SYMBOL_FOR_MODULES(vmbus_isr, "mshv_vtl");
> 
> -static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
> -{
> -	vmbus_isr();
> -	return IRQ_HANDLED;
> -}
> -
>  static void vmbus_percpu_work(struct work_struct *work)
>  {
>  	unsigned int cpu = smp_processor_id();
> @@ -1520,6 +1514,7 @@ static int vmbus_bus_init(void)
>  	if (vmbus_irq == -1) {
>  		hv_setup_vmbus_handler(vmbus_isr);
>  	} else {
> +		hv_setup_percpu_vmbus_handler(vmbus_isr);
>  		ret = request_percpu_irq(vmbus_irq, vmbus_percpu_isr,
>  				"Hyper-V VMbus", &vmbus_evt);
>  		if (ret) {
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index 108f135d4fd9..b147a12085e4 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -22,6 +22,7 @@
>  #include <linux/bitops.h>
>  #include <acpi/acpi_numa.h>
>  #include <linux/cpumask.h>
> +#include <linux/interrupt.h>
>  #include <linux/nmi.h>
>  #include <asm/ptrace.h>
>  #include <hyperv/hvhdk.h>
> @@ -179,6 +180,8 @@ static inline u64 hv_generate_guest_id(u64 kernel_version)
> 
>  int hv_get_hypervisor_version(union hv_hypervisor_version_info *info);
> 
> +irqreturn_t vmbus_percpu_isr(int irq, void *dev_id);
> +void hv_setup_percpu_vmbus_handler(void (*handler)(void));
>  void hv_setup_vmbus_handler(void (*handler)(void));
>  void hv_remove_vmbus_handler(void);
>  void hv_setup_stimer0_handler(void (*handler)(void));
> --
> 2.43.0
> 

^ permalink raw reply

* RE: [PATCH 02/11] Drivers: hv: Move hv_vp_assist_page to common files
From: Michael Kelley @ 2026-04-01 16:55 UTC (permalink / raw)
  To: Naman Jain, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Catalin Marinas, Will Deacon,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Arnd Bergmann, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Ghiti
  Cc: Marc Zyngier, Timothy Hayes, Lorenzo Pieralisi, mrigendrachaubey,
	ssengar@linux.microsoft.com, Michael Kelley,
	linux-hyperv@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-riscv@lists.infradead.org
In-Reply-To: <20260316121241.910764-3-namjain@linux.microsoft.com>

From: Naman Jain <namjain@linux.microsoft.com> Sent: Monday, March 16, 2026 5:13 AM
> 
> Move the logic to initialize and export hv_vp_assist_page from x86
> architecture code to Hyper-V common code to allow it to be used for
> upcoming arm64 support in MSHV_VTL driver.
> Note: This change also improves error handling - if VP assist page
> allocation fails, hyperv_init() now returns early instead of
> continuing with partial initialization.
> 
> Signed-off-by: Roman Kisel <romank@linux.microsoft.com>
> Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
> ---
>  arch/x86/hyperv/hv_init.c      | 88 +---------------------------------
>  drivers/hv/hv_common.c         | 88 ++++++++++++++++++++++++++++++++++
>  include/asm-generic/mshyperv.h |  4 ++
>  include/hyperv/hvgdk_mini.h    |  2 +
>  4 files changed, 95 insertions(+), 87 deletions(-)
> 
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 323adc93f2dc..75a98b5e451b 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -81,9 +81,6 @@ union hv_ghcb * __percpu *hv_ghcb_pg;
>  /* Storage to save the hypercall page temporarily for hibernation */
>  static void *hv_hypercall_pg_saved;
> 
> -struct hv_vp_assist_page **hv_vp_assist_page;
> -EXPORT_SYMBOL_GPL(hv_vp_assist_page);
> -
>  static int hyperv_init_ghcb(void)
>  {
>  	u64 ghcb_gpa;
> @@ -117,59 +114,12 @@ static int hyperv_init_ghcb(void)
> 
>  static int hv_cpu_init(unsigned int cpu)
>  {
> -	union hv_vp_assist_msr_contents msr = { 0 };
> -	struct hv_vp_assist_page **hvp;
>  	int ret;
> 
>  	ret = hv_common_cpu_init(cpu);
>  	if (ret)
>  		return ret;
> 
> -	if (!hv_vp_assist_page)
> -		return 0;
> -
> -	hvp = &hv_vp_assist_page[cpu];
> -	if (hv_root_partition()) {
> -		/*
> -		 * For root partition we get the hypervisor provided VP assist
> -		 * page, instead of allocating a new page.
> -		 */
> -		rdmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
> -		*hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
> -				PAGE_SIZE, MEMREMAP_WB);
> -	} else {
> -		/*
> -		 * The VP assist page is an "overlay" page (see Hyper-V TLFS's
> -		 * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
> -		 * out to make sure we always write the EOI MSR in
> -		 * hv_apic_eoi_write() *after* the EOI optimization is disabled
> -		 * in hv_cpu_die(), otherwise a CPU may not be stopped in the
> -		 * case of CPU offlining and the VM will hang.
> -		 */
> -		if (!*hvp) {
> -			*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
> -
> -			/*
> -			 * Hyper-V should never specify a VM that is a Confidential
> -			 * VM and also running in the root partition. Root partition
> -			 * is blocked to run in Confidential VM. So only decrypt assist
> -			 * page in non-root partition here.
> -			 */
> -			if (*hvp && !ms_hyperv.paravisor_present && hv_isolation_type_snp()) {
> -				WARN_ON_ONCE(set_memory_decrypted((unsigned long)(*hvp), 1));
> -				memset(*hvp, 0, PAGE_SIZE);
> -			}
> -		}
> -
> -		if (*hvp)
> -			msr.pfn = vmalloc_to_pfn(*hvp);
> -
> -	}
> -	if (!WARN_ON(!(*hvp))) {
> -		msr.enable = 1;
> -		wrmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
> -	}
> -
>  	/* Allow Hyper-V stimer vector to be injected from Hypervisor. */
>  	if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE)
>  		apic_update_vector(cpu, HYPERV_STIMER0_VECTOR, true);
> @@ -286,23 +236,6 @@ static int hv_cpu_die(unsigned int cpu)
> 
>  	hv_common_cpu_die(cpu);
> 
> -	if (hv_vp_assist_page && hv_vp_assist_page[cpu]) {
> -		union hv_vp_assist_msr_contents msr = { 0 };
> -		if (hv_root_partition()) {
> -			/*
> -			 * For root partition the VP assist page is mapped to
> -			 * hypervisor provided page, and thus we unmap the
> -			 * page here and nullify it, so that in future we have
> -			 * correct page address mapped in hv_cpu_init.
> -			 */
> -			memunmap(hv_vp_assist_page[cpu]);
> -			hv_vp_assist_page[cpu] = NULL;
> -			rdmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
> -			msr.enable = 0;
> -		}
> -		wrmsrq(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
> -	}
> -
>  	if (hv_reenlightenment_cb == NULL)
>  		return 0;
> 
> @@ -460,21 +393,6 @@ void __init hyperv_init(void)
>  	if (hv_common_init())
>  		return;
> 
> -	/*
> -	 * The VP assist page is useless to a TDX guest: the only use we
> -	 * would have for it is lazy EOI, which can not be used with TDX.
> -	 */
> -	if (hv_isolation_type_tdx())
> -		hv_vp_assist_page = NULL;
> -	else
> -		hv_vp_assist_page = kzalloc_objs(*hv_vp_assist_page, nr_cpu_ids);
> -	if (!hv_vp_assist_page) {
> -		ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
> -
> -		if (!hv_isolation_type_tdx())
> -			goto common_free;
> -	}
> -
>  	if (ms_hyperv.paravisor_present && hv_isolation_type_snp()) {
>  		/* Negotiate GHCB Version. */
>  		if (!hv_ghcb_negotiate_protocol())
> @@ -483,7 +401,7 @@ void __init hyperv_init(void)
> 
>  		hv_ghcb_pg = alloc_percpu(union hv_ghcb *);
>  		if (!hv_ghcb_pg)
> -			goto free_vp_assist_page;
> +			goto free_ghcb_page;
>  	}
> 
>  	cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online",
> @@ -613,10 +531,6 @@ void __init hyperv_init(void)
>  	cpuhp_remove_state(CPUHP_AP_HYPERV_ONLINE);
>  free_ghcb_page:
>  	free_percpu(hv_ghcb_pg);
> -free_vp_assist_page:
> -	kfree(hv_vp_assist_page);
> -	hv_vp_assist_page = NULL;
> -common_free:
>  	hv_common_free();
>  }
> 
> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> index 6b67ac616789..d1ebc0ebd08f 100644
> --- a/drivers/hv/hv_common.c
> +++ b/drivers/hv/hv_common.c
> @@ -28,7 +28,9 @@
>  #include <linux/slab.h>
>  #include <linux/dma-map-ops.h>
>  #include <linux/set_memory.h>
> +#include <linux/vmalloc.h>
>  #include <hyperv/hvhdk.h>
> +#include <hyperv/hvgdk.h>
>  #include <asm/mshyperv.h>

Need to add

#include <linux/io.h>

because of the memremap() and related calls that have been added.
io.h is probably being #include'd indirectly, but it is better to #include
it directly.

> 
>  u64 hv_current_partition_id = HV_PARTITION_ID_SELF;
> @@ -78,6 +80,8 @@ static struct ctl_table_header *hv_ctl_table_hdr;
>  u8 * __percpu *hv_synic_eventring_tail;
>  EXPORT_SYMBOL_GPL(hv_synic_eventring_tail);
> 
> +struct hv_vp_assist_page **hv_vp_assist_page;
> +EXPORT_SYMBOL_GPL(hv_vp_assist_page);
>  /*
>   * Hyper-V specific initialization and shutdown code that is
>   * common across all architectures.  Called from architecture
> @@ -92,6 +96,9 @@ void __init hv_common_free(void)
>  	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE)
>  		hv_kmsg_dump_unregister();
> 
> +	kfree(hv_vp_assist_page);
> +	hv_vp_assist_page = NULL;
> +
>  	kfree(hv_vp_index);
>  	hv_vp_index = NULL;
> 
> @@ -394,6 +401,23 @@ int __init hv_common_init(void)
>  	for (i = 0; i < nr_cpu_ids; i++)
>  		hv_vp_index[i] = VP_INVAL;
> 
> +	/*
> +	 * The VP assist page is useless to a TDX guest: the only use we
> +	 * would have for it is lazy EOI, which can not be used with TDX.
> +	 */
> +	if (hv_isolation_type_tdx()) {
> +		hv_vp_assist_page = NULL;
> +	} else {
> +		hv_vp_assist_page = kzalloc_objs(*hv_vp_assist_page, nr_cpu_ids);
> +		if (!hv_vp_assist_page) {
> +#ifdef CONFIG_X86_64
> +			ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
> +#endif
> +			hv_common_free();
> +			return -ENOMEM;

Given that "failure to allocate memory" now returns an error that is
essentially fatal to hyperv_init(), is it still necessary to clear the flag in
ms_hyperv.hints?  I'd love to see that #ifdef go away. It's the only
#ifdef in hv_common.c, and I had worked hard in the past to avoid
such #ifdef's. :-)

> +		}
> +	}
> +
>  	return 0;
>  }
> 
> @@ -471,6 +495,8 @@ void __init ms_hyperv_late_init(void)
> 
>  int hv_common_cpu_init(unsigned int cpu)
>  {
> +	union hv_vp_assist_msr_contents msr = { 0 };
> +	struct hv_vp_assist_page **hvp;
>  	void **inputarg, **outputarg;
>  	u8 **synic_eventring_tail;
>  	u64 msr_vp_index;
> @@ -542,6 +568,50 @@ int hv_common_cpu_init(unsigned int cpu)
>  			ret = -ENOMEM;

The Sashiko AI comment here about a bug when ret is set to -ENOMEM
seems valid to me.

>  	}
> 
> +	if (!hv_vp_assist_page)
> +		return ret;
> +
> +	hvp = &hv_vp_assist_page[cpu];
> +	if (hv_root_partition()) {
> +		/*
> +		 * For root partition we get the hypervisor provided VP assist
> +		 * page, instead of allocating a new page.
> +		 */
> +		msr.as_uint64 = hv_get_msr(HV_SYN_REG_VP_ASSIST_PAGE);
> +		*hvp = memremap(msr.pfn << HV_VP_ASSIST_PAGE_ADDRESS_SHIFT,
> +				PAGE_SIZE, MEMREMAP_WB);

The Sashiko AI comment about potentially memremap'ing 64K instead of 4K can
be ignored. We know that the root partition can only run with a 4K page size,
and that is enforced in drivers/hv/Kconfig.

HV_VP_ASSIST_PAGE_ADDRESS_SHIFT is defined in asm-generic/mshyperv.h.
But there is also HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT in hvgdk_mini.h.
Is there a clean way to eliminate the duplication?

> +	} else {
> +		/*
> +		 * The VP assist page is an "overlay" page (see Hyper-V TLFS's
> +		 * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
> +		 * out to make sure we always write the EOI MSR in
> +		 * hv_apic_eoi_write() *after* the EOI optimization is disabled
> +		 * in hv_cpu_die(), otherwise a CPU may not be stopped in the
> +		 * case of CPU offlining and the VM will hang.
> +		 */

Somewhere in the comment above, I'd suggest adding a short "on x86/x64"
qualifier, as the comment doesn't apply on arm64 since it doesn't support
the AutoEOI optimization.  Maybe "Here it must be zeroed out to make sure
that on x86/x64 we always write the EOI MSR in ....".   

> +		if (!*hvp) {
> +			*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);

The Sashiko AI comment about using "flags" instead of GFP_KERNEL seems valid.

> +
> +			/*
> +			 * Hyper-V should never specify a VM that is a Confidential
> +			 * VM and also running in the root partition. Root partition
> +			 * is blocked to run in Confidential VM. So only decrypt assist
> +			 * page in non-root partition here.
> +			 */
> +			if (*hvp && !ms_hyperv.paravisor_present && hv_isolation_type_snp()) {
> +				WARN_ON_ONCE(set_memory_decrypted((unsigned long)(*hvp), 1));
> +				memset(*hvp, 0, PAGE_SIZE);
> +			}
> +		}
> +
> +		if (*hvp)
> +			msr.pfn = vmalloc_to_pfn(*hvp);

The Sashiko AI comment about page size here seems valid. But what are the rules
about arm64 page sizes that are supported for VTL2, and how does they relate
to VTL0 allowing 4K, 16K, and 64K page size? What combinations are allowed?
For example, can a VTL2 built with 4K page size run with a VTL0 built with
64K page size? It would be nice to have the rules recorded somewhere in a
code comment, but I'm not sure of the best place.

But regardless of the rules, I'd suggest future-proofing by using
"page_to_hvpfn(vmalloc_to_page(*hvp))" so that the PFN generated is always
in terms of 4K page size as the Hyper-V host expects.

> +	}
> +	if (!WARN_ON(!(*hvp))) {
> +		msr.enable = 1;
> +		hv_set_msr(HV_SYN_REG_VP_ASSIST_PAGE, msr.as_uint64);
> +	}
> +
>  	return ret;
>  }
> 
> @@ -566,6 +636,24 @@ int hv_common_cpu_die(unsigned int cpu)
>  		*synic_eventring_tail = NULL;
>  	}
> 
> +	if (hv_vp_assist_page && hv_vp_assist_page[cpu]) {
> +		union hv_vp_assist_msr_contents msr = { 0 };
> +
> +		if (hv_root_partition()) {
> +			/*
> +			 * For root partition the VP assist page is mapped to
> +			 * hypervisor provided page, and thus we unmap the
> +			 * page here and nullify it, so that in future we have
> +			 * correct page address mapped in hv_cpu_init.
> +			 */
> +			memunmap(hv_vp_assist_page[cpu]);
> +			hv_vp_assist_page[cpu] = NULL;
> +			msr.as_uint64 = hv_get_msr(HV_SYN_REG_VP_ASSIST_PAGE);
> +			msr.enable = 0;
> +		}
> +		hv_set_msr(HV_SYN_REG_VP_ASSIST_PAGE, msr.as_uint64);
> +	}
> +
>  	return 0;
>  }
> 
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index d37b68238c97..108f135d4fd9 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -25,6 +25,7 @@
>  #include <linux/nmi.h>
>  #include <asm/ptrace.h>
>  #include <hyperv/hvhdk.h>
> +#include <hyperv/hvgdk.h>
> 
>  #define VTPM_BASE_ADDRESS 0xfed40000
> 
> @@ -299,6 +300,8 @@ do { \
>  #define hv_status_debug(status, fmt, ...) \
>  	hv_status_printk(debug, status, fmt, ##__VA_ARGS__)
> 
> +extern struct hv_vp_assist_page **hv_vp_assist_page;

This "extern" statement is added here so it is visible to both x86/x64 and arm64.
And that's correct.

But there is still some VP assist page stuff that has been left in the arch/x86
version of mshyperv.h.  That other stuff, including the inline function
hv_get_vp_assist_page(), should also be moved to asm-generic/mshyperv.h.
Given that the VP assist page support is now fully generic and not x86/x64
specific, it shouldn't occur anywhere in the arch/x86 version of mshyperv.h.

> +
>  const char *hv_result_to_string(u64 hv_status);
>  int hv_result_to_errno(u64 status);
>  void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die);
> @@ -377,6 +380,7 @@ static inline int hv_deposit_memory(u64 partition_id, u64 status)
>  	return hv_deposit_memory_node(NUMA_NO_NODE, partition_id, status);
>  }
> 
> +#define HV_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
>  #if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)
>  u8 __init get_vtl(void);
>  #else
> diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h
> index 056ef7b6b360..be697ddb211a 100644
> --- a/include/hyperv/hvgdk_mini.h
> +++ b/include/hyperv/hvgdk_mini.h
> @@ -149,6 +149,7 @@ struct hv_u128 {
>  #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
>  #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK	\
>  		(~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> +#define HV_SYN_REG_VP_ASSIST_PAGE              (HV_X64_MSR_VP_ASSIST_PAGE)
> 
>  /* Hyper-V Enlightened VMCS version mask in nested features CPUID */
>  #define HV_X64_ENLIGHTENED_VMCS_VERSION		0xff
> @@ -1185,6 +1186,7 @@ enum hv_register_name {
> 
>  #define HV_MSR_STIMER0_CONFIG	(HV_REGISTER_STIMER0_CONFIG)
>  #define HV_MSR_STIMER0_COUNT	(HV_REGISTER_STIMER0_COUNT)
> +#define HV_SYN_REG_VP_ASSIST_PAGE    (HV_REGISTER_VP_ASSIST_PAGE)

This defines a new register name prefix "HV_SYN_REG_" that isn't used
anywhere else. The prefixes for Hyper-V register names are already complex
to account to x86/x64 and arm64 differences, and the fact the x86/x64 has
synthetic MSRs, while arm64 does not. So introducing another prefix is
undesirable. Couldn't this just be HV_MSR_VP_ASSIST_PAGE using the
same structure as HV_MSR_STIMER0_COUNT (for example)?

> 
>  #endif /* CONFIG_ARM64 */
> 
> --
> 2.43.0
> 


^ permalink raw reply

* RE: [PATCH 01/11] arch: arm64: Export arch_smp_send_reschedule for mshv_vtl module
From: Michael Kelley @ 2026-04-01 16:54 UTC (permalink / raw)
  To: Naman Jain, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
	Dexuan Cui, Long Li, Catalin Marinas, Will Deacon,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Arnd Bergmann, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Ghiti
  Cc: Marc Zyngier, Timothy Hayes, Lorenzo Pieralisi, mrigendrachaubey,
	ssengar@linux.microsoft.com, Michael Kelley,
	linux-hyperv@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-riscv@lists.infradead.org
In-Reply-To: <20260316121241.910764-2-namjain@linux.microsoft.com>

From: Naman Jain <namjain@linux.microsoft.com> Sent: Monday, March 16, 2026 5:13 AM
> 

Nit: For the patch "Subject", the most common prefix for the file
arch/arm64/kernel/smp.c is "arm64: smp:".  I'd suggest using that
prefix for historical consistency.

> mshv_vtl_main.c calls smp_send_reschedule() which expands to
> arch_smp_send_reschedule(). When CONFIG_MSHV_VTL=m, the module cannot
> access this symbol since it is not exported on arm64.
> 
> smp_send_reschedule() is used in mshv_vtl_cancel() to interrupt a vCPU
> thread running on another CPU. When a vCPU is looping in
> mshv_vtl_ioctl_return_to_lower_vtl(), it checks a per-CPU cancel flag
> before each VTL0 entry. Setting cancel=1 alone is not enough if the
> target CPU thread is sleeping - the IPI from smp_send_reschedule() kicks
> the remote CPU out of idle/sleep so it re-checks the cancel flag and
> exits the loop promptly.
> 
> Other architectures (riscv, loongarch, powerpc) already export this
> symbol. Add the same EXPORT_SYMBOL_GPL for arm64. This is required
> for adding arm64 support in MSHV_VTL.
> 
> Signed-off-by: Naman Jain <namjain@linux.microsoft.com>
> ---
>  arch/arm64/kernel/smp.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 1aa324104afb..26b1a4456ceb 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -1152,6 +1152,7 @@ void arch_smp_send_reschedule(int cpu)
>  {
>  	smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
>  }
> +EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
> 
>  #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
>  void arch_send_wakeup_ipi(unsigned int cpu)
> --
> 2.43.0
> 

The "Subject" nit notwithstanding,

Reviewed-by: Michael Kelley <mhklinux@outlook.com>


^ permalink raw reply

* RE: [PATCH 00/11] Drivers: hv: Add ARM64 support in mshv_vtl
From: Michael Kelley @ 2026-04-01 16:54 UTC (permalink / raw)
  To: Naman Jain, Michael Kelley, K . Y . Srinivasan, Haiyang Zhang,
	Wei Liu, Dexuan Cui, Long Li, Catalin Marinas, Will Deacon,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Arnd Bergmann, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Ghiti
  Cc: Marc Zyngier, Timothy Hayes, Lorenzo Pieralisi, mrigendrachaubey,
	ssengar@linux.microsoft.com, linux-hyperv@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-riscv@lists.infradead.org
In-Reply-To: <a8f856a0-49a8-4041-9036-4e9ade79532b@linux.microsoft.com>

From: Naman Jain <namjain@linux.microsoft.com> Sent: Tuesday, March 17, 2026 9:23 PM
> 
> On 3/18/2026 3:33 AM, Michael Kelley wrote:
> > From: Naman Jain <namjain@linux.microsoft.com> Sent: Monday, March 16, 2026 5:13 AM
> >>
> >> The series intends to add support for ARM64 to mshv_vtl driver.

No need to be tentative. :-)  Just write as:

"The series adds support for ARM64 to the mshv_vtl driver."

> >> For this, common Hyper-V code is refactored, necessary support is added,
> >> mshv_vtl_main.c is refactored and then finally support is added in
> >> Kconfig.
> >>
> >> Based on commit 1f318b96cc84 ("Linux 7.0-rc3")
> >
> > There's now an online LLM-based tool that is automatically reviewing
> > kernel patches. For this patch set, the results are here:
> >
> > https://sashiko.dev/#/patchset/20260316121241.910764-1-namjain%40linux.microsoft.com
> >
> > It has flagged several things that are worth checking, but I haven't
> > reviewed them to see if they are actually valid.
> >
> > FWIW, the announcement about sashiko.dev is here:
> >
> > https://lore.kernel.org/lkml/7ia4o6kmpj5s.fsf@castle.c.googlers.com/
> >
> > Michael
> 
> 
> Thanks for sharing Michael,
> I'll check it out and do the needful.
> 

I've done a full review of this patch set and provided comments in the
individual patches. Some of my comments reference the Sashiko AI
comments, but there are still some Sashiko AI comments to consider
that I haven't referenced.

FWIW, the Sashiko AI comments are quite good -- it found some things
here that I missed on my own, and in my earlier reviews of the original VTL
code. :-(

Michael

^ permalink raw reply

* Re: [PATCH v2] Drivers: hv: mshv: fix integer overflow in memory region overlap check
From: Junrui Luo @ 2026-04-01 15:23 UTC (permalink / raw)
  To: Stanislav Kinsburskii
  Cc: K. Y. Srinivasan, Haiyang Zhang, Wei Liu, Dexuan Cui, Long Li,
	Nuno Das Neves, Anirudh Rayabharam, Mukesh Rathor, Muminul Islam,
	Praveen K Paladugu, Jinank Jain, linux-hyperv@vger.kernel.org,
	linux-kernel@vger.kernel.org, Yuhao Jiang, Roman Kisel,
	stable@vger.kernel.org
In-Reply-To: <acrnkcG5_u0RCydx@skinsburskii.localdomain>

Hi Stanislav,

Thanks for the review.

On Mon, Mar 30, 2026 at 02:13:53PM -0700, Stanislav Kinsburskii wrote:
> Minor nit: just "end" or even "tmp" would be sufficient, since it's only
> used for the overflow checks. "new_region_end" is a bit verbose and it's
> not really "new" per se.

I will rename it to “region_end" in v3.

> This is a PFN, so the check should be against MAX_PHYSMEM_BITS -
> PAGE_SHIFT, right?

HVPFN_DOWN() is defined as (x) >> HV_HYP_PAGE_SHIFT, so
HVPFN_DOWN(1ULL << MAX_PHYSMEM_BITS) already expands to
1ULL << (MAX_PHYSMEM_BITS - HV_HYP_PAGE_SHIFT).

Thanks,
Junrui Luo


^ permalink raw reply

* [PATCH 2/2] hv: vmbus: Remove vmbus_irq_initialized
From: Sebastian Andrzej Siewior @ 2026-04-01 15:15 UTC (permalink / raw)
  To: linux-hyperv, linux-rt-devel
  Cc: K. Y. Srinivasan, Dexuan Cui, Haiyang Zhang, Jan Kiszka, Long Li,
	Michael Kelley, Wei Liu, Sebastian Andrzej Siewior
In-Reply-To: <20260401151517.1743555-1-bigeasy@linutronix.de>

vmbus_irq_initialized is only true if the registration of the per-CPU
threads succeeded. If it failed, the whole registration aborts and the
vmbus_exit() path is never called.

Remove vmbus_irq_initialized.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/hv/vmbus_drv.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index e44275370ac2a..7417841cd1f70 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1392,8 +1392,6 @@ static void run_vmbus_irqd(unsigned int cpu)
 	__vmbus_isr();
 }
 
-static bool vmbus_irq_initialized;
-
 static struct smp_hotplug_thread vmbus_irq_threads = {
 	.store                  = &vmbus_irqd,
 	.setup			= vmbus_irqd_setup,
@@ -1513,11 +1511,10 @@ static int vmbus_bus_init(void)
 	 * the VMbus interrupt handler.
 	 */
 
-	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !vmbus_irq_initialized) {
+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
 		ret = smpboot_register_percpu_thread(&vmbus_irq_threads);
 		if (ret)
 			goto err_kthread;
-		vmbus_irq_initialized = true;
 	}
 
 	if (vmbus_irq == -1) {
@@ -1561,10 +1558,8 @@ static int vmbus_bus_init(void)
 	else
 		free_percpu_irq(vmbus_irq, &vmbus_evt);
 err_setup:
-	if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
 		smpboot_unregister_percpu_thread(&vmbus_irq_threads);
-		vmbus_irq_initialized = false;
-	}
 err_kthread:
 	bus_unregister(&hv_bus);
 	return ret;
@@ -3033,10 +3028,9 @@ static void __exit vmbus_exit(void)
 		hv_remove_vmbus_handler();
 	else
 		free_percpu_irq(vmbus_irq, &vmbus_evt);
-	if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
 		smpboot_unregister_percpu_thread(&vmbus_irq_threads);
-		vmbus_irq_initialized = false;
-	}
+
 	for_each_online_cpu(cpu) {
 		struct hv_per_cpu_context *hv_cpu
 			= per_cpu_ptr(hv_context.cpu_context, cpu);
-- 
2.53.0


^ permalink raw reply related

* [PATCH 0/2] hv: vmbus: Small cleanups
From: Sebastian Andrzej Siewior @ 2026-04-01 15:15 UTC (permalink / raw)
  To: linux-hyperv, linux-rt-devel
  Cc: K. Y. Srinivasan, Dexuan Cui, Haiyang Zhang, Jan Kiszka, Long Li,
	Michael Kelley, Wei Liu, Sebastian Andrzej Siewior

Replacing the lockdep_hardirq_threaded() annotation which does not
belong in drivers and removing the vmbus_irq_initialized which seems
redundant.

Sebastian Andrzej Siewior (2):
  hv: vmbus: Replace lockdep_hardirq_threaded() with lockdep annotation
  hv: vmbus: Remove vmbus_irq_initialized

 drivers/hv/vmbus_drv.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

-- 
2.53.0


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox