The Linux Kernel Mailing List
 help / color / mirror / Atom feed
From: Mukesh R <mrathor@linux.microsoft.com>
To: hpa@zytor.com, robin.murphy@arm.com, robh@kernel.org,
	wei.liu@kernel.org, mrathor@linux.microsoft.com,
	mhklinux@outlook.com, muislam@microsoft.com,
	namjain@linux.microsoft.com, magnuskulke@linux.microsoft.com,
	anbelski@linux.microsoft.com, linux-kernel@vger.kernel.org,
	linux-hyperv@vger.kernel.org, iommu@lists.linux.dev,
	linux-pci@vger.kernel.org, linux-arch@vger.kernel.org
Cc: kys@microsoft.com, haiyangz@microsoft.com, decui@microsoft.com,
	longli@microsoft.com, tglx@kernel.org, mingo@redhat.com,
	bp@alien8.de, dave.hansen@linux.intel.com, x86@kernel.org,
	joro@8bytes.org, will@kernel.org, lpieralisi@kernel.org,
	kwilczynski@kernel.org, bhelgaas@google.com, arnd@arndb.de,
	jacob.pan@linux.microsoft.com
Subject: [PATCH V3 10/11] mshv: Populate mmio mappings for PCI passthru
Date: Mon, 11 May 2026 19:02:58 -0700	[thread overview]
Message-ID: <20260512020259.1678627-11-mrathor@linux.microsoft.com> (raw)
In-Reply-To: <20260512020259.1678627-1-mrathor@linux.microsoft.com>

Upon guest access, in case of a missing mmio mapping, the hypervisor
generates an unmapped-gpa intercept. In this path, look up the PCI
resource pfn for the guest gpa, and ask the hypervisor to map it
via a hypercall. The PCI resource pfn is maintained by the VFIO driver,
and obtained via a fixup_user_fault() call (similar to KVM).

Also, VFIO no longer puts the mmio pfn in vma->vm_pgoff, so remove the
code that uses it to map mmio space. That code is broken and will cause
a panic.

Signed-off-by: Mukesh R <mrathor@linux.microsoft.com>
---
 drivers/hv/mshv_root_main.c | 113 ++++++++++++++++++++++++++++++------
 1 file changed, 96 insertions(+), 17 deletions(-)

diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
index 6ceb5f608589..a7864463961b 100644
--- a/drivers/hv/mshv_root_main.c
+++ b/drivers/hv/mshv_root_main.c
@@ -46,6 +46,9 @@ MODULE_DESCRIPTION("Microsoft Hyper-V root partition VMM interface /dev/mshv");
 #define HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED 95
 #endif
 
+static bool hv_nofull_mmio;	/* don't map entire mmio region upon fault */
+module_param(hv_nofull_mmio, bool, 0644);
+
 struct mshv_root mshv_root;
 
 enum hv_scheduler_type hv_scheduler_type;
@@ -641,6 +644,94 @@ mshv_partition_region_by_gfn_get(struct mshv_partition *p, u64 gfn)
 	return region;
 }
 
+/*
+ * Check if uaddr is for mmio range. If yes, return 0 with mmio_pfn filled in
+ * else just return -errno.
+ */
+static int mshv_chk_get_mmio_start_pfn(u64 uaddr, u64 *mmio_pfnp)
+{
+	struct vm_area_struct *vma;
+	bool is_mmio;
+	struct follow_pfnmap_args pfnmap_args;
+	int rc = -EINVAL;
+
+	mmap_read_lock(current->mm);
+	vma = vma_lookup(current->mm, uaddr);
+	is_mmio = vma ? !!(vma->vm_flags & (VM_IO | VM_PFNMAP)) : 0;
+	if (!is_mmio)
+		goto unlock_mmap_out;
+
+	pfnmap_args.vma = vma;
+	pfnmap_args.address = uaddr;
+
+	rc = follow_pfnmap_start(&pfnmap_args);
+	if (rc) {
+		rc = fixup_user_fault(current->mm, uaddr, FAULT_FLAG_WRITE,
+				      NULL);
+		if (rc)
+			goto unlock_mmap_out;
+
+		rc = follow_pfnmap_start(&pfnmap_args);
+		if (rc)
+			goto unlock_mmap_out;
+	}
+
+	*mmio_pfnp = pfnmap_args.pfn;
+	follow_pfnmap_end(&pfnmap_args);
+
+unlock_mmap_out:
+	mmap_read_unlock(current->mm);
+	return rc;
+}
+
+/*
+ * Check if the unmapped gpa belongs to mmio space. If yes, resolve it.
+ *
+ * Returns: True if valid mmio intercept and handled, else false.
+ */
+static bool mshv_handle_unmapped_gpa(struct mshv_vp *vp)
+{
+	struct hv_message *hvmsg = vp->vp_intercept_msg_page;
+	u64 gfn, uaddr, mmio_spa, numpgs;
+	struct mshv_mem_region *rg;
+	int rc = -EINVAL;
+	struct mshv_partition *pt = vp->vp_partition;
+#if defined(CONFIG_X86_64)
+	struct hv_x64_memory_intercept_message *msg =
+		(struct hv_x64_memory_intercept_message *)hvmsg->u.payload;
+#elif defined(CONFIG_ARM64)
+	struct hv_arm64_memory_intercept_message *msg =
+		(struct hv_arm64_memory_intercept_message *)hvmsg->u.payload;
+#endif
+
+	gfn = msg->guest_physical_address >> HV_HYP_PAGE_SHIFT;
+
+	rg = mshv_partition_region_by_gfn_get(pt, gfn);
+	if (rg == NULL)
+		return false;
+	if (rg->mreg_type != MSHV_REGION_TYPE_MMIO)
+		goto put_rg_out;
+
+	uaddr = rg->start_uaddr + ((gfn - rg->start_gfn) << HV_HYP_PAGE_SHIFT);
+
+	rc = mshv_chk_get_mmio_start_pfn(uaddr, &mmio_spa);
+	if (rc)
+		goto put_rg_out;
+
+	if (!hv_nofull_mmio) {		/* default case */
+		mmio_spa = mmio_spa - (gfn - rg->start_gfn);
+		gfn = rg->start_gfn;
+		numpgs = rg->nr_pages;
+	} else
+		numpgs = 1;
+
+	rc = hv_call_map_mmio_pages(pt->pt_id, gfn, mmio_spa, numpgs);
+
+put_rg_out:
+	mshv_region_put(rg);
+	return rc == 0;
+}
+
 /**
  * mshv_handle_gpa_intercept - Handle GPA (Guest Physical Address) intercepts.
  * @vp: Pointer to the virtual processor structure.
@@ -699,6 +790,8 @@ static bool mshv_handle_gpa_intercept(struct mshv_vp *vp)
 static bool mshv_vp_handle_intercept(struct mshv_vp *vp)
 {
 	switch (vp->vp_intercept_msg_page->header.message_type) {
+	case HVMSG_UNMAPPED_GPA:
+		return mshv_handle_unmapped_gpa(vp);
 	case HVMSG_GPA_INTERCEPT:
 		return mshv_handle_gpa_intercept(vp);
 	}
@@ -1322,16 +1415,8 @@ static int mshv_prepare_pinned_region(struct mshv_mem_region *region)
 }
 
 /*
- * This maps two things: guest RAM and for pci passthru mmio space.
- *
- * mmio:
- *  - vfio overloads vm_pgoff to store the mmio start pfn/spa.
- *  - Two things need to happen for mapping mmio range:
- *	1. mapped in the uaddr so VMM can access it.
- *	2. mapped in the hwpt (gfn <-> mmio phys addr) so guest can access it.
- *
- *   This function takes care of the second. The first one is managed by vfio,
- *   and hence is taken care of via vfio_pci_mmap_fault().
+ * This is called for both user ram and mmio space. The mmio space is not
+ * mapped here, but later during intercept on demand.
  */
 static long
 mshv_map_user_memory(struct mshv_partition *partition,
@@ -1340,7 +1425,6 @@ mshv_map_user_memory(struct mshv_partition *partition,
 	struct mshv_mem_region *region;
 	struct vm_area_struct *vma;
 	bool is_mmio;
-	ulong mmio_pfn;
 	long ret;
 
 	if (mem->flags & BIT(MSHV_SET_MEM_BIT_UNMAP) ||
@@ -1350,7 +1434,6 @@ mshv_map_user_memory(struct mshv_partition *partition,
 	mmap_read_lock(current->mm);
 	vma = vma_lookup(current->mm, mem->userspace_addr);
 	is_mmio = vma ? !!(vma->vm_flags & (VM_IO | VM_PFNMAP)) : 0;
-	mmio_pfn = is_mmio ? vma->vm_pgoff : 0;
 	mmap_read_unlock(current->mm);
 
 	if (!vma)
@@ -1376,11 +1459,7 @@ mshv_map_user_memory(struct mshv_partition *partition,
 					    region->nr_pages,
 					    HV_MAP_GPA_NO_ACCESS, NULL);
 		break;
-	case MSHV_REGION_TYPE_MMIO:
-		ret = hv_call_map_mmio_pages(partition->pt_id,
-					     region->start_gfn,
-					     mmio_pfn,
-					     region->nr_pages);
+	default:
 		break;
 	}
 
-- 
2.51.2.vfs.0.1


  parent reply	other threads:[~2026-05-12  2:03 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-12  2:02 [PATCH V3 00/11] PCI passthru on Hyper-V (Part I) Mukesh R
2026-05-12  2:02 ` [PATCH V3 01/11] iommu/hyperv: Rename hyperv-iommu.c to hyperv-irq.c Mukesh R
2026-05-12 10:26   ` Souradeep Chakrabarti
2026-05-12 23:46   ` Jacob Pan
2026-05-13  1:31     ` Mukesh R
2026-05-13  3:15     ` Michael Kelley
2026-05-12  2:02 ` [PATCH V3 02/11] x86/hyperv: Cosmetic changes in irqdomain.c for readability Mukesh R
2026-05-12 10:27   ` Souradeep Chakrabarti
2026-05-12  2:02 ` [PATCH V3 03/11] mshv: Provide a way to get partition ID if running in a VMM process Mukesh R
2026-05-12  2:02 ` [PATCH V3 04/11] mshv: Declarations and definitions for VFIO-MSHV bridge device Mukesh R
2026-05-12 10:26   ` Souradeep Chakrabarti
2026-05-12  2:02 ` [PATCH V3 05/11] mshv: Implement mshv bridge device for VFIO Mukesh R
2026-05-12  2:02 ` [PATCH V3 06/11] mshv: Add ioctl support for MSHV-VFIO bridge device Mukesh R
2026-05-12  2:02 ` [PATCH V3 07/11] mshv: Import data structs around device passthru from hyperv headers Mukesh R
2026-05-12  2:02 ` [PATCH V3 08/11] PCI: hv: VMBus and PCI device IDs for PCI passthru Mukesh R
2026-05-12 17:41   ` Bjorn Helgaas
2026-05-12  2:02 ` [PATCH V3 09/11] x86/hyperv: Implement Hyper-V virtual IOMMU Mukesh R
2026-05-12  2:02 ` Mukesh R [this message]
2026-05-12  2:02 ` [PATCH V3 11/11] mshv: Mark mem regions as non-movable upfront if device passthru Mukesh R

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260512020259.1678627-11-mrathor@linux.microsoft.com \
    --to=mrathor@linux.microsoft.com \
    --cc=anbelski@linux.microsoft.com \
    --cc=arnd@arndb.de \
    --cc=bhelgaas@google.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=decui@microsoft.com \
    --cc=haiyangz@microsoft.com \
    --cc=hpa@zytor.com \
    --cc=iommu@lists.linux.dev \
    --cc=jacob.pan@linux.microsoft.com \
    --cc=joro@8bytes.org \
    --cc=kwilczynski@kernel.org \
    --cc=kys@microsoft.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-hyperv@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=longli@microsoft.com \
    --cc=lpieralisi@kernel.org \
    --cc=magnuskulke@linux.microsoft.com \
    --cc=mhklinux@outlook.com \
    --cc=mingo@redhat.com \
    --cc=muislam@microsoft.com \
    --cc=namjain@linux.microsoft.com \
    --cc=robh@kernel.org \
    --cc=robin.murphy@arm.com \
    --cc=tglx@kernel.org \
    --cc=wei.liu@kernel.org \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox