* [PATCH v3 1/4] xen/io: provide helpers for multi size MMIO accesses
2025-04-17 15:25 [PATCH v3 0/4] xen/x86: fix implementation of subpage r/o MMIO Roger Pau Monne
@ 2025-04-17 15:25 ` Roger Pau Monne
2025-04-17 15:32 ` Jan Beulich
2025-04-17 15:25 ` [PATCH v3 2/4] x86/hvm: fix handling of accesses to partial r/o MMIO pages Roger Pau Monne
` (2 subsequent siblings)
3 siblings, 1 reply; 12+ messages in thread
From: Roger Pau Monne @ 2025-04-17 15:25 UTC (permalink / raw)
To: xen-devel
Cc: Roger Pau Monne, Jan Beulich, Andrew Cooper, Anthony PERARD,
Michal Orzel, Julien Grall, Stefano Stabellini
Several handlers have the same necessity of reading or writing from or to
an MMIO region using 1, 2, 4 or 8 bytes accesses. So far this has been
open-coded in the function itself. Instead provide a new set of handlers
that encapsulate the accesses.
Since the added helpers are not architecture specific, introduce a new
generic io.h header.
No functional change intended.
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v2:
- Keep error message when handling non-allowed access sizes.
Changes since v1:
- Limit {read,write}q() to 64bit architectures.
- Always have a default case in switch statement.
---
xen/arch/x86/hvm/vmsi.c | 47 ++-------------------------
xen/arch/x86/mm.c | 30 +++++-------------
xen/drivers/vpci/msix.c | 47 ++-------------------------
xen/include/xen/io.h | 70 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 83 insertions(+), 111 deletions(-)
create mode 100644 xen/include/xen/io.h
diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index fd83abb929ec..61b89834d97d 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -24,6 +24,7 @@
* Will be merged it with virtual IOAPIC logic, since most is the same
*/
+#include <xen/io.h>
#include <xen/types.h>
#include <xen/mm.h>
#include <xen/xmalloc.h>
@@ -304,28 +305,7 @@ static void adjacent_read(
hwaddr = fix_to_virt(fixmap_idx) + PAGE_OFFSET(address);
- switch ( len )
- {
- case 1:
- *pval = readb(hwaddr);
- break;
-
- case 2:
- *pval = readw(hwaddr);
- break;
-
- case 4:
- *pval = readl(hwaddr);
- break;
-
- case 8:
- *pval = readq(hwaddr);
- break;
-
- default:
- ASSERT_UNREACHABLE();
- break;
- }
+ *pval = read_mmio(hwaddr, len);
}
static void adjacent_write(
@@ -344,28 +324,7 @@ static void adjacent_write(
hwaddr = fix_to_virt(fixmap_idx) + PAGE_OFFSET(address);
- switch ( len )
- {
- case 1:
- writeb(val, hwaddr);
- break;
-
- case 2:
- writew(val, hwaddr);
- break;
-
- case 4:
- writel(val, hwaddr);
- break;
-
- case 8:
- writeq(val, hwaddr);
- break;
-
- default:
- ASSERT_UNREACHABLE();
- break;
- }
+ write_mmio(hwaddr, val, len);
}
static int cf_check msixtbl_read(
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 1cf236516789..8baac132b484 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -95,6 +95,7 @@
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/init.h>
+#include <xen/io.h>
#include <xen/iocap.h>
#include <xen/ioreq.h>
#include <xen/irq.h>
@@ -116,7 +117,6 @@
#include <asm/flushtlb.h>
#include <asm/guest.h>
#include <asm/idt.h>
-#include <asm/io.h>
#include <asm/io_apic.h>
#include <asm/ldt.h>
#include <asm/mem_sharing.h>
@@ -5102,7 +5102,7 @@ static void __iomem *subpage_mmio_map_page(
static void subpage_mmio_write_emulate(
mfn_t mfn,
unsigned int offset,
- const void *data,
+ unsigned long data,
unsigned int len)
{
struct subpage_ro_range *entry;
@@ -5131,26 +5131,8 @@ static void subpage_mmio_write_emulate(
return;
}
- addr += offset;
- switch ( len )
- {
- case 1:
- writeb(*(const uint8_t*)data, addr);
- break;
- case 2:
- writew(*(const uint16_t*)data, addr);
- break;
- case 4:
- writel(*(const uint32_t*)data, addr);
- break;
- case 8:
- writeq(*(const uint64_t*)data, addr);
- break;
- default:
- /* mmio_ro_emulated_write() already validated the size */
- ASSERT_UNREACHABLE();
+ if ( !write_mmio(addr + offset, data, len) )
goto write_ignored;
- }
}
#ifdef CONFIG_HVM
@@ -5185,18 +5167,20 @@ int cf_check mmio_ro_emulated_write(
struct x86_emulate_ctxt *ctxt)
{
struct mmio_ro_emulate_ctxt *mmio_ro_ctxt = ctxt->data;
+ unsigned long data = 0;
/* Only allow naturally-aligned stores at the original %cr2 address. */
if ( ((bytes | offset) & (bytes - 1)) || !bytes ||
- offset != mmio_ro_ctxt->cr2 )
+ offset != mmio_ro_ctxt->cr2 || bytes > sizeof(data) )
{
gdprintk(XENLOG_WARNING, "bad access (cr2=%lx, addr=%lx, bytes=%u)\n",
mmio_ro_ctxt->cr2, offset, bytes);
return X86EMUL_UNHANDLEABLE;
}
+ memcpy(&data, p_data, bytes);
subpage_mmio_write_emulate(mmio_ro_ctxt->mfn, PAGE_OFFSET(offset),
- p_data, bytes);
+ data, bytes);
return X86EMUL_OKAY;
}
diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
index 6bd8c55bb48e..6455f2a03a01 100644
--- a/xen/drivers/vpci/msix.c
+++ b/xen/drivers/vpci/msix.c
@@ -17,6 +17,7 @@
* License along with this program; If not, see <http://www.gnu.org/licenses/>.
*/
+#include <xen/io.h>
#include <xen/sched.h>
#include <xen/vpci.h>
@@ -344,28 +345,7 @@ static int adjacent_read(const struct domain *d, const struct vpci_msix *msix,
return X86EMUL_OKAY;
}
- switch ( len )
- {
- case 1:
- *data = readb(mem + PAGE_OFFSET(addr));
- break;
-
- case 2:
- *data = readw(mem + PAGE_OFFSET(addr));
- break;
-
- case 4:
- *data = readl(mem + PAGE_OFFSET(addr));
- break;
-
- case 8:
- *data = readq(mem + PAGE_OFFSET(addr));
- break;
-
- default:
- ASSERT_UNREACHABLE();
- break;
- }
+ *data = read_mmio(mem + PAGE_OFFSET(addr), len);
spin_unlock(&vpci->lock);
return X86EMUL_OKAY;
@@ -493,28 +473,7 @@ static int adjacent_write(const struct domain *d, const struct vpci_msix *msix,
return X86EMUL_OKAY;
}
- switch ( len )
- {
- case 1:
- writeb(data, mem + PAGE_OFFSET(addr));
- break;
-
- case 2:
- writew(data, mem + PAGE_OFFSET(addr));
- break;
-
- case 4:
- writel(data, mem + PAGE_OFFSET(addr));
- break;
-
- case 8:
- writeq(data, mem + PAGE_OFFSET(addr));
- break;
-
- default:
- ASSERT_UNREACHABLE();
- break;
- }
+ write_mmio(mem + PAGE_OFFSET(addr), data, len);
spin_unlock(&vpci->lock);
return X86EMUL_OKAY;
diff --git a/xen/include/xen/io.h b/xen/include/xen/io.h
new file mode 100644
index 000000000000..164a20c5d7b5
--- /dev/null
+++ b/xen/include/xen/io.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * IO related routines.
+ *
+ * Copyright (c) 2025 Cloud Software Group
+ */
+#ifndef XEN_IO_H
+#define XEN_IO_H
+
+#include <xen/bug.h>
+
+#include <asm/io.h>
+
+static inline unsigned long read_mmio(const volatile void __iomem *mem,
+ unsigned int size)
+{
+ switch ( size )
+ {
+ case 1:
+ return readb(mem);
+
+ case 2:
+ return readw(mem);
+
+ case 4:
+ return readl(mem);
+
+#ifdef CONFIG_64BIT
+ case 8:
+ return readq(mem);
+#endif
+
+ default:
+ ASSERT_UNREACHABLE();
+ return ~0UL;
+ }
+}
+
+static inline bool write_mmio(volatile void __iomem *mem, unsigned long data,
+ unsigned int size)
+{
+ switch ( size )
+ {
+ case 1:
+ writeb(data, mem);
+ break;
+
+ case 2:
+ writew(data, mem);
+ break;
+
+ case 4:
+ writel(data, mem);
+ break;
+
+#ifdef CONFIG_64BIT
+ case 8:
+ writeq(data, mem);
+ break;
+#endif
+
+ default:
+ ASSERT_UNREACHABLE();
+ return false;
+ }
+
+ return true;
+}
+
+#endif /* XEN_IO_H */
--
2.48.1
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [PATCH v3 1/4] xen/io: provide helpers for multi size MMIO accesses
2025-04-17 15:25 ` [PATCH v3 1/4] xen/io: provide helpers for multi size MMIO accesses Roger Pau Monne
@ 2025-04-17 15:32 ` Jan Beulich
0 siblings, 0 replies; 12+ messages in thread
From: Jan Beulich @ 2025-04-17 15:32 UTC (permalink / raw)
To: Roger Pau Monne
Cc: Andrew Cooper, Anthony PERARD, Michal Orzel, Julien Grall,
Stefano Stabellini, xen-devel
On 17.04.2025 17:25, Roger Pau Monne wrote:
> Several handlers have the same necessity of reading or writing from or to
> an MMIO region using 1, 2, 4 or 8 bytes accesses. So far this has been
> open-coded in the function itself. Instead provide a new set of handlers
> that encapsulate the accesses.
>
> Since the added helpers are not architecture specific, introduce a new
> generic io.h header.
>
> No functional change intended.
>
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v3 2/4] x86/hvm: fix handling of accesses to partial r/o MMIO pages
2025-04-17 15:25 [PATCH v3 0/4] xen/x86: fix implementation of subpage r/o MMIO Roger Pau Monne
2025-04-17 15:25 ` [PATCH v3 1/4] xen/io: provide helpers for multi size MMIO accesses Roger Pau Monne
@ 2025-04-17 15:25 ` Roger Pau Monne
2025-04-17 15:38 ` Jan Beulich
2025-04-17 15:25 ` [PATCH v3 3/4] x86/hvm: only register the r/o subpage ops when needed Roger Pau Monne
2025-04-17 15:25 ` [PATCH v3 4/4] x86/mm: move mmio_ro_emulated_write() to PV only file Roger Pau Monne
3 siblings, 1 reply; 12+ messages in thread
From: Roger Pau Monne @ 2025-04-17 15:25 UTC (permalink / raw)
To: xen-devel; +Cc: Roger Pau Monne, Jan Beulich, Andrew Cooper
The current logic to handle accesses to MMIO pages partially read-only is
based on the (now removed) logic used to handle accesses to the r/o MMCFG
region(s) for PVH v1 dom0. However that has issues when running on AMD
hardware, as in that case the guest linear address that triggered the fault
is not provided as part of the VM exit. This caused
mmio_ro_emulated_write() to always fail before calling
subpage_mmio_write_emulate() when running on AMD and called from an HVM
context.
Take a different approach and convert the handling of partial read-only
MMIO page accesses into an HVM MMIO ops handler, as that's the more natural
way to handle this kind of emulation for HVM domains.
This allows getting rid of hvm_emulate_one_mmio() and it's single call site
in hvm_hap_nested_page_fault(). As part of the fix r/o MMIO accesses are
now handled by handle_mmio_with_translation(), re-using the same logic that
was used for other read-only types part of p2m_is_discard_write(). The
usage of emulation for faulting p2m_mmio_direct types is limited to
addresses in the r/o MMIO range. The page present check is dropped as type
p2m_mmio_direct must have the present bit set in the PTE.
Note a small adjustment is needed to the `pf-fixup` dom0 PVH logic: avoid
attempting to fixup faults resulting from write accesses to read-only MMIO
regions, as handling of those accesses is now done by handle_mmio().
Fixes: 33c19df9a5a0 ('x86/PCI: intercept accesses to RO MMIO from dom0s in HVM containers')
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v2:
- Attempt to fixup read access to r/o MMIO regions for dom0 PVH.
- Use PFN_DOWN() with get_gfn_query().
- Reject unaligned accesses.
- Add comment about the need to handle reads.
Changes since v1:
- Introduce hvm/mmio.c to place the r/o MMIO handlers.
- Add comment about pf-fixup and r/o MMIO range checking.
- Expand commit message about dropping the PTE present check and usage of
the emulator.
- Return X86EMUL_OKAY in the read handler if the MMIO region is not found.
- Check the faulting address is in the mmio_ro_ranges before sending for
emulation.
---
xen/arch/x86/hvm/Makefile | 1 +
xen/arch/x86/hvm/emulate.c | 54 ++---------
xen/arch/x86/hvm/hvm.c | 17 ++--
xen/arch/x86/hvm/mmio.c | 125 +++++++++++++++++++++++++
xen/arch/x86/include/asm/hvm/emulate.h | 1 -
xen/arch/x86/include/asm/hvm/io.h | 3 +
xen/arch/x86/include/asm/mm.h | 12 +++
xen/arch/x86/mm.c | 37 +-------
8 files changed, 160 insertions(+), 90 deletions(-)
create mode 100644 xen/arch/x86/hvm/mmio.c
diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
index 4c1fa5c6c2bf..6ec2c8f2db56 100644
--- a/xen/arch/x86/hvm/Makefile
+++ b/xen/arch/x86/hvm/Makefile
@@ -15,6 +15,7 @@ obj-y += intercept.o
obj-y += io.o
obj-y += ioreq.o
obj-y += irq.o
+obj-y += mmio.o
obj-y += monitor.o
obj-y += mtrr.o
obj-y += nestedhvm.o
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 79c180b408ac..65b41b964df0 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -370,7 +370,15 @@ static int hvmemul_do_io(
/* If there is no suitable backing DM, just ignore accesses */
if ( !s )
{
- if ( is_mmio && is_hardware_domain(currd) )
+ if ( is_mmio && is_hardware_domain(currd) &&
+ /*
+ * Do not attempt to fixup write accesses to r/o MMIO regions,
+ * they are expected to be terminated by the null handler
+ * below.
+ */
+ (!rangeset_contains_singleton(mmio_ro_ranges,
+ PFN_DOWN(addr)) ||
+ dir == IOREQ_READ) )
{
/*
* PVH dom0 is likely missing MMIO mappings on the p2m, due to
@@ -2859,50 +2867,6 @@ int hvm_emulate_one(
return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops, completion);
}
-int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla)
-{
- static const struct x86_emulate_ops hvm_ro_emulate_ops_mmio = {
- .read = x86emul_unhandleable_rw,
- .insn_fetch = hvmemul_insn_fetch,
- .write = mmio_ro_emulated_write,
- .validate = hvmemul_validate,
- };
- struct mmio_ro_emulate_ctxt mmio_ro_ctxt = { .cr2 = gla, .mfn = _mfn(mfn) };
- struct hvm_emulate_ctxt ctxt;
- unsigned int seg, bdf;
- int rc;
-
- if ( pci_ro_mmcfg_decode(mfn, &seg, &bdf) )
- {
- /* Should be always handled by vPCI for PVH dom0. */
- gdprintk(XENLOG_ERR, "unhandled MMCFG access for %pp\n",
- &PCI_SBDF(seg, bdf));
- ASSERT_UNREACHABLE();
- return X86EMUL_UNHANDLEABLE;
- }
-
- hvm_emulate_init_once(&ctxt, x86_insn_is_mem_write,
- guest_cpu_user_regs());
- ctxt.ctxt.data = &mmio_ro_ctxt;
-
- switch ( rc = _hvm_emulate_one(&ctxt, &hvm_ro_emulate_ops_mmio,
- VIO_no_completion) )
- {
- case X86EMUL_UNHANDLEABLE:
- case X86EMUL_UNIMPLEMENTED:
- hvm_dump_emulation_state(XENLOG_G_WARNING, "r/o MMIO", &ctxt, rc);
- break;
- case X86EMUL_EXCEPTION:
- hvm_inject_event(&ctxt.ctxt.event);
- /* fallthrough */
- default:
- hvm_emulate_writeback(&ctxt);
- break;
- }
-
- return rc;
-}
-
void hvm_emulate_one_vm_event(enum emul_kind kind, unsigned int trapnr,
unsigned int errcode)
{
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 6f1174c5127e..6b998387e3d8 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -8,6 +8,7 @@
*/
#include <xen/init.h>
+#include <xen/io.h>
#include <xen/ioreq.h>
#include <xen/lib.h>
#include <xen/trace.h>
@@ -35,7 +36,6 @@
#include <asm/current.h>
#include <asm/debugreg.h>
#include <asm/e820.h>
-#include <asm/io.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
@@ -692,6 +692,8 @@ int hvm_domain_initialise(struct domain *d,
register_portio_handler(d, XEN_HVM_DEBUGCONS_IOPORT, 1, hvm_print_line);
+ register_subpage_ro_handler(d);
+
if ( hvm_tsc_scaling_supported )
d->arch.hvm.tsc_scaling_ratio = hvm_default_tsc_scaling_ratio;
@@ -1981,7 +1983,10 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
*/
if ( (p2mt == p2m_mmio_dm) ||
(npfec.write_access &&
- (p2m_is_discard_write(p2mt) || (p2mt == p2m_ioreq_server))) )
+ (p2m_is_discard_write(p2mt) || (p2mt == p2m_ioreq_server) ||
+ /* MMIO entries can be r/o if the target mfn is in mmio_ro_ranges. */
+ (p2mt == p2m_mmio_direct &&
+ rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn))))) )
{
if ( !handle_mmio_with_translation(gla, gfn, npfec) )
hvm_inject_hw_exception(X86_EXC_GP, 0);
@@ -2033,14 +2038,6 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
goto out_put_gfn;
}
- if ( (p2mt == p2m_mmio_direct) && npfec.write_access && npfec.present &&
- (is_hardware_domain(currd) || subpage_mmio_write_accept(mfn, gla)) &&
- (hvm_emulate_one_mmio(mfn_x(mfn), gla) == X86EMUL_OKAY) )
- {
- rc = 1;
- goto out_put_gfn;
- }
-
/* If we fell through, the vcpu will retry now that access restrictions have
* been removed. It may fault again if the p2m entry type still requires so.
* Otherwise, this is an error condition. */
diff --git a/xen/arch/x86/hvm/mmio.c b/xen/arch/x86/hvm/mmio.c
new file mode 100644
index 000000000000..88e9d0c65d89
--- /dev/null
+++ b/xen/arch/x86/hvm/mmio.c
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * MMIO related routines.
+ *
+ * Copyright (c) 2025 Cloud Software Group
+ */
+
+#include <xen/io.h>
+#include <xen/mm.h>
+
+#include <asm/p2m.h>
+
+static int cf_check subpage_mmio_accept(struct vcpu *v, unsigned long addr)
+{
+ p2m_type_t t;
+ mfn_t mfn = get_gfn_query_unlocked(v->domain, PFN_DOWN(addr), &t);
+
+ return !mfn_eq(mfn, INVALID_MFN) && t == p2m_mmio_direct &&
+ subpage_mmio_find_page(mfn);
+}
+
+/*
+ * The guest has read access to those regions, and consequently read accesses
+ * shouldn't fault. However read-modify-write operations may take this path,
+ * so handling of reads is necessary.
+ */
+static int cf_check subpage_mmio_read(
+ struct vcpu *v, unsigned long addr, unsigned int len, unsigned long *data)
+{
+ struct domain *d = v->domain;
+ unsigned long gfn = PFN_DOWN(addr);
+ p2m_type_t t;
+ mfn_t mfn;
+ struct subpage_ro_range *entry;
+ volatile void __iomem *mem;
+
+ *data = ~0UL;
+
+ if ( !IS_ALIGNED(len | addr, len) )
+ {
+ gprintk(XENLOG_ERR,
+ "ignoring unaligned read to r/o MMIO subpage %#lx size %u\n",
+ addr, len);
+ return X86EMUL_OKAY;
+ }
+
+ mfn = get_gfn_query(d, gfn, &t);
+ if ( mfn_eq(mfn, INVALID_MFN) || t != p2m_mmio_direct )
+ {
+ put_gfn(d, gfn);
+ return X86EMUL_RETRY;
+ }
+
+ entry = subpage_mmio_find_page(mfn);
+ if ( !entry )
+ {
+ put_gfn(d, gfn);
+ return X86EMUL_OKAY;
+ }
+
+ mem = subpage_mmio_map_page(entry);
+ if ( !mem )
+ {
+ put_gfn(d, gfn);
+ gprintk(XENLOG_ERR,
+ "Failed to map page for MMIO read at %#lx -> %#lx\n",
+ addr, mfn_to_maddr(mfn) + PAGE_OFFSET(addr));
+ return X86EMUL_OKAY;
+ }
+
+ *data = read_mmio(mem + PAGE_OFFSET(addr), len);
+
+ put_gfn(d, gfn);
+ return X86EMUL_OKAY;
+}
+
+static int cf_check subpage_mmio_write(
+ struct vcpu *v, unsigned long addr, unsigned int len, unsigned long data)
+{
+ struct domain *d = v->domain;
+ unsigned long gfn = PFN_DOWN(addr);
+ p2m_type_t t;
+ mfn_t mfn;
+
+ if ( !IS_ALIGNED(len | addr, len) )
+ {
+ gprintk(XENLOG_ERR,
+ "ignoring unaligned write to r/o MMIO subpage %#lx size %u\n",
+ addr, len);
+ return X86EMUL_OKAY;
+ }
+
+ mfn = get_gfn_query(d, gfn, &t);
+ if ( mfn_eq(mfn, INVALID_MFN) || t != p2m_mmio_direct )
+ {
+ put_gfn(d, gfn);
+ return X86EMUL_RETRY;
+ }
+
+ subpage_mmio_write_emulate(mfn, PAGE_OFFSET(addr), data, len);
+
+ put_gfn(d, gfn);
+ return X86EMUL_OKAY;
+}
+
+void register_subpage_ro_handler(struct domain *d)
+{
+ static const struct hvm_mmio_ops subpage_mmio_ops = {
+ .check = subpage_mmio_accept,
+ .read = subpage_mmio_read,
+ .write = subpage_mmio_write,
+ };
+
+ register_mmio_handler(d, &subpage_mmio_ops);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/include/asm/hvm/emulate.h b/xen/arch/x86/include/asm/hvm/emulate.h
index c7a2d2a5be4e..178ac32e151f 100644
--- a/xen/arch/x86/include/asm/hvm/emulate.h
+++ b/xen/arch/x86/include/asm/hvm/emulate.h
@@ -86,7 +86,6 @@ void hvmemul_cancel(struct vcpu *v);
struct segment_register *hvmemul_get_seg_reg(
enum x86_segment seg,
struct hvm_emulate_ctxt *hvmemul_ctxt);
-int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla);
static inline bool handle_mmio(void)
{
diff --git a/xen/arch/x86/include/asm/hvm/io.h b/xen/arch/x86/include/asm/hvm/io.h
index 565bad300d20..c12f099a037c 100644
--- a/xen/arch/x86/include/asm/hvm/io.h
+++ b/xen/arch/x86/include/asm/hvm/io.h
@@ -135,6 +135,9 @@ void destroy_vpci_mmcfg(struct domain *d);
/* Remove MMCFG regions from a domain ->iomem_caps. */
int vpci_mmcfg_deny_access(struct domain *d);
+/* r/o MMIO subpage access handler. */
+void register_subpage_ro_handler(struct domain *d);
+
#endif /* __ASM_X86_HVM_IO_H__ */
diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
index a1bc8cc27451..c2e9ef6e5023 100644
--- a/xen/arch/x86/include/asm/mm.h
+++ b/xen/arch/x86/include/asm/mm.h
@@ -554,6 +554,18 @@ int cf_check mmio_ro_emulated_write(
enum x86_segment seg, unsigned long offset, void *p_data,
unsigned int bytes, struct x86_emulate_ctxt *ctxt);
+/* r/o MMIO subpage access handlers. */
+struct subpage_ro_range {
+ struct list_head list;
+ mfn_t mfn;
+ void __iomem *mapped;
+ DECLARE_BITMAP(ro_elems, PAGE_SIZE / MMIO_RO_SUBPAGE_GRAN);
+};
+struct subpage_ro_range *subpage_mmio_find_page(mfn_t mfn);
+void __iomem *subpage_mmio_map_page(struct subpage_ro_range *entry);
+void subpage_mmio_write_emulate(
+ mfn_t mfn, unsigned int offset, unsigned long data, unsigned int len);
+
int audit_adjust_pgtables(struct domain *d, int dir, int noisy);
extern int pagefault_by_memadd(unsigned long addr, struct cpu_user_regs *regs);
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 8baac132b484..927beb6f07aa 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -157,13 +157,6 @@ struct rangeset *__read_mostly mmio_ro_ranges;
static uint32_t __ro_after_init base_disallow_mask;
/* Handling sub-page read-only MMIO regions */
-struct subpage_ro_range {
- struct list_head list;
- mfn_t mfn;
- void __iomem *mapped;
- DECLARE_BITMAP(ro_elems, PAGE_SIZE / MMIO_RO_SUBPAGE_GRAN);
-};
-
static LIST_HEAD_RO_AFTER_INIT(subpage_ro_ranges);
static DEFINE_SPINLOCK(subpage_ro_lock);
@@ -4929,7 +4922,7 @@ long arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
return rc;
}
-static struct subpage_ro_range *subpage_mmio_find_page(mfn_t mfn)
+struct subpage_ro_range *subpage_mmio_find_page(mfn_t mfn)
{
struct subpage_ro_range *entry;
@@ -5074,7 +5067,7 @@ int __init subpage_mmio_ro_add(
return rc;
}
-static void __iomem *subpage_mmio_map_page(
+void __iomem *subpage_mmio_map_page(
struct subpage_ro_range *entry)
{
void __iomem *mapped_page;
@@ -5099,7 +5092,7 @@ static void __iomem *subpage_mmio_map_page(
return entry->mapped;
}
-static void subpage_mmio_write_emulate(
+void subpage_mmio_write_emulate(
mfn_t mfn,
unsigned int offset,
unsigned long data,
@@ -5135,30 +5128,6 @@ static void subpage_mmio_write_emulate(
goto write_ignored;
}
-#ifdef CONFIG_HVM
-bool subpage_mmio_write_accept(mfn_t mfn, unsigned long gla)
-{
- unsigned int offset = PAGE_OFFSET(gla);
- const struct subpage_ro_range *entry;
-
- entry = subpage_mmio_find_page(mfn);
- if ( !entry )
- return false;
-
- if ( !test_bit(offset / MMIO_RO_SUBPAGE_GRAN, entry->ro_elems) )
- {
- /*
- * We don't know the write size at this point yet, so it could be
- * an unaligned write, but accept it here anyway and deal with it
- * later.
- */
- return true;
- }
-
- return false;
-}
-#endif
-
int cf_check mmio_ro_emulated_write(
enum x86_segment seg,
unsigned long offset,
--
2.48.1
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [PATCH v3 2/4] x86/hvm: fix handling of accesses to partial r/o MMIO pages
2025-04-17 15:25 ` [PATCH v3 2/4] x86/hvm: fix handling of accesses to partial r/o MMIO pages Roger Pau Monne
@ 2025-04-17 15:38 ` Jan Beulich
2025-04-17 16:23 ` Roger Pau Monné
0 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2025-04-17 15:38 UTC (permalink / raw)
To: Roger Pau Monne; +Cc: Andrew Cooper, xen-devel
On 17.04.2025 17:25, Roger Pau Monne wrote:
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -370,7 +370,15 @@ static int hvmemul_do_io(
> /* If there is no suitable backing DM, just ignore accesses */
> if ( !s )
> {
> - if ( is_mmio && is_hardware_domain(currd) )
> + if ( is_mmio && is_hardware_domain(currd) &&
> + /*
> + * Do not attempt to fixup write accesses to r/o MMIO regions,
> + * they are expected to be terminated by the null handler
> + * below.
> + */
> + (!rangeset_contains_singleton(mmio_ro_ranges,
> + PFN_DOWN(addr)) ||
> + dir == IOREQ_READ) )
These two would better be swapped, for the cheap one to be done first.
> --- /dev/null
> +++ b/xen/arch/x86/hvm/mmio.c
> @@ -0,0 +1,125 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * MMIO related routines.
> + *
> + * Copyright (c) 2025 Cloud Software Group
> + */
> +
> +#include <xen/io.h>
> +#include <xen/mm.h>
> +
> +#include <asm/p2m.h>
> +
> +static int cf_check subpage_mmio_accept(struct vcpu *v, unsigned long addr)
> +{
> + p2m_type_t t;
> + mfn_t mfn = get_gfn_query_unlocked(v->domain, PFN_DOWN(addr), &t);
> +
> + return !mfn_eq(mfn, INVALID_MFN) && t == p2m_mmio_direct &&
> + subpage_mmio_find_page(mfn);
> +}
> +
> +/*
> + * The guest has read access to those regions, and consequently read accesses
> + * shouldn't fault. However read-modify-write operations may take this path,
> + * so handling of reads is necessary.
> + */
> +static int cf_check subpage_mmio_read(
> + struct vcpu *v, unsigned long addr, unsigned int len, unsigned long *data)
> +{
> + struct domain *d = v->domain;
> + unsigned long gfn = PFN_DOWN(addr);
> + p2m_type_t t;
> + mfn_t mfn;
> + struct subpage_ro_range *entry;
> + volatile void __iomem *mem;
> +
> + *data = ~0UL;
> +
> + if ( !IS_ALIGNED(len | addr, len) )
What's the point of doing the | ? len can't be misaligned with itself?
> --- a/xen/arch/x86/include/asm/mm.h
> +++ b/xen/arch/x86/include/asm/mm.h
> @@ -554,6 +554,18 @@ int cf_check mmio_ro_emulated_write(
> enum x86_segment seg, unsigned long offset, void *p_data,
> unsigned int bytes, struct x86_emulate_ctxt *ctxt);
>
> +/* r/o MMIO subpage access handlers. */
> +struct subpage_ro_range {
> + struct list_head list;
> + mfn_t mfn;
> + void __iomem *mapped;
> + DECLARE_BITMAP(ro_elems, PAGE_SIZE / MMIO_RO_SUBPAGE_GRAN);
> +};
> +struct subpage_ro_range *subpage_mmio_find_page(mfn_t mfn);
> +void __iomem *subpage_mmio_map_page(struct subpage_ro_range *entry);
I notice you didn't move the __iomem, which - as indicated - I agree with,
but Andrew didn't. Did you sort this with him privately?
Jan
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH v3 2/4] x86/hvm: fix handling of accesses to partial r/o MMIO pages
2025-04-17 15:38 ` Jan Beulich
@ 2025-04-17 16:23 ` Roger Pau Monné
2025-04-22 6:46 ` Jan Beulich
0 siblings, 1 reply; 12+ messages in thread
From: Roger Pau Monné @ 2025-04-17 16:23 UTC (permalink / raw)
To: Jan Beulich; +Cc: Andrew Cooper, xen-devel
On Thu, Apr 17, 2025 at 05:38:54PM +0200, Jan Beulich wrote:
> On 17.04.2025 17:25, Roger Pau Monne wrote:
> > --- a/xen/arch/x86/hvm/emulate.c
> > +++ b/xen/arch/x86/hvm/emulate.c
> > @@ -370,7 +370,15 @@ static int hvmemul_do_io(
> > /* If there is no suitable backing DM, just ignore accesses */
> > if ( !s )
> > {
> > - if ( is_mmio && is_hardware_domain(currd) )
> > + if ( is_mmio && is_hardware_domain(currd) &&
> > + /*
> > + * Do not attempt to fixup write accesses to r/o MMIO regions,
> > + * they are expected to be terminated by the null handler
> > + * below.
> > + */
> > + (!rangeset_contains_singleton(mmio_ro_ranges,
> > + PFN_DOWN(addr)) ||
> > + dir == IOREQ_READ) )
>
> These two would better be swapped, for the cheap one to be done first.
Can swap, thanks for pointing out.
> > --- /dev/null
> > +++ b/xen/arch/x86/hvm/mmio.c
> > @@ -0,0 +1,125 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/*
> > + * MMIO related routines.
> > + *
> > + * Copyright (c) 2025 Cloud Software Group
> > + */
> > +
> > +#include <xen/io.h>
> > +#include <xen/mm.h>
> > +
> > +#include <asm/p2m.h>
> > +
> > +static int cf_check subpage_mmio_accept(struct vcpu *v, unsigned long addr)
> > +{
> > + p2m_type_t t;
> > + mfn_t mfn = get_gfn_query_unlocked(v->domain, PFN_DOWN(addr), &t);
> > +
> > + return !mfn_eq(mfn, INVALID_MFN) && t == p2m_mmio_direct &&
> > + subpage_mmio_find_page(mfn);
> > +}
> > +
> > +/*
> > + * The guest has read access to those regions, and consequently read accesses
> > + * shouldn't fault. However read-modify-write operations may take this path,
> > + * so handling of reads is necessary.
> > + */
> > +static int cf_check subpage_mmio_read(
> > + struct vcpu *v, unsigned long addr, unsigned int len, unsigned long *data)
> > +{
> > + struct domain *d = v->domain;
> > + unsigned long gfn = PFN_DOWN(addr);
> > + p2m_type_t t;
> > + mfn_t mfn;
> > + struct subpage_ro_range *entry;
> > + volatile void __iomem *mem;
> > +
> > + *data = ~0UL;
> > +
> > + if ( !IS_ALIGNED(len | addr, len) )
>
> What's the point of doing the | ? len can't be misaligned with itself?
Hm, it's the same form that's used in mmio_ro_emulated_write(), I
assumed it was to catch illegal access lengths, like 3.
> > --- a/xen/arch/x86/include/asm/mm.h
> > +++ b/xen/arch/x86/include/asm/mm.h
> > @@ -554,6 +554,18 @@ int cf_check mmio_ro_emulated_write(
> > enum x86_segment seg, unsigned long offset, void *p_data,
> > unsigned int bytes, struct x86_emulate_ctxt *ctxt);
> >
> > +/* r/o MMIO subpage access handlers. */
> > +struct subpage_ro_range {
> > + struct list_head list;
> > + mfn_t mfn;
> > + void __iomem *mapped;
> > + DECLARE_BITMAP(ro_elems, PAGE_SIZE / MMIO_RO_SUBPAGE_GRAN);
> > +};
> > +struct subpage_ro_range *subpage_mmio_find_page(mfn_t mfn);
> > +void __iomem *subpage_mmio_map_page(struct subpage_ro_range *entry);
>
> I notice you didn't move the __iomem, which - as indicated - I agree with,
> but Andrew didn't. Did you sort this with him privately?
I will raise with him, seeing as he didn't reply to your last email, I
assumed he was OK with the reasoning?
Thanks, Roger.
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH v3 2/4] x86/hvm: fix handling of accesses to partial r/o MMIO pages
2025-04-17 16:23 ` Roger Pau Monné
@ 2025-04-22 6:46 ` Jan Beulich
2025-04-23 8:21 ` Roger Pau Monné
0 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2025-04-22 6:46 UTC (permalink / raw)
To: Roger Pau Monné; +Cc: Andrew Cooper, xen-devel
On 17.04.2025 18:23, Roger Pau Monné wrote:
> On Thu, Apr 17, 2025 at 05:38:54PM +0200, Jan Beulich wrote:
>> On 17.04.2025 17:25, Roger Pau Monne wrote:
>>> --- /dev/null
>>> +++ b/xen/arch/x86/hvm/mmio.c
>>> @@ -0,0 +1,125 @@
>>> +/* SPDX-License-Identifier: GPL-2.0-only */
>>> +/*
>>> + * MMIO related routines.
>>> + *
>>> + * Copyright (c) 2025 Cloud Software Group
>>> + */
>>> +
>>> +#include <xen/io.h>
>>> +#include <xen/mm.h>
>>> +
>>> +#include <asm/p2m.h>
>>> +
>>> +static int cf_check subpage_mmio_accept(struct vcpu *v, unsigned long addr)
>>> +{
>>> + p2m_type_t t;
>>> + mfn_t mfn = get_gfn_query_unlocked(v->domain, PFN_DOWN(addr), &t);
>>> +
>>> + return !mfn_eq(mfn, INVALID_MFN) && t == p2m_mmio_direct &&
>>> + subpage_mmio_find_page(mfn);
>>> +}
>>> +
>>> +/*
>>> + * The guest has read access to those regions, and consequently read accesses
>>> + * shouldn't fault. However read-modify-write operations may take this path,
>>> + * so handling of reads is necessary.
>>> + */
>>> +static int cf_check subpage_mmio_read(
>>> + struct vcpu *v, unsigned long addr, unsigned int len, unsigned long *data)
>>> +{
>>> + struct domain *d = v->domain;
>>> + unsigned long gfn = PFN_DOWN(addr);
>>> + p2m_type_t t;
>>> + mfn_t mfn;
>>> + struct subpage_ro_range *entry;
>>> + volatile void __iomem *mem;
>>> +
>>> + *data = ~0UL;
>>> +
>>> + if ( !IS_ALIGNED(len | addr, len) )
>>
>> What's the point of doing the | ? len can't be misaligned with itself?
>
> Hm, it's the same form that's used in mmio_ro_emulated_write(), I
> assumed it was to catch illegal access lengths, like 3.
Oh, I see. But that's not using IS_ALIGNED(), and imo validly so, despite the
apparent open-coding. IS_ALIGNED() requires the 2nd argument to be a power of
two. The combined check there is folding the power-of-2 one with the is-
aligned one.
Jan
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH v3 2/4] x86/hvm: fix handling of accesses to partial r/o MMIO pages
2025-04-22 6:46 ` Jan Beulich
@ 2025-04-23 8:21 ` Roger Pau Monné
2025-04-23 8:32 ` Jan Beulich
0 siblings, 1 reply; 12+ messages in thread
From: Roger Pau Monné @ 2025-04-23 8:21 UTC (permalink / raw)
To: Jan Beulich; +Cc: Andrew Cooper, xen-devel
On Tue, Apr 22, 2025 at 08:46:13AM +0200, Jan Beulich wrote:
> On 17.04.2025 18:23, Roger Pau Monné wrote:
> > On Thu, Apr 17, 2025 at 05:38:54PM +0200, Jan Beulich wrote:
> >> On 17.04.2025 17:25, Roger Pau Monne wrote:
> >>> --- /dev/null
> >>> +++ b/xen/arch/x86/hvm/mmio.c
> >>> @@ -0,0 +1,125 @@
> >>> +/* SPDX-License-Identifier: GPL-2.0-only */
> >>> +/*
> >>> + * MMIO related routines.
> >>> + *
> >>> + * Copyright (c) 2025 Cloud Software Group
> >>> + */
> >>> +
> >>> +#include <xen/io.h>
> >>> +#include <xen/mm.h>
> >>> +
> >>> +#include <asm/p2m.h>
> >>> +
> >>> +static int cf_check subpage_mmio_accept(struct vcpu *v, unsigned long addr)
> >>> +{
> >>> + p2m_type_t t;
> >>> + mfn_t mfn = get_gfn_query_unlocked(v->domain, PFN_DOWN(addr), &t);
> >>> +
> >>> + return !mfn_eq(mfn, INVALID_MFN) && t == p2m_mmio_direct &&
> >>> + subpage_mmio_find_page(mfn);
> >>> +}
> >>> +
> >>> +/*
> >>> + * The guest has read access to those regions, and consequently read accesses
> >>> + * shouldn't fault. However read-modify-write operations may take this path,
> >>> + * so handling of reads is necessary.
> >>> + */
> >>> +static int cf_check subpage_mmio_read(
> >>> + struct vcpu *v, unsigned long addr, unsigned int len, unsigned long *data)
> >>> +{
> >>> + struct domain *d = v->domain;
> >>> + unsigned long gfn = PFN_DOWN(addr);
> >>> + p2m_type_t t;
> >>> + mfn_t mfn;
> >>> + struct subpage_ro_range *entry;
> >>> + volatile void __iomem *mem;
> >>> +
> >>> + *data = ~0UL;
> >>> +
> >>> + if ( !IS_ALIGNED(len | addr, len) )
> >>
> >> What's the point of doing the | ? len can't be misaligned with itself?
> >
> > Hm, it's the same form that's used in mmio_ro_emulated_write(), I
> > assumed it was to catch illegal access lengths, like 3.
>
> Oh, I see. But that's not using IS_ALIGNED(), and imo validly so, despite the
> apparent open-coding. IS_ALIGNED() requires the 2nd argument to be a power of
> two. The combined check there is folding the power-of-2 one with the is-
> aligned one.
Do you think it's worth keeping those checks then? I could do:
if ( len & (len - 1) || len > 8 || !IS_ALIGNED(addr, len) )
As a possibly more complete and easier to parse check?
Thanks, Roger.
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH v3 2/4] x86/hvm: fix handling of accesses to partial r/o MMIO pages
2025-04-23 8:21 ` Roger Pau Monné
@ 2025-04-23 8:32 ` Jan Beulich
2025-04-23 8:52 ` Roger Pau Monné
0 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2025-04-23 8:32 UTC (permalink / raw)
To: Roger Pau Monné; +Cc: Andrew Cooper, xen-devel
On 23.04.2025 10:21, Roger Pau Monné wrote:
> On Tue, Apr 22, 2025 at 08:46:13AM +0200, Jan Beulich wrote:
>> On 17.04.2025 18:23, Roger Pau Monné wrote:
>>> On Thu, Apr 17, 2025 at 05:38:54PM +0200, Jan Beulich wrote:
>>>> On 17.04.2025 17:25, Roger Pau Monne wrote:
>>>>> --- /dev/null
>>>>> +++ b/xen/arch/x86/hvm/mmio.c
>>>>> @@ -0,0 +1,125 @@
>>>>> +/* SPDX-License-Identifier: GPL-2.0-only */
>>>>> +/*
>>>>> + * MMIO related routines.
>>>>> + *
>>>>> + * Copyright (c) 2025 Cloud Software Group
>>>>> + */
>>>>> +
>>>>> +#include <xen/io.h>
>>>>> +#include <xen/mm.h>
>>>>> +
>>>>> +#include <asm/p2m.h>
>>>>> +
>>>>> +static int cf_check subpage_mmio_accept(struct vcpu *v, unsigned long addr)
>>>>> +{
>>>>> + p2m_type_t t;
>>>>> + mfn_t mfn = get_gfn_query_unlocked(v->domain, PFN_DOWN(addr), &t);
>>>>> +
>>>>> + return !mfn_eq(mfn, INVALID_MFN) && t == p2m_mmio_direct &&
>>>>> + subpage_mmio_find_page(mfn);
>>>>> +}
>>>>> +
>>>>> +/*
>>>>> + * The guest has read access to those regions, and consequently read accesses
>>>>> + * shouldn't fault. However read-modify-write operations may take this path,
>>>>> + * so handling of reads is necessary.
>>>>> + */
>>>>> +static int cf_check subpage_mmio_read(
>>>>> + struct vcpu *v, unsigned long addr, unsigned int len, unsigned long *data)
>>>>> +{
>>>>> + struct domain *d = v->domain;
>>>>> + unsigned long gfn = PFN_DOWN(addr);
>>>>> + p2m_type_t t;
>>>>> + mfn_t mfn;
>>>>> + struct subpage_ro_range *entry;
>>>>> + volatile void __iomem *mem;
>>>>> +
>>>>> + *data = ~0UL;
>>>>> +
>>>>> + if ( !IS_ALIGNED(len | addr, len) )
>>>>
>>>> What's the point of doing the | ? len can't be misaligned with itself?
>>>
>>> Hm, it's the same form that's used in mmio_ro_emulated_write(), I
>>> assumed it was to catch illegal access lengths, like 3.
>>
>> Oh, I see. But that's not using IS_ALIGNED(), and imo validly so, despite the
>> apparent open-coding. IS_ALIGNED() requires the 2nd argument to be a power of
>> two. The combined check there is folding the power-of-2 one with the is-
>> aligned one.
>
> Do you think it's worth keeping those checks then?
Yes, I think we should be as strict as possible in what we (try to) emulate.
> I could do:
>
> if ( len & (len - 1) || len > 8 || !IS_ALIGNED(addr, len) )
>
> As a possibly more complete and easier to parse check?
If you dislike the form mmio_ro_emulated_write() uses, sure. However, you
will want to check len to be non-zero, while I'm unsure you need to check
len > 8 - mmio_ro_emulated_write() doesn't have such. Albeit - perhaps
wrongly so; we'd end at the ASSERT_UNREACHABLE() in
subpage_mmio_write_emulate() if a wider store was used. I guess I ought to
make a patch there, and you want to keep the "len > 8".
Jan
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH v3 2/4] x86/hvm: fix handling of accesses to partial r/o MMIO pages
2025-04-23 8:32 ` Jan Beulich
@ 2025-04-23 8:52 ` Roger Pau Monné
0 siblings, 0 replies; 12+ messages in thread
From: Roger Pau Monné @ 2025-04-23 8:52 UTC (permalink / raw)
To: Jan Beulich; +Cc: Andrew Cooper, xen-devel
On Wed, Apr 23, 2025 at 10:32:33AM +0200, Jan Beulich wrote:
> On 23.04.2025 10:21, Roger Pau Monné wrote:
> > On Tue, Apr 22, 2025 at 08:46:13AM +0200, Jan Beulich wrote:
> >> On 17.04.2025 18:23, Roger Pau Monné wrote:
> >>> On Thu, Apr 17, 2025 at 05:38:54PM +0200, Jan Beulich wrote:
> >>>> On 17.04.2025 17:25, Roger Pau Monne wrote:
> >>>>> --- /dev/null
> >>>>> +++ b/xen/arch/x86/hvm/mmio.c
> >>>>> @@ -0,0 +1,125 @@
> >>>>> +/* SPDX-License-Identifier: GPL-2.0-only */
> >>>>> +/*
> >>>>> + * MMIO related routines.
> >>>>> + *
> >>>>> + * Copyright (c) 2025 Cloud Software Group
> >>>>> + */
> >>>>> +
> >>>>> +#include <xen/io.h>
> >>>>> +#include <xen/mm.h>
> >>>>> +
> >>>>> +#include <asm/p2m.h>
> >>>>> +
> >>>>> +static int cf_check subpage_mmio_accept(struct vcpu *v, unsigned long addr)
> >>>>> +{
> >>>>> + p2m_type_t t;
> >>>>> + mfn_t mfn = get_gfn_query_unlocked(v->domain, PFN_DOWN(addr), &t);
> >>>>> +
> >>>>> + return !mfn_eq(mfn, INVALID_MFN) && t == p2m_mmio_direct &&
> >>>>> + subpage_mmio_find_page(mfn);
> >>>>> +}
> >>>>> +
> >>>>> +/*
> >>>>> + * The guest has read access to those regions, and consequently read accesses
> >>>>> + * shouldn't fault. However read-modify-write operations may take this path,
> >>>>> + * so handling of reads is necessary.
> >>>>> + */
> >>>>> +static int cf_check subpage_mmio_read(
> >>>>> + struct vcpu *v, unsigned long addr, unsigned int len, unsigned long *data)
> >>>>> +{
> >>>>> + struct domain *d = v->domain;
> >>>>> + unsigned long gfn = PFN_DOWN(addr);
> >>>>> + p2m_type_t t;
> >>>>> + mfn_t mfn;
> >>>>> + struct subpage_ro_range *entry;
> >>>>> + volatile void __iomem *mem;
> >>>>> +
> >>>>> + *data = ~0UL;
> >>>>> +
> >>>>> + if ( !IS_ALIGNED(len | addr, len) )
> >>>>
> >>>> What's the point of doing the | ? len can't be misaligned with itself?
> >>>
> >>> Hm, it's the same form that's used in mmio_ro_emulated_write(), I
> >>> assumed it was to catch illegal access lengths, like 3.
> >>
> >> Oh, I see. But that's not using IS_ALIGNED(), and imo validly so, despite the
> >> apparent open-coding. IS_ALIGNED() requires the 2nd argument to be a power of
> >> two. The combined check there is folding the power-of-2 one with the is-
> >> aligned one.
> >
> > Do you think it's worth keeping those checks then?
>
> Yes, I think we should be as strict as possible in what we (try to) emulate.
>
> > I could do:
> >
> > if ( len & (len - 1) || len > 8 || !IS_ALIGNED(addr, len) )
> >
> > As a possibly more complete and easier to parse check?
>
> If you dislike the form mmio_ro_emulated_write() uses, sure. However, you
> will want to check len to be non-zero, while I'm unsure you need to check
> len > 8 - mmio_ro_emulated_write() doesn't have such. Albeit - perhaps
> wrongly so; we'd end at the ASSERT_UNREACHABLE() in
> subpage_mmio_write_emulate() if a wider store was used. I guess I ought to
> make a patch there, and you want to keep the "len > 8".
OK, let me send v4 with those adjustments then.
Thanks, Roger.
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v3 3/4] x86/hvm: only register the r/o subpage ops when needed
2025-04-17 15:25 [PATCH v3 0/4] xen/x86: fix implementation of subpage r/o MMIO Roger Pau Monne
2025-04-17 15:25 ` [PATCH v3 1/4] xen/io: provide helpers for multi size MMIO accesses Roger Pau Monne
2025-04-17 15:25 ` [PATCH v3 2/4] x86/hvm: fix handling of accesses to partial r/o MMIO pages Roger Pau Monne
@ 2025-04-17 15:25 ` Roger Pau Monne
2025-04-17 15:25 ` [PATCH v3 4/4] x86/mm: move mmio_ro_emulated_write() to PV only file Roger Pau Monne
3 siblings, 0 replies; 12+ messages in thread
From: Roger Pau Monne @ 2025-04-17 15:25 UTC (permalink / raw)
To: xen-devel; +Cc: Roger Pau Monne, Jan Beulich, Andrew Cooper
MMIO operation handlers can be expensive to process, hence attempt to
register only those that will be needed by the domain.
Subpage r/o MMIO regions are added exclusively at boot, further limit their
addition to strictly before the initial domain gets created, so by the time
initial domain creation happens Xen knows whether subpage is required or
not. This allows only registering the MMIO handler when there are
subpage regions to handle.
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
Could possibly be part of the previous patch, but strictly speaking is an
improvement, as even before the previous patch subpage r/o was always
called even when no subpage regions are registered.
---
xen/arch/x86/hvm/hvm.c | 3 ++-
xen/arch/x86/include/asm/mm.h | 1 +
xen/arch/x86/mm.c | 16 ++++++++++++++++
3 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 6b998387e3d8..4cb2e13046d1 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -692,7 +692,8 @@ int hvm_domain_initialise(struct domain *d,
register_portio_handler(d, XEN_HVM_DEBUGCONS_IOPORT, 1, hvm_print_line);
- register_subpage_ro_handler(d);
+ if ( subpage_ro_active() )
+ register_subpage_ro_handler(d);
if ( hvm_tsc_scaling_supported )
d->arch.hvm.tsc_scaling_ratio = hvm_default_tsc_scaling_ratio;
diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
index c2e9ef6e5023..aeb8ebcf2d56 100644
--- a/xen/arch/x86/include/asm/mm.h
+++ b/xen/arch/x86/include/asm/mm.h
@@ -561,6 +561,7 @@ struct subpage_ro_range {
void __iomem *mapped;
DECLARE_BITMAP(ro_elems, PAGE_SIZE / MMIO_RO_SUBPAGE_GRAN);
};
+bool subpage_ro_active(void);
struct subpage_ro_range *subpage_mmio_find_page(mfn_t mfn);
void __iomem *subpage_mmio_map_page(struct subpage_ro_range *entry);
void subpage_mmio_write_emulate(
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 927beb6f07aa..0e29bab03a95 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4922,6 +4922,11 @@ long arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
return rc;
}
+bool subpage_ro_active(void)
+{
+ return !list_empty(&subpage_ro_ranges);
+}
+
struct subpage_ro_range *subpage_mmio_find_page(mfn_t mfn)
{
struct subpage_ro_range *entry;
@@ -5011,6 +5016,17 @@ int __init subpage_mmio_ro_add(
!IS_ALIGNED(size, MMIO_RO_SUBPAGE_GRAN) )
return -EINVAL;
+ /*
+ * Force all r/o subregions to be registered before initial domain
+ * creation, so that the emulation handlers can be added only when there
+ * are pages registered.
+ */
+ if ( system_state >= SYS_STATE_smp_boot )
+ {
+ ASSERT_UNREACHABLE();
+ return -EILSEQ;
+ }
+
if ( !size )
return 0;
--
2.48.1
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v3 4/4] x86/mm: move mmio_ro_emulated_write() to PV only file
2025-04-17 15:25 [PATCH v3 0/4] xen/x86: fix implementation of subpage r/o MMIO Roger Pau Monne
` (2 preceding siblings ...)
2025-04-17 15:25 ` [PATCH v3 3/4] x86/hvm: only register the r/o subpage ops when needed Roger Pau Monne
@ 2025-04-17 15:25 ` Roger Pau Monne
3 siblings, 0 replies; 12+ messages in thread
From: Roger Pau Monne @ 2025-04-17 15:25 UTC (permalink / raw)
To: xen-devel; +Cc: Roger Pau Monne, Jan Beulich, Andrew Cooper
mmio_ro_emulated_write() is only used in pv/ro-page-fault.c, move the
function to that file and make it static.
No functional change intended.
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/include/asm/mm.h | 12 ------------
xen/arch/x86/mm.c | 26 -------------------------
xen/arch/x86/pv/ro-page-fault.c | 34 +++++++++++++++++++++++++++++++++
3 files changed, 34 insertions(+), 38 deletions(-)
diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
index aeb8ebcf2d56..2665daa6f74f 100644
--- a/xen/arch/x86/include/asm/mm.h
+++ b/xen/arch/x86/include/asm/mm.h
@@ -542,18 +542,6 @@ void memguard_unguard_stack(void *p);
int subpage_mmio_ro_add(paddr_t start, size_t size);
bool subpage_mmio_write_accept(mfn_t mfn, unsigned long gla);
-struct mmio_ro_emulate_ctxt {
- unsigned long cr2;
- /* Used only for mmcfg case */
- unsigned int seg, bdf;
- /* Used only for non-mmcfg case */
- mfn_t mfn;
-};
-
-int cf_check mmio_ro_emulated_write(
- enum x86_segment seg, unsigned long offset, void *p_data,
- unsigned int bytes, struct x86_emulate_ctxt *ctxt);
-
/* r/o MMIO subpage access handlers. */
struct subpage_ro_range {
struct list_head list;
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 0e29bab03a95..9ac855f998dc 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5144,32 +5144,6 @@ void subpage_mmio_write_emulate(
goto write_ignored;
}
-int cf_check mmio_ro_emulated_write(
- enum x86_segment seg,
- unsigned long offset,
- void *p_data,
- unsigned int bytes,
- struct x86_emulate_ctxt *ctxt)
-{
- struct mmio_ro_emulate_ctxt *mmio_ro_ctxt = ctxt->data;
- unsigned long data = 0;
-
- /* Only allow naturally-aligned stores at the original %cr2 address. */
- if ( ((bytes | offset) & (bytes - 1)) || !bytes ||
- offset != mmio_ro_ctxt->cr2 || bytes > sizeof(data) )
- {
- gdprintk(XENLOG_WARNING, "bad access (cr2=%lx, addr=%lx, bytes=%u)\n",
- mmio_ro_ctxt->cr2, offset, bytes);
- return X86EMUL_UNHANDLEABLE;
- }
-
- memcpy(&data, p_data, bytes);
- subpage_mmio_write_emulate(mmio_ro_ctxt->mfn, PAGE_OFFSET(offset),
- data, bytes);
-
- return X86EMUL_OKAY;
-}
-
/*
* For these PTE APIs, the caller must follow the alloc-map-unmap-free
* lifecycle, which means explicitly mapping the PTE pages before accessing
diff --git a/xen/arch/x86/pv/ro-page-fault.c b/xen/arch/x86/pv/ro-page-fault.c
index 11b01c479e43..3dd795288379 100644
--- a/xen/arch/x86/pv/ro-page-fault.c
+++ b/xen/arch/x86/pv/ro-page-fault.c
@@ -298,6 +298,14 @@ static int ptwr_do_page_fault(struct x86_emulate_ctxt *ctxt,
* fault handling for read-only MMIO pages
*/
+struct mmio_ro_emulate_ctxt {
+ unsigned long cr2;
+ /* Used only for mmcfg case */
+ unsigned int seg, bdf;
+ /* Used only for non-mmcfg case */
+ mfn_t mfn;
+};
+
static int cf_check mmcfg_intercept_write(
enum x86_segment seg,
unsigned long offset,
@@ -329,6 +337,32 @@ static int cf_check mmcfg_intercept_write(
return X86EMUL_OKAY;
}
+static int cf_check mmio_ro_emulated_write(
+ enum x86_segment seg,
+ unsigned long offset,
+ void *p_data,
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
+{
+ struct mmio_ro_emulate_ctxt *mmio_ro_ctxt = ctxt->data;
+ unsigned long data = 0;
+
+ /* Only allow naturally-aligned stores at the original %cr2 address. */
+ if ( ((bytes | offset) & (bytes - 1)) || !bytes ||
+ offset != mmio_ro_ctxt->cr2 || bytes > sizeof(data) )
+ {
+ gdprintk(XENLOG_WARNING, "bad access (cr2=%lx, addr=%lx, bytes=%u)\n",
+ mmio_ro_ctxt->cr2, offset, bytes);
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ memcpy(&data, p_data, bytes);
+ subpage_mmio_write_emulate(mmio_ro_ctxt->mfn, PAGE_OFFSET(offset),
+ data, bytes);
+
+ return X86EMUL_OKAY;
+}
+
static const struct x86_emulate_ops mmio_ro_emulate_ops = {
.read = x86emul_unhandleable_rw,
.insn_fetch = ptwr_emulated_insn_fetch,
--
2.48.1
^ permalink raw reply related [flat|nested] 12+ messages in thread