From: Thomas Huth <thuth@redhat.com>
To: qemu-devel@nongnu.org
Cc: Stefan Hajnoczi <stefanha@redhat.com>,
Matthew Rosato <mjrosato@linux.ibm.com>,
David Hildenbrand <david@redhat.com>
Subject: [PULL 14/15] s390x/pci: add support for guests that request direct mapping
Date: Fri, 7 Mar 2025 12:53:13 +0100 [thread overview]
Message-ID: <20250307115314.1096373-15-thuth@redhat.com> (raw)
In-Reply-To: <20250307115314.1096373-1-thuth@redhat.com>
From: Matthew Rosato <mjrosato@linux.ibm.com>
When receiving a guest mpcifc(4) or mpcifc(6) instruction without the T
bit set, treat this as a request to perform direct mapping instead of
address translation. In order to facilitate this, pin the entirety of
guest memory into the host iommu.
Pinning for the direct mapping case is handled via vfio and its memory
listener. Additionally, ram discard settings are inherited from vfio:
coordinated discards (e.g. virtio-mem) are allowed while uncoordinated
discards (e.g. virtio-balloon) are disabled.
Subsequent guest DMA operations are all expected to be of the format
guest_phys+sdma, allowing them to be used as a lookup into the host
iommu table.
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Message-ID: <20250226210013.238349-2-mjrosato@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
include/hw/s390x/s390-pci-bus.h | 3 +++
hw/s390x/s390-pci-bus.c | 39 +++++++++++++++++++++++++++++++--
hw/s390x/s390-pci-inst.c | 13 +++++++++--
hw/s390x/s390-pci-vfio.c | 23 +++++++++++++++----
hw/s390x/s390-virtio-ccw.c | 5 +++++
5 files changed, 75 insertions(+), 8 deletions(-)
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
index 2c43ea123f0..04944d4fed7 100644
--- a/include/hw/s390x/s390-pci-bus.h
+++ b/include/hw/s390x/s390-pci-bus.h
@@ -277,6 +277,7 @@ struct S390PCIIOMMU {
AddressSpace as;
MemoryRegion mr;
IOMMUMemoryRegion iommu_mr;
+ MemoryRegion *dm_mr;
bool enabled;
uint64_t g_iota;
uint64_t pba;
@@ -362,6 +363,7 @@ struct S390PCIBusDevice {
bool interp;
bool forwarding_assist;
bool aif;
+ bool rtr_avail;
QTAILQ_ENTRY(S390PCIBusDevice) link;
};
@@ -389,6 +391,7 @@ int pci_chsc_sei_nt2_have_event(void);
void s390_pci_sclp_configure(SCCB *sccb);
void s390_pci_sclp_deconfigure(SCCB *sccb);
void s390_pci_iommu_enable(S390PCIIOMMU *iommu);
+void s390_pci_iommu_direct_map_enable(S390PCIIOMMU *iommu);
void s390_pci_iommu_disable(S390PCIIOMMU *iommu);
void s390_pci_generate_error_event(uint16_t pec, uint32_t fh, uint32_t fid,
uint64_t faddr, uint32_t e);
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 913d72cc748..9d7b0f75407 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -18,6 +18,8 @@
#include "hw/s390x/s390-pci-inst.h"
#include "hw/s390x/s390-pci-kvm.h"
#include "hw/s390x/s390-pci-vfio.h"
+#include "hw/s390x/s390-virtio-ccw.h"
+#include "hw/boards.h"
#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
#include "hw/pci/pci_bridge.h"
@@ -724,12 +726,42 @@ void s390_pci_iommu_enable(S390PCIIOMMU *iommu)
g_free(name);
}
+void s390_pci_iommu_direct_map_enable(S390PCIIOMMU *iommu)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ S390CcwMachineState *s390ms = S390_CCW_MACHINE(ms);
+
+ /*
+ * For direct-mapping we must map the entire guest address space. Rather
+ * than using an iommu, create a memory region alias that maps GPA X to
+ * IOVA X + SDMA. VFIO will handle pinning via its memory listener.
+ */
+ g_autofree char *name = g_strdup_printf("iommu-dm-s390-%04x",
+ iommu->pbdev->uid);
+
+ iommu->dm_mr = g_malloc0(sizeof(*iommu->dm_mr));
+ memory_region_init_alias(iommu->dm_mr, OBJECT(&iommu->mr), name,
+ get_system_memory(), 0,
+ s390_get_memory_limit(s390ms));
+ iommu->enabled = true;
+ memory_region_add_subregion(&iommu->mr, iommu->pbdev->zpci_fn.sdma,
+ iommu->dm_mr);
+}
+
void s390_pci_iommu_disable(S390PCIIOMMU *iommu)
{
iommu->enabled = false;
g_hash_table_remove_all(iommu->iotlb);
- memory_region_del_subregion(&iommu->mr, MEMORY_REGION(&iommu->iommu_mr));
- object_unparent(OBJECT(&iommu->iommu_mr));
+ if (iommu->dm_mr) {
+ memory_region_del_subregion(&iommu->mr, iommu->dm_mr);
+ object_unparent(OBJECT(iommu->dm_mr));
+ g_free(iommu->dm_mr);
+ iommu->dm_mr = NULL;
+ } else {
+ memory_region_del_subregion(&iommu->mr,
+ MEMORY_REGION(&iommu->iommu_mr));
+ object_unparent(OBJECT(&iommu->iommu_mr));
+ }
}
static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn)
@@ -1145,6 +1177,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
/* Always intercept emulated devices */
pbdev->interp = false;
pbdev->forwarding_assist = false;
+ pbdev->rtr_avail = false;
}
if (s390_pci_msix_init(pbdev) && !pbdev->interp) {
@@ -1510,6 +1543,8 @@ static const Property s390_pci_device_properties[] = {
DEFINE_PROP_BOOL("interpret", S390PCIBusDevice, interp, true),
DEFINE_PROP_BOOL("forwarding-assist", S390PCIBusDevice, forwarding_assist,
true),
+ DEFINE_PROP_BOOL("relaxed-translation", S390PCIBusDevice, rtr_avail,
+ true),
};
static const VMStateDescription s390_pci_device_vmstate = {
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index e386d75d58c..8cdeb6cb7f7 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -16,6 +16,7 @@
#include "exec/memory.h"
#include "qemu/error-report.h"
#include "system/hw_accel.h"
+#include "hw/boards.h"
#include "hw/pci/pci_device.h"
#include "hw/s390x/s390-pci-inst.h"
#include "hw/s390x/s390-pci-bus.h"
@@ -1008,17 +1009,25 @@ static int reg_ioat(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib,
}
/* currently we only support designation type 1 with translation */
- if (!(dt == ZPCI_IOTA_RTTO && t)) {
+ if (t && dt != ZPCI_IOTA_RTTO) {
error_report("unsupported ioat dt %d t %d", dt, t);
s390_program_interrupt(env, PGM_OPERAND, ra);
return -EINVAL;
+ } else if (!t && !pbdev->rtr_avail) {
+ error_report("relaxed translation not allowed");
+ s390_program_interrupt(env, PGM_OPERAND, ra);
+ return -EINVAL;
}
iommu->pba = pba;
iommu->pal = pal;
iommu->g_iota = g_iota;
- s390_pci_iommu_enable(iommu);
+ if (t) {
+ s390_pci_iommu_enable(iommu);
+ } else {
+ s390_pci_iommu_direct_map_enable(iommu);
+ }
return 0;
}
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index 7dbbc76823a..443e2229127 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -131,13 +131,28 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev,
/* Store function type separately for type-specific behavior */
pbdev->pft = cap->pft;
+ /*
+ * If the device is a passthrough ISM device, disallow relaxed
+ * translation.
+ */
+ if (pbdev->pft == ZPCI_PFT_ISM) {
+ pbdev->rtr_avail = false;
+ }
+
/*
* If appropriate, reduce the size of the supported DMA aperture reported
- * to the guest based upon the vfio DMA limit.
+ * to the guest based upon the vfio DMA limit. This is applicable for
+ * devices that are guaranteed to not use relaxed translation. If the
+ * device is capable of relaxed translation then we must advertise the
+ * full aperture. In this case, if translation is used then we will
+ * rely on the vfio DMA limit counting and use RPCIT CC1 / status 16
+ * to request that the guest free DMA mappings as necessary.
*/
- vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS;
- if (vfio_size > 0 && vfio_size < cap->end_dma - cap->start_dma + 1) {
- pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1;
+ if (!pbdev->rtr_avail) {
+ vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS;
+ if (vfio_size > 0 && vfio_size < cap->end_dma - cap->start_dma + 1) {
+ pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1;
+ }
}
}
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 51ae0c133d8..a9b3db19f63 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -936,8 +936,13 @@ static void ccw_machine_9_2_instance_options(MachineState *machine)
static void ccw_machine_9_2_class_options(MachineClass *mc)
{
+ static GlobalProperty compat[] = {
+ { TYPE_S390_PCI_DEVICE, "relaxed-translation", "off", },
+ };
+
ccw_machine_10_0_class_options(mc);
compat_props_add(mc->compat_props, hw_compat_9_2, hw_compat_9_2_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
}
DEFINE_CCW_MACHINE(9, 2);
--
2.48.1
next prev parent reply other threads:[~2025-03-07 11:55 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-07 11:52 [PULL 00/15] Functional and s390x patches before 10.0 soft-freeze Thomas Huth
2025-03-07 11:53 ` [PULL 01/15] tests/functional: remove unused 'bin_prefix' variable Thomas Huth
2025-03-07 11:53 ` [PULL 02/15] tests/functional: set 'qemu_bin' as an object level field Thomas Huth
2025-03-07 11:53 ` [PULL 03/15] tests/functional: reduce tuxrun maxmem to work on 32-bit hosts Thomas Huth
2025-03-07 11:53 ` [PULL 04/15] tests/functional: skip memaddr tests on 32-bit builds Thomas Huth
2025-03-07 11:53 ` [PULL 05/15] tests/functional: drop unused 'get_tag' method Thomas Huth
2025-03-07 11:53 ` [PULL 06/15] tests/functional: stop output from zstd command when uncompressing Thomas Huth
2025-03-07 11:53 ` [PULL 07/15] tests/functional: Move the code for testing HTTP downloads to a common function Thomas Huth
2025-03-07 11:53 ` [PULL 08/15] tests/functional/test_mips_malta: Add a network test via the pcnet NIC Thomas Huth
2025-03-07 11:53 ` [PULL 09/15] tests/functional: Increase the timeout of the mips64el_replay test Thomas Huth
2025-03-07 11:53 ` [PULL 10/15] tests/functional: fix race in virtio balloon test Thomas Huth
2025-03-07 12:34 ` Philippe Mathieu-Daudé
2025-03-07 12:39 ` Thomas Huth
2025-03-07 12:42 ` Philippe Mathieu-Daudé
2025-03-07 12:45 ` Daniel P. Berrangé
2025-03-07 11:53 ` [PULL 11/15] tests/functional/test_virtio_balloon: Only use KVM for running this test Thomas Huth
2025-03-07 11:53 ` [PULL 12/15] doc: add missing 'Asset' type in function test doc Thomas Huth
2025-03-07 11:53 ` [PULL 13/15] MAINTAINERS: Add docs/devel/testing/functional.rst to the functional section Thomas Huth
2025-03-07 11:53 ` Thomas Huth [this message]
2025-03-07 11:53 ` [PULL 15/15] s390x/pci: indicate QEMU supports relaxed translation for passthrough Thomas Huth
2025-03-09 0:41 ` [PULL 00/15] Functional and s390x patches before 10.0 soft-freeze Stefan Hajnoczi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250307115314.1096373-15-thuth@redhat.com \
--to=thuth@redhat.com \
--cc=david@redhat.com \
--cc=mjrosato@linux.ibm.com \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).