From: Eric Auger <eric.auger@redhat.com>
To: Nicolin Chen <nicolinc@nvidia.com>,
peter.maydell@linaro.org, shannon.zhaosl@gmail.com,
mst@redhat.com, imammedo@redhat.com, anisinha@redhat.com,
peterx@redhat.com
Cc: qemu-arm@nongnu.org, qemu-devel@nongnu.org, jgg@nvidia.com,
shameerali.kolothum.thodi@huawei.com, jasowang@redhat.com,
Andrea Bolognani <abologna@redhat.com>
Subject: Re: [PATCH RFCv1 05/10] hw/arm/virt: Add VIRT_NESTED_SMMU
Date: Tue, 9 Jul 2024 15:26:58 +0200
Message-ID: <9c3e95c2-1035-4a55-89a3-97165ef32f18@redhat.com>
In-Reply-To: <bc7a57311ac4976699789ceca329edfdfe823c2d.1719361174.git.nicolinc@nvidia.com>
Hi Nicolin,
On 6/26/24 02:28, Nicolin Chen wrote:
> VIRT_SMMU can be used as an emulated SMMU, i.e. iommu=smmuv3. However, the
> MMIO space for VIRT_SMMU isn't large enough to accommodate multiple nested
> SMMUv3 instances. Add a new VIRT_NESTED_SMMU region to provide a separate
> MMIO space.
>
> Meanwhile, a nested SMMUv3 can only work with a vfio-pci device that is
> physically associated with a host-level SMMUv3 instance, meaning that such
> a passthrough device must be linked to a nested SMMU corresponding to the
> underlying host-level SMMUv3 instance:
> HW SMMU0 <-----------------> vSMMU0 (nested)
>    ^                            ^
>    |                            |
>    ----> PCI dev0 & PCI dev1 <----
> HW SMMU1 <-----------------> vSMMU1 (nested)
>    ^                            ^
>    |                            |
>    ----> PCI dev2 & PCI dev3 <----
>
> Note that, on the other hand, an emulated device should not be associated
> with a nested SMMU.
>
> To prepare for that, create a PCIe Expander Bridge per nested SMMU as a
> dock for vfio-pci devices. This simplifies the QEMU code that builds the
> ID mappings in the IORT.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
> hw/arm/virt.c | 110 +++++++++++++++++++++++++++++++++++++-----
> include/hw/arm/virt.h | 17 +++++++
> 2 files changed, 116 insertions(+), 11 deletions(-)
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index ef59c79ca3..a54332fca8 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -55,6 +55,7 @@
> #include "qemu/bitops.h"
> #include "qemu/error-report.h"
> #include "qemu/module.h"
> +#include "hw/pci/pci_bus.h"
> #include "hw/pci-host/gpex.h"
> #include "hw/virtio/virtio-pci.h"
> #include "hw/core/sysbus-fdt.h"
> @@ -83,6 +84,7 @@
> #include "hw/virtio/virtio-md-pci.h"
> #include "hw/virtio/virtio-iommu.h"
> #include "hw/char/pl011.h"
> +#include "qemu/config-file.h"
> #include "qemu/guest-random.h"
>
> static GlobalProperty arm_virt_compat[] = {
> @@ -177,6 +179,7 @@ static const MemMapEntry base_memmap[] = {
> [VIRT_PVTIME] = { 0x090a0000, 0x00010000 },
> [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 },
> [VIRT_MMIO] = { 0x0a000000, 0x00000200 },
> + [VIRT_NESTED_SMMU] = { 0x0b000000, 0x01000000 },
> /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
> [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 },
> [VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 },
> @@ -221,7 +224,8 @@ static const int a15irqmap[] = {
> [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
> [VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */
> [VIRT_SMMU] = 74, /* ...to 74 + NUM_SMMU_IRQS - 1 */
> - [VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */
> + [VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 (179) */
> + [VIRT_NESTED_SMMU] = 200, /* Keep it at the end of list */
> };
>
> static void create_randomness(MachineState *ms, const char *node)
> @@ -1383,21 +1387,19 @@ static void create_pcie_irq_map(const MachineState *ms,
> 0x7 /* PCI irq */);
> }
>
> -static void create_smmu(const VirtMachineState *vms,
> - PCIBus *bus)
> +static DeviceState *_create_smmu(const VirtMachineState *vms, PCIBus *bus,
> + hwaddr base, hwaddr size, int irq,
> + uint32_t smmu_phandle)
> {
> char *node;
> const char compat[] = "arm,smmu-v3";
> - int irq = vms->irqmap[VIRT_SMMU];
> int i;
> - hwaddr base = vms->memmap[VIRT_SMMU].base;
> - hwaddr size = vms->memmap[VIRT_SMMU].size;
> const char irq_names[] = "eventq\0priq\0cmdq-sync\0gerror";
> DeviceState *dev;
> MachineState *ms = MACHINE(vms);
>
> - if (!virt_has_smmuv3(vms) || !vms->iommu_phandle) {
> - return;
> + if (!virt_has_smmuv3(vms) || !smmu_phandle) {
> + return NULL;
> }
>
> dev = qdev_new(TYPE_ARM_SMMUV3);
> @@ -1436,8 +1438,31 @@ static void create_smmu(const VirtMachineState *vms,
>
> qemu_fdt_setprop_cell(ms->fdt, node, "#iommu-cells", 1);
>
> - qemu_fdt_setprop_cell(ms->fdt, node, "phandle", vms->iommu_phandle);
> + qemu_fdt_setprop_cell(ms->fdt, node, "phandle", smmu_phandle);
> g_free(node);
> +
> + return dev;
> +}
> +
> +static DeviceState *create_smmu(const VirtMachineState *vms, PCIBus *bus)
> +{
> + hwaddr base = vms->memmap[VIRT_SMMU].base;
> + hwaddr size = vms->memmap[VIRT_SMMU].size;
> + int irq = vms->irqmap[VIRT_SMMU];
> +
> + return _create_smmu(vms, bus, base, size, irq, vms->iommu_phandle);
> +}
> +
> +static DeviceState *create_nested_smmu(const VirtMachineState *vms, PCIBus *bus,
> + int i)
> +{
> + hwaddr base = vms->memmap[VIRT_NESTED_SMMU].base + i * SMMU_IO_LEN;
> + int irq = vms->irqmap[VIRT_SMMU] + i * NUM_SMMU_IRQS;
> + hwaddr size = SMMU_IO_LEN;
> + DeviceState *dev;
> +
> + dev = _create_smmu(vms, bus, base, size, irq, vms->nested_smmu_phandle[i]);
> + return dev;
> }
>
> static void create_virtio_iommu_dt_bindings(VirtMachineState *vms)
> @@ -1466,6 +1491,48 @@ static void create_virtio_iommu_dt_bindings(VirtMachineState *vms)
> bdf + 1, vms->iommu_phandle, bdf + 1, 0xffff - bdf);
> }
>
> +/*
> + * FIXME this is used to reserve bus numbers for hotplug devices, yet it
> + * could result in a big waste of PCI bus numbers.
> + */
> +#define MAX_DEV_PER_NESTED_SMMU (4)
> +
> +static PCIBus *create_pcie_expander_bridge(VirtMachineState *vms, uint8_t idx)
> +{
> + /* Total = PXB + MAX_DEV_PER_NESTED_SMMU x (Rootport + device) */
> + const uint8_t total_bus_nums = 1 + MAX_DEV_PER_NESTED_SMMU * 2;
> + uint8_t bus_nr = PCI_BUS_MAX -
> + (vms->num_nested_smmus - idx) * total_bus_nums;
> +
> + VirtNestedSmmu *nested_smmu = find_nested_smmu_by_index(vms, idx);
> + char *name_pxb = g_strdup_printf("pxb_for_smmu.%d", idx);
> + PCIBus *bus = vms->bus;
> + DeviceState *dev;
> +
> + /* Create an expander bridge */
> + dev = qdev_new("pxb-pcie");
> + if (!qdev_set_id(dev, name_pxb, &error_fatal)) {
> + return NULL;
> + }
> +
> + qdev_prop_set_uint8(dev, "bus_nr", bus_nr);
> + qdev_prop_set_uint16(dev, "numa_node", idx);
> + qdev_realize_and_unref(dev, BUS(bus), &error_fatal);
> +
> + /* Get the pxb bus */
> + QLIST_FOREACH(bus, &bus->child, sibling) {
> + if (pci_bus_num(bus) == bus_nr) {
> + break;
> + }
> + }
> + g_assert(bus && pci_bus_num(bus) == bus_nr);
> + nested_smmu->pci_bus = bus;
> + nested_smmu->reserved_bus_nums = total_bus_nums;
> +
> + /* Return the pxb bus */
> + return bus;
> +}
> +
> static void create_pcie(VirtMachineState *vms)
> {
> hwaddr base_mmio = vms->memmap[VIRT_PCIE_MMIO].base;
> @@ -1580,12 +1647,33 @@ static void create_pcie(VirtMachineState *vms)
> qemu_fdt_setprop_cell(ms->fdt, nodename, "#interrupt-cells", 1);
> create_pcie_irq_map(ms, vms->gic_phandle, irq, nodename);
>
> - if (vms->iommu) {
> + /* Build PCI Expander Bridge + Root Port from the top of PCI_BUS_MAX */
> + if (vms->num_nested_smmus) {
> + /* VIRT_NESTED_SMMU must hold all vSMMUs */
> + g_assert(vms->num_nested_smmus <=
> + vms->memmap[VIRT_NESTED_SMMU].size / SMMU_IO_LEN);
> +
> + vms->nested_smmu_phandle = g_new0(uint32_t, vms->num_nested_smmus);
> +
> + for (i = 0; i < vms->num_nested_smmus; i++) {
> + DeviceState *smmu_dev;
> + PCIBus *pxb_bus;
> +
> + pxb_bus = create_pcie_expander_bridge(vms, i);
> + g_assert(pxb_bus);
> +
> + vms->nested_smmu_phandle[i] = qemu_fdt_alloc_phandle(ms->fdt);
> + smmu_dev = create_nested_smmu(vms, pxb_bus, i);
> + g_assert(smmu_dev);
> +
> + qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map", 0x0,
> + vms->nested_smmu_phandle[i], 0x0, 0x10000);
I think libvirt is supposed to create the PCIe bus topology instead;
it should not be created by QEMU behind the scenes.
The PCIe elements you create here are not visible to libvirt, and I
suspect they may collide with elements explicitly created by libvirt
at a given PCI BDF.
I think it would make more sense to be able to attach an SMMU instance
to a given PCI root or PXB, either by adding an iommu id to a given
pxb-pcie option:

-device pxb-pcie,bus_nr=100,id=pci.12,numa_node=0,bus=pcie.0,addr=0x3,iommu=<id>

or by adding a list of PXB ids to the iommu option. It is unfortunate
that the iommu option is a machine option. Since the SMMU is a sysbus
device, the platform bus framework could be considered for allocating
the instances dynamically through the -device option. That approach
has been used along with DT generation, but how it would work with
ACPI would need to be studied. However, at the time the SMMU was
integrated, the machine option was preferred.
Maybe using the first option would allow us to infer that the presence
of different iommu ids implies that several IOMMU instances need to be
created.
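
For illustration, here is a rough sketch (not working code) of what
the first option could look like on the pxb-pcie side; the "iommu"
property and the PXBDev field are made up for this example, nothing
like this exists in pxb-pcie today:

    static Property pxb_dev_properties[] = {
        /* existing pxb-pcie properties */
        DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0),
        DEFINE_PROP_UINT16("numa_node", PXBDev, numa_node,
                           NUMA_NODE_UNASSIGNED),
        /* hypothetical: id of the vSMMU translating this PXB */
        DEFINE_PROP_STRING("iommu", PXBDev, iommu_id),
        DEFINE_PROP_END_OF_LIST(),
    };

The virt machine could then walk the PXBs at machine-done time,
resolve each iommu id to one vSMMU instance, and emit one
iommu-map/IORT ID mapping per PXB root bus, instead of creating the
topology behind libvirt's back.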
Adding Andrea in CC.
Thanks
Eric
> + }
> + } else if (vms->iommu) {
> vms->iommu_phandle = qemu_fdt_alloc_phandle(ms->fdt);
>
> switch (vms->iommu) {
> case VIRT_IOMMU_SMMUV3:
> - case VIRT_IOMMU_NESTED_SMMUV3:
> create_smmu(vms, vms->bus);
> qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map",
> 0x0, vms->iommu_phandle, 0x0, 0x10000);
> diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
> index b35c4f62d7..0a3f1ab8b5 100644
> --- a/include/hw/arm/virt.h
> +++ b/include/hw/arm/virt.h
> @@ -63,6 +63,7 @@ enum {
> VIRT_GIC_ITS,
> VIRT_GIC_REDIST,
> VIRT_SMMU,
> + VIRT_NESTED_SMMU,
> VIRT_UART,
> VIRT_MMIO,
> VIRT_RTC,
> @@ -141,6 +142,8 @@ struct VirtMachineClass {
> typedef struct VirtNestedSmmu {
> int index;
> char *smmu_node;
> + PCIBus *pci_bus;
> + int reserved_bus_nums;
> QLIST_ENTRY(VirtNestedSmmu) next;
> } VirtNestedSmmu;
>
> @@ -179,6 +182,7 @@ struct VirtMachineState {
> uint32_t gic_phandle;
> uint32_t msi_phandle;
> uint32_t iommu_phandle;
> + uint32_t *nested_smmu_phandle;
> int psci_conduit;
> hwaddr highest_gpa;
> DeviceState *gic;
> @@ -229,4 +233,17 @@ static inline bool virt_has_smmuv3(const VirtMachineState *vms)
> vms->iommu == VIRT_IOMMU_NESTED_SMMUV3;
> }
>
> +static inline VirtNestedSmmu *
> +find_nested_smmu_by_index(VirtMachineState *vms, int index)
> +{
> + VirtNestedSmmu *nested_smmu;
> +
> + QLIST_FOREACH(nested_smmu, &vms->nested_smmu_list, next) {
> + if (nested_smmu->index == index) {
> + return nested_smmu;
> + }
> + }
> + return NULL;
> +}
> +
> #endif /* QEMU_ARM_VIRT_H */
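
A side note on the bus-number reservation done in
create_pcie_expander_bridge() above: a standalone sketch of the
arithmetic (assuming PCI_BUS_MAX is 256, as in QEMU's hw/pci) shows
how quickly the fixed per-vSMMU reservation consumes bus numbers:

    #include <stdio.h>

    #define PCI_BUS_MAX             256  /* assumed, as in QEMU */
    #define MAX_DEV_PER_NESTED_SMMU 4
    /* PXB + MAX_DEV_PER_NESTED_SMMU x (root port + device) */
    #define TOTAL_BUS_NUMS          (1 + MAX_DEV_PER_NESTED_SMMU * 2)

    int main(void)
    {
        int num_nested_smmus = 2;        /* example value */

        for (int idx = 0; idx < num_nested_smmus; idx++) {
            int bus_nr = PCI_BUS_MAX -
                         (num_nested_smmus - idx) * TOTAL_BUS_NUMS;
            printf("vSMMU%d: pxb bus_nr=%d, reserves buses %d..%d\n",
                   idx, bus_nr, bus_nr, bus_nr + TOTAL_BUS_NUMS - 1);
        }
        return 0;
    }

With two vSMMUs this prints buses 238..246 and 247..255, i.e. 18 bus
numbers reserved before any device is even plugged, which is the waste
the FIXME in the patch refers to.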