From: Eric Auger <eric.auger@redhat.com>
To: Nicolin Chen <nicolinc@nvidia.com>,
peter.maydell@linaro.org, shannon.zhaosl@gmail.com,
mst@redhat.com, imammedo@redhat.com, anisinha@redhat.com,
peterx@redhat.com
Cc: qemu-arm@nongnu.org, qemu-devel@nongnu.org, jgg@nvidia.com,
shameerali.kolothum.thodi@huawei.com, jasowang@redhat.com,
Andrea Bolognani <abologna@redhat.com>
Subject: Re: [PATCH RFCv1 06/10] hw/arm/virt: Assign vfio-pci devices to nested SMMUs
Date: Tue, 9 Jul 2024 15:32:41 +0200 [thread overview]
Message-ID: <d59d2d2a-e6b7-4dde-9be7-56986f3fdf91@redhat.com> (raw)
In-Reply-To: <67c6311756de2a6e827e3dd0563f939dcf334418.1719361174.git.nicolinc@nvidia.com>
On 6/26/24 02:28, Nicolin Chen wrote:
> With iommu=nested-smmuv3, there could be multiple nested SMMU instances in
> the vms. A passthrough device must look up its iommu handler in its
> sysfs node, and then link to the nested SMMU instance created for the same
> iommu handler. This isn't easy to do.
>
> Add an auto-assign piece after all vSMMU backed pxb buses are created. It
> loops over the existing input devices, and sets/replaces their pci bus numbers
> with a newly created pcie-root-port to the pxb bus.
Here again I don't think it is acceptable to create such a topology under
the hood. Libvirt shall master the whole PCIe topology.
Eric
>
> Note that this is not an ideal solution for handling hot-plugged devices.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
> hw/arm/virt.c | 110 ++++++++++++++++++++++++++++++++++++++++++
> include/hw/arm/virt.h | 13 +++++
> 2 files changed, 123 insertions(+)
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index a54332fca8..3610f53304 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -38,6 +38,7 @@
> #include "hw/arm/primecell.h"
> #include "hw/arm/virt.h"
> #include "hw/block/flash.h"
> +#include "hw/vfio/pci.h"
> #include "hw/vfio/vfio-calxeda-xgmac.h"
> #include "hw/vfio/vfio-amd-xgbe.h"
> #include "hw/display/ramfb.h"
> @@ -1491,6 +1492,112 @@ static void create_virtio_iommu_dt_bindings(VirtMachineState *vms)
> bdf + 1, vms->iommu_phandle, bdf + 1, 0xffff - bdf);
> }
>
> +static char *create_new_pcie_port(VirtNestedSmmu *nested_smmu, Error **errp)
> +{
> + uint32_t port_nr = nested_smmu->pci_bus->qbus.num_children;
> + uint32_t chassis_nr = UINT8_MAX - nested_smmu->index;
> + uint32_t bus_nr = pci_bus_num(nested_smmu->pci_bus);
> + DeviceState *dev;
> + char *name_port;
> +
> + /* Create a root port */
> + dev = qdev_new("pcie-root-port");
> + name_port = g_strdup_printf("smmu_bus0x%x_port%d", bus_nr, port_nr);
> +
> + if (!qdev_set_id(dev, name_port, &error_fatal)) {
> + /* FIXME retry with a different port num? */
> + error_setg(errp, "Could not set pcie-root-port ID %s", name_port);
> + g_free(name_port);
> + g_free(dev);
> + return NULL;
> + }
> + qdev_prop_set_uint32(dev, "chassis", chassis_nr);
> + qdev_prop_set_uint32(dev, "slot", port_nr);
> + qdev_prop_set_uint64(dev, "io-reserve", 0);
> + qdev_realize_and_unref(dev, BUS(nested_smmu->pci_bus), &error_fatal);
> + return name_port;
> +}
> +
> +static int assign_nested_smmu(void *opaque, QemuOpts *opts, Error **errp)
> +{
> + VirtMachineState *vms = (VirtMachineState *)opaque;
> + const char *sysfsdev = qemu_opt_get(opts, "sysfsdev");
> + const char *iommufd = qemu_opt_get(opts, "iommufd");
> + const char *driver = qemu_opt_get(opts, "driver");
> + const char *host = qemu_opt_get(opts, "host");
> + const char *bus = qemu_opt_get(opts, "bus");
> + VirtNestedSmmu *nested_smmu;
> + char *link_iommu;
> + char *dir_iommu;
> + char *smmu_node;
> + char *name_port;
> + int ret = 0;
> +
> + if (!iommufd || !driver) {
> + return 0;
> + }
> + if (!sysfsdev && !host) {
> + return 0;
> + }
> + if (strncmp(driver, TYPE_VFIO_PCI, strlen(TYPE_VFIO_PCI))) {
> + return 0;
> + }
> + /* If the device wants to attach to the default bus, do not reassign it */
> + if (bus && !strncmp(bus, "pcie.0", strlen(bus))) {
> + return 0;
> + }
> +
> + if (sysfsdev) {
> + link_iommu = g_strdup_printf("%s/iommu", sysfsdev);
> + } else {
> + link_iommu = g_strdup_printf("/sys/bus/pci/devices/%s/iommu", host);
> + }
> +
> + dir_iommu = realpath(link_iommu, NULL);
> + if (!dir_iommu) {
> + error_setg(errp, "Could not get the real path for iommu link: %s",
> + link_iommu);
> + ret = -EINVAL;
> + goto free_link;
> + }
> +
> + smmu_node = g_path_get_basename(dir_iommu);
> + if (!smmu_node) {
> + error_setg(errp, "Could not get SMMU node name for iommu at: %s",
> + dir_iommu);
> + ret = -EINVAL;
> + goto free_dir;
> + }
> +
> + nested_smmu = find_nested_smmu_by_sysfs(vms, smmu_node);
> + if (!nested_smmu) {
> + error_setg(errp, "Could not find any detected SMMU matching node: %s",
> + smmu_node);
> + ret = -EINVAL;
> + goto free_node;
> + }
> +
> + name_port = create_new_pcie_port(nested_smmu, errp);
> + if (!name_port) {
> + ret = -EBUSY;
> + goto free_node;
> + }
> +
> + qemu_opt_set(opts, "bus", name_port, &error_fatal);
> + if (bus) {
> + error_report("overriding PCI bus %s to %s for device %s [%s]",
> + bus, name_port, host, sysfsdev);
> + }
> +
> +free_node:
> + free(smmu_node);
> +free_dir:
> + free(dir_iommu);
> +free_link:
> + free(link_iommu);
> + return ret;
> +}
> +
> /*
> * FIXME this is used to reverse for hotplug devices, yet it could result in a
> * big waste of PCI bus numbners.
> @@ -1669,6 +1776,9 @@ static void create_pcie(VirtMachineState *vms)
> qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map", 0x0,
> vms->nested_smmu_phandle[i], 0x0, 0x10000);
> }
> +
> + qemu_opts_foreach(qemu_find_opts("device"),
> + assign_nested_smmu, vms, &error_fatal);
> } else if (vms->iommu) {
> vms->iommu_phandle = qemu_fdt_alloc_phandle(ms->fdt);
>
> diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
> index 0a3f1ab8b5..dfbc4bba3c 100644
> --- a/include/hw/arm/virt.h
> +++ b/include/hw/arm/virt.h
> @@ -246,4 +246,17 @@ find_nested_smmu_by_index(VirtMachineState *vms, int index)
> return NULL;
> }
>
> +static inline VirtNestedSmmu *
> +find_nested_smmu_by_sysfs(VirtMachineState *vms, char *node)
> +{
> + VirtNestedSmmu *nested_smmu;
> +
> + QLIST_FOREACH(nested_smmu, &vms->nested_smmu_list, next) {
> + if (!strncmp(nested_smmu->smmu_node, node, strlen(node))) {
> + return nested_smmu;
> + }
> + }
> + return NULL;
> +}
> +
> #endif /* QEMU_ARM_VIRT_H */
next prev parent reply other threads:[~2024-07-09 13:34 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-26 0:28 [PATCH RFCv1 00/10] hw/arm/virt: Add multiple nested SMMUs Nicolin Chen
2024-06-26 0:28 ` [PATCH RFCv1 01/10] hw/arm/virt-acpi-build: Add IORT RMR regions to handle MSI nested binding Nicolin Chen
2024-06-26 0:28 ` [PATCH RFCv1 02/10] hw/arm/virt: Add iommufd link to virt-machine Nicolin Chen
2024-07-09 9:11 ` Eric Auger
2024-07-09 16:59 ` Nicolin Chen
2024-07-09 17:06 ` Eric Auger
2024-07-09 17:18 ` Nicolin Chen
2024-07-10 2:32 ` Duan, Zhenzhong
2024-06-26 0:28 ` [PATCH RFCv1 03/10] hw/arm/virt: Get the number of host-level SMMUv3 instances Nicolin Chen
2024-07-09 9:20 ` Eric Auger
2024-07-09 17:11 ` Nicolin Chen
2024-07-09 17:22 ` Eric Auger
2024-07-09 18:02 ` Nicolin Chen
2024-06-26 0:28 ` [PATCH RFCv1 04/10] hw/arm/virt: Add an SMMU_IO_LEN macro Nicolin Chen
2024-06-26 0:28 ` [PATCH RFCv1 05/10] hw/arm/virt: Add VIRT_NESTED_SMMU Nicolin Chen
2024-07-09 13:26 ` Eric Auger
2024-07-09 17:59 ` Nicolin Chen
2024-07-11 15:48 ` Andrea Bolognani
2024-07-11 17:57 ` Jason Gunthorpe
2024-06-26 0:28 ` [PATCH RFCv1 06/10] hw/arm/virt: Assign vfio-pci devices to nested SMMUs Nicolin Chen
2024-07-09 13:32 ` Eric Auger [this message]
2024-06-26 0:28 ` [PATCH RFCv1 07/10] hw/arm/virt: Bypass iommu for default PCI bus Nicolin Chen
2024-06-26 0:28 ` [PATCH RFCv1 08/10] hw/arm/virt-acpi-build: Handle reserved bus number of pxb buses Nicolin Chen
2024-06-26 0:28 ` [PATCH RFCv1 09/10] hw/arm/virt-acpi-build: Build IORT with multiple SMMU nodes Nicolin Chen
2024-06-26 0:28 ` [PATCH RFCv1 10/10] hw/arm/virt-acpi-build: Enable ATS for nested SMMUv3 Nicolin Chen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=d59d2d2a-e6b7-4dde-9be7-56986f3fdf91@redhat.com \
--to=eric.auger@redhat.com \
--cc=abologna@redhat.com \
--cc=anisinha@redhat.com \
--cc=imammedo@redhat.com \
--cc=jasowang@redhat.com \
--cc=jgg@nvidia.com \
--cc=mst@redhat.com \
--cc=nicolinc@nvidia.com \
--cc=peter.maydell@linaro.org \
--cc=peterx@redhat.com \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=shameerali.kolothum.thodi@huawei.com \
--cc=shannon.zhaosl@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).