qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: qemu-devel@nongnu.org
Cc: "Peter Maydell" <peter.maydell@linaro.org>,
	"Ankit Agrawal" <ankita@nvidia.com>,
	"Alex Williamson" <alex.williamson@redhat.com>,
	"Cedric Le Goater" <clg@redhat.com>,
	"Jonathan Cameron" <Jonathan.Cameron@huawei.com>,
	"Igor Mammedov" <imammedo@redhat.com>,
	"Ani Sinha" <anisinha@redhat.com>,
	"Shannon Zhao" <shannon.zhaosl@gmail.com>,
	"Eduardo Habkost" <eduardo@habkost.net>,
	"Marcel Apfelbaum" <marcel.apfelbaum@gmail.com>,
	"Philippe Mathieu-Daudé" <philmd@linaro.org>,
	"Yanan Wang" <wangyanan55@huawei.com>,
	qemu-arm@nongnu.org
Subject: [PULL 55/68] hw/acpi: Implement the SRAT GI affinity structure
Date: Tue, 12 Mar 2024 18:28:27 -0400	[thread overview]
Message-ID: <0a5b5acdf2d8c7302ca48d42e6ef3423e1b956d5.1710282274.git.mst@redhat.com> (raw)
In-Reply-To: <cover.1710282274.git.mst@redhat.com>

From: Ankit Agrawal <ankita@nvidia.com>

The ACPI spec provides a scheme to associate "Generic Initiators" [1]
(e.g. heterogeneous processors and accelerators, GPUs, and I/O devices with
integrated compute or DMA engines) with Proximity Domains. This is
achieved using the Generic Initiator Affinity Structure in the SRAT. During
bootup, the Linux kernel parses the ACPI SRAT to determine the PXM IDs and
creates a NUMA node for each unique PXM ID encountered. QEMU currently does
not implement these structures while building the SRAT.

Add GI structures while building the VM ACPI SRAT. The association between
a device and a node is stored using an acpi-generic-initiator object. Look
up all such objects and use them to build these structures.

The structure needs a PCI device handle [2] that consists of the PCI segment
and the device BDF. The vfio-pci device corresponding to the
acpi-generic-initiator object is located to determine the BDF.

[1] ACPI Spec 6.3, Section 5.2.16.6
[2] ACPI Spec 6.3, Table 5.80

Cc: Jonathan Cameron <qemu-devel@nongnu.org>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Cedric Le Goater <clg@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
Message-Id: <20240308145525.10886-3-ankita@nvidia.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/acpi/acpi_generic_initiator.h | 25 ++++++++
 include/sysemu/numa.h                    |  1 +
 hw/acpi/acpi_generic_initiator.c         | 77 ++++++++++++++++++++++++
 hw/acpi/hmat.c                           |  2 +-
 hw/arm/virt-acpi-build.c                 |  3 +
 hw/core/numa.c                           |  3 +-
 6 files changed, 109 insertions(+), 2 deletions(-)

diff --git a/include/hw/acpi/acpi_generic_initiator.h b/include/hw/acpi/acpi_generic_initiator.h
index 16de1d3d80..a304bad73e 100644
--- a/include/hw/acpi/acpi_generic_initiator.h
+++ b/include/hw/acpi/acpi_generic_initiator.h
@@ -19,4 +19,29 @@ typedef struct AcpiGenericInitiator {
     uint16_t node;
 } AcpiGenericInitiator;
 
+/*
+ * ACPI 6.3:
+ * Table 5-81 Flags – Generic Initiator Affinity Structure
+ */
+typedef enum {
+    /*
+     * If clear, the OSPM ignores the contents of the Generic
+     * Initiator/Port Affinity Structure. This allows system firmware
+     * to populate the SRAT with a static number of structures, but only
+     * enable them as necessary.
+     */
+    GEN_AFFINITY_ENABLED = (1 << 0),
+} GenericAffinityFlags;
+
+/*
+ * ACPI 6.3:
+ * Table 5-80 Device Handle - PCI
+ */
+typedef struct PCIDeviceHandle {
+    uint16_t segment;
+    uint16_t bdf;
+} PCIDeviceHandle;
+
+void build_srat_generic_pci_initiator(GArray *table_data);
+
 #endif
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index 4173ef2afa..825cfe86bc 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -41,6 +41,7 @@ struct NodeInfo {
     struct HostMemoryBackend *node_memdev;
     bool present;
     bool has_cpu;
+    bool has_gi;
     uint8_t lb_info_provided;
     uint16_t initiator;
     uint8_t distance[MAX_NODES];
diff --git a/hw/acpi/acpi_generic_initiator.c b/hw/acpi/acpi_generic_initiator.c
index 130d6ae8c1..17b9a052f5 100644
--- a/hw/acpi/acpi_generic_initiator.c
+++ b/hw/acpi/acpi_generic_initiator.c
@@ -5,7 +5,9 @@
 
 #include "qemu/osdep.h"
 #include "hw/acpi/acpi_generic_initiator.h"
+#include "hw/acpi/aml-build.h"
 #include "hw/boards.h"
+#include "hw/pci/pci_device.h"
 #include "qemu/error-report.h"
 
 typedef struct AcpiGenericInitiatorClass {
@@ -47,6 +49,7 @@ static void acpi_generic_initiator_set_node(Object *obj, Visitor *v,
                                             Error **errp)
 {
     AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+    MachineState *ms = MACHINE(qdev_get_machine());
     uint32_t value;
 
     if (!visit_type_uint32(v, name, &value, errp)) {
@@ -60,6 +63,7 @@ static void acpi_generic_initiator_set_node(Object *obj, Visitor *v,
     }
 
     gi->node = value;
+    ms->numa_state->nodes[gi->node].has_gi = true;
 }
 
 static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data)
@@ -69,3 +73,76 @@ static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data)
     object_class_property_add(oc, "node", "int", NULL,
         acpi_generic_initiator_set_node, NULL, NULL);
 }
+
+/*
+ * ACPI 6.3:
+ * Table 5-78 Generic Initiator Affinity Structure
+ */
+static void
+build_srat_generic_pci_initiator_affinity(GArray *table_data, int node,
+                                          PCIDeviceHandle *handle)
+{
+    uint8_t index;
+
+    build_append_int_noprefix(table_data, 5, 1);  /* Type */
+    build_append_int_noprefix(table_data, 32, 1); /* Length */
+    build_append_int_noprefix(table_data, 0, 1);  /* Reserved */
+    build_append_int_noprefix(table_data, 1, 1);  /* Device Handle Type: PCI */
+    build_append_int_noprefix(table_data, node, 4);  /* Proximity Domain */
+
+    /* Device Handle - PCI */
+    build_append_int_noprefix(table_data, handle->segment, 2);
+    build_append_int_noprefix(table_data, handle->bdf, 2);
+    for (index = 0; index < 12; index++) {
+        build_append_int_noprefix(table_data, 0, 1);
+    }
+
+    build_append_int_noprefix(table_data, GEN_AFFINITY_ENABLED, 4); /* Flags */
+    build_append_int_noprefix(table_data, 0, 4);     /* Reserved */
+}
+
+static int build_all_acpi_generic_initiators(Object *obj, void *opaque)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    AcpiGenericInitiator *gi;
+    GArray *table_data = opaque;
+    PCIDeviceHandle dev_handle;
+    PCIDevice *pci_dev;
+    Object *o;
+
+    if (!object_dynamic_cast(obj, TYPE_ACPI_GENERIC_INITIATOR)) {
+        return 0;
+    }
+
+    gi = ACPI_GENERIC_INITIATOR(obj);
+    if (gi->node >= ms->numa_state->num_nodes) {
+        error_printf("%s: Specified node %d is invalid.\n",
+                     TYPE_ACPI_GENERIC_INITIATOR, gi->node);
+        exit(1);
+    }
+
+    o = object_resolve_path_type(gi->pci_dev, TYPE_PCI_DEVICE, NULL);
+    if (!o) {
+        error_printf("%s: Specified device must be a PCI device.\n",
+                     TYPE_ACPI_GENERIC_INITIATOR);
+        exit(1);
+    }
+
+    pci_dev = PCI_DEVICE(o);
+
+    dev_handle.segment = 0;
+    dev_handle.bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
+                                               pci_dev->devfn);
+
+    build_srat_generic_pci_initiator_affinity(table_data,
+                                              gi->node, &dev_handle);
+
+    return 0;
+}
+
+void build_srat_generic_pci_initiator(GArray *table_data)
+{
+    object_child_foreach_recursive(object_get_root(),
+                                   build_all_acpi_generic_initiators,
+                                   table_data);
+}
diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
index 3042d223c8..2242981e18 100644
--- a/hw/acpi/hmat.c
+++ b/hw/acpi/hmat.c
@@ -214,7 +214,7 @@ static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state)
     }
 
     for (i = 0; i < numa_state->num_nodes; i++) {
-        if (numa_state->nodes[i].has_cpu) {
+        if (numa_state->nodes[i].has_cpu || numa_state->nodes[i].has_gi) {
             initiator_list[num_initiator++] = i;
         }
     }
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 6a1bde61ce..c3ccfef026 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -57,6 +57,7 @@
 #include "migration/vmstate.h"
 #include "hw/acpi/ghes.h"
 #include "hw/acpi/viot.h"
+#include "hw/acpi/acpi_generic_initiator.h"
 #include "hw/virtio/virtio-acpi.h"
 #include "target/arm/multiprocessing.h"
 
@@ -504,6 +505,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
         }
     }
 
+    build_srat_generic_pci_initiator(table_data);
+
     if (ms->nvdimms_state->is_enabled) {
         nvdimm_build_srat(table_data);
     }
diff --git a/hw/core/numa.c b/hw/core/numa.c
index f08956ddb0..58a32f1564 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -229,7 +229,8 @@ void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
                    node->target, numa_state->num_nodes);
         return;
     }
-    if (!numa_info[node->initiator].has_cpu) {
+    if (!numa_info[node->initiator].has_cpu &&
+        !numa_info[node->initiator].has_gi) {
         error_setg(errp, "Invalid initiator=%d, it isn't an "
                    "initiator proximity domain", node->initiator);
         return;
-- 
MST



  parent reply	other threads:[~2024-03-12 22:40 UTC|newest]

Thread overview: 80+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-12 22:25 [PULL 00/68] virtio,pc,pci: features, cleanups, fixes Michael S. Tsirkin
2024-03-12 22:25 ` [PULL 01/68] vdpa: add back vhost_vdpa_net_first_nc_vdpa Michael S. Tsirkin
2024-03-12 22:25 ` [PULL 02/68] vdpa: factor out vhost_vdpa_last_dev Michael S. Tsirkin
2024-03-12 22:25 ` [PULL 03/68] vdpa: factor out vhost_vdpa_net_get_nc_vdpa Michael S. Tsirkin
2024-03-12 22:25 ` [PULL 04/68] vdpa: add vhost_vdpa_set_address_space_id trace Michael S. Tsirkin
2024-03-12 22:25 ` [PULL 05/68] vdpa: add vhost_vdpa_get_vring_base trace for svq mode Michael S. Tsirkin
2024-03-12 22:25 ` [PULL 06/68] vdpa: add vhost_vdpa_set_dev_vring_base " Michael S. Tsirkin
2024-03-12 22:25 ` [PULL 07/68] vdpa: add trace events for vhost_vdpa_net_load_cmd Michael S. Tsirkin
2024-03-12 22:25 ` [PULL 08/68] vdpa: add trace event for vhost_vdpa_net_load_mq Michael S. Tsirkin
2024-03-12 22:25 ` [PULL 09/68] vdpa: define SVQ transitioning state for mode switching Michael S. Tsirkin
2024-03-12 22:25 ` [PULL 10/68] vdpa: indicate transitional state for SVQ switching Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 11/68] vdpa: fix network breakage after cancelling migration Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 12/68] libvhost-user: Dynamically allocate memory for memory slots Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 13/68] libvhost-user: Bump up VHOST_USER_MAX_RAM_SLOTS to 509 Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 14/68] libvhost-user: Factor out removing all mem regions Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 15/68] libvhost-user: Merge vu_set_mem_table_exec_postcopy() into vu_set_mem_table_exec() Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 16/68] libvhost-user: Factor out adding a memory region Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 17/68] libvhost-user: No need to check for NULL when unmapping Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 18/68] libvhost-user: Don't zero out memory for memory regions Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 19/68] libvhost-user: Don't search for duplicates when removing " Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 20/68] libvhost-user: Factor out search for memory region by GPA and simplify Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 21/68] libvhost-user: Speedup gpa_to_mem_region() and vu_gpa_to_va() Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 22/68] libvhost-user: Use most of mmap_offset as fd_offset Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 23/68] libvhost-user: Factor out vq usability check Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 24/68] libvhost-user: Dynamically remap rings after (temporarily?) removing memory regions Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 25/68] libvhost-user: Mark mmap'ed region memory as MADV_DONTDUMP Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 26/68] pcie: Support PCIe Gen5/Gen6 link speeds Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 27/68] vdpa: stash memory region properties in vars Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 28/68] vdpa: trace skipped memory sections Michael S. Tsirkin
2024-03-12 22:26 ` [PULL 29/68] hw/pci-bridge/pxb-cxl: Drop RAS capability from host bridge Michael S. Tsirkin
2024-03-12 22:27 ` [PULL 30/68] hw/audio/virtio-sound: return correct command response size Michael S. Tsirkin
2024-03-12 22:27 ` [PULL 31/68] hw/virtio: check owner for removing objects Michael S. Tsirkin
2024-03-12 22:27 ` [PULL 32/68] hw/virtio: Add support for VDPA network simulation devices Michael S. Tsirkin
2024-03-20  8:58   ` Paolo Bonzini
2024-03-20 13:12     ` Michael S. Tsirkin
2024-03-12 22:27 ` [PULL 33/68] hw/cxl/cxl-host: Fix missing ERRP_GUARD() in cxl_fixed_memory_window_config() Michael S. Tsirkin
2024-03-13  2:20   ` Zhao Liu
2024-03-12 22:27 ` [PULL 34/68] hw/display/macfb: Fix missing ERRP_GUARD() in macfb_nubus_realize() Michael S. Tsirkin
2024-03-13  2:23   ` Zhao Liu
2024-03-13 10:19     ` Philippe Mathieu-Daudé
2024-03-12 22:27 ` [PULL 35/68] hw/mem/cxl_type3: Fix missing ERRP_GUARD() in ct3_realize() Michael S. Tsirkin
2024-03-13  2:25   ` Zhao Liu
2024-03-12 22:27 ` [PULL 36/68] hw/misc/xlnx-versal-trng: Check returned bool in trng_prop_fault_event_set() Michael S. Tsirkin
2024-03-13  2:26   ` Zhao Liu
2024-03-12 22:27 ` [PULL 37/68] hw/pci-bridge/cxl_upstream: Fix missing ERRP_GUARD() in cxl_usp_realize() Michael S. Tsirkin
2024-03-13  2:28   ` Zhao Liu
2024-03-12 22:27 ` [PULL 38/68] hw/vfio/iommufd: Fix missing ERRP_GUARD() in iommufd_cdev_getfd() Michael S. Tsirkin
2024-03-13  2:39   ` Zhao Liu
2024-03-12 22:27 ` [PULL 39/68] hw/intc: Check @errp to handle the error of IOAPICCommonClass.realize() Michael S. Tsirkin
2024-03-13  2:37   ` Zhao Liu
2024-03-12 22:27 ` [PULL 40/68] Implement base of SMBIOS type 9 descriptor Michael S. Tsirkin
2024-03-12 22:27 ` [PULL 41/68] Implement SMBIOS type 9 v2.6 Michael S. Tsirkin
2024-03-12 22:27 ` [PULL 42/68] hw/nvme: Use pcie_sriov_num_vfs() Michael S. Tsirkin
2024-03-12 22:27 ` [PULL 43/68] pcie_sriov: Validate NumVFs Michael S. Tsirkin
2024-03-12 22:27 ` [PULL 44/68] pcie_sriov: Reset SR-IOV extended capability Michael S. Tsirkin
2024-03-12 22:27 ` [PULL 45/68] pcie_sriov: Do not reset NumVFs after disabling VFs Michael S. Tsirkin
2024-03-12 22:27 ` [PULL 46/68] hw/pci: Always call pcie_sriov_pf_reset() Michael S. Tsirkin
2024-03-12 22:27 ` [PULL 47/68] pc: q35: Bump max_cpus to 4096 vcpus Michael S. Tsirkin
2024-03-12 22:28 ` [PULL 48/68] Revert "hw/i386/pc_sysfw: Inline pc_system_flash_create() and remove it" Michael S. Tsirkin
2024-03-12 22:28 ` [PULL 49/68] Revert "hw/i386/pc: Confine system flash handling to pc_sysfw" Michael S. Tsirkin
2024-03-12 22:28 ` [PULL 50/68] hw/i386/pc: Remove "rtc_state" link again Michael S. Tsirkin
2024-03-12 22:28 ` [PULL 51/68] hw/i386/pc: Avoid one use of the current_machine global Michael S. Tsirkin
2024-03-12 22:28 ` [PULL 52/68] hw/i386/pc: Set "normal" boot device order in pc_basic_device_init() Michael S. Tsirkin
2024-03-12 22:28 ` [PULL 53/68] hw/i386/pc: Inline pc_cmos_init() into pc_cmos_init_late() and remove it Michael S. Tsirkin
2024-03-12 22:28 ` [PULL 54/68] qom: new object to associate device to NUMA node Michael S. Tsirkin
2024-03-12 22:28 ` Michael S. Tsirkin [this message]
2024-03-12 22:28 ` [PULL 56/68] hw/i386/acpi-build: Add support for SRAT Generic Initiator structures Michael S. Tsirkin
2024-03-12 22:28 ` [PULL 57/68] virtio-iommu: Add a granule property Michael S. Tsirkin
2024-03-12 22:28 ` [PULL 58/68] virtio-iommu: Change the default granule to the host page size Michael S. Tsirkin
2024-03-12 22:28 ` [PULL 59/68] qemu-options.hx: Document the virtio-iommu-pci granule option Michael S. Tsirkin
2024-03-12 22:28 ` [PULL 60/68] virtio-iommu: Trace domain range limits as unsigned int Michael S. Tsirkin
2024-03-12 22:28 ` [PULL 61/68] virtio-iommu: Add an option to define the input range width Michael S. Tsirkin
2024-03-12 22:28 ` [PULL 62/68] hw/i386/q35: Set virtio-iommu aw-bits default value to 39 Michael S. Tsirkin
2024-03-12 22:28 ` [PULL 63/68] hw/arm/virt: Set virtio-iommu aw-bits default value to 48 Michael S. Tsirkin
2024-03-12 22:28 ` [PULL 64/68] qemu-options.hx: Document the virtio-iommu-pci aw-bits option Michael S. Tsirkin
2024-03-12 22:29 ` [PULL 65/68] hmat acpi: Do not add Memory Proximity Domain Attributes Structure targetting non existent memory Michael S. Tsirkin
2024-03-12 22:29 ` [PULL 66/68] hmat acpi: Fix out of bounds access due to missing use of indirection Michael S. Tsirkin
2024-03-12 22:29 ` [PULL 67/68] hw/cxl: Fix missing reserved data in CXL Device DVSEC Michael S. Tsirkin
2024-03-12 22:29 ` [PULL 68/68] docs/specs/pvpanic: document shutdown event Michael S. Tsirkin
2024-03-13 17:07 ` [PULL 00/68] virtio,pc,pci: features, cleanups, fixes Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0a5b5acdf2d8c7302ca48d42e6ef3423e1b956d5.1710282274.git.mst@redhat.com \
    --to=mst@redhat.com \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=alex.williamson@redhat.com \
    --cc=anisinha@redhat.com \
    --cc=ankita@nvidia.com \
    --cc=clg@redhat.com \
    --cc=eduardo@habkost.net \
    --cc=imammedo@redhat.com \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=peter.maydell@linaro.org \
    --cc=philmd@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=shannon.zhaosl@gmail.com \
    --cc=wangyanan55@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).