qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH] hw/acpi-build: build SRAT memory affinity structures for NVDIMM
@ 2018-02-17  6:31 Haozhong Zhang
  2018-02-20 14:10 ` Igor Mammedov
  2018-02-24  1:17 ` no-reply
  0 siblings, 2 replies; 11+ messages in thread
From: Haozhong Zhang @ 2018-02-17  6:31 UTC (permalink / raw)
  To: qemu-devel
  Cc: mst, Igor Mammedov, Xiao Guangrong, Paolo Bonzini,
	Richard Henderson, Eduardo Habkost, Marcel Apfelbaum,
	Stefan Hajnoczi, Dan Williams, Haozhong Zhang

ACPI 6.2A Table 5-129 "SPA Range Structure" requires the proximity
domain of a NVDIMM SPA range must match with corresponding entry in
SRAT table.

The address ranges of vNVDIMM in QEMU are allocated from the
hot-pluggable address space, which is entirely covered by one SRAT
memory affinity structure. However, users can set the vNVDIMM
proximity domain in NFIT SPA range structure by the 'node' property of
'-device nvdimm' to a value different than the one in the above SRAT
memory affinity structure.

In order to solve such proximity domain mismatch, this patch build one
SRAT memory affinity structure for each NVDIMM device with the
proximity domain used in NFIT. The remaining hot-pluggable address
space is covered by one or multiple SRAT memory affinity structures
with the proximity domain of the last node as before.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
 hw/acpi/nvdimm.c        | 15 +++++++++++++--
 hw/i386/acpi-build.c    | 47 +++++++++++++++++++++++++++++++++++++++++++----
 include/hw/mem/nvdimm.h | 11 +++++++++++
 3 files changed, 67 insertions(+), 6 deletions(-)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 59d6e4254c..dff0818e77 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -33,12 +33,23 @@
 #include "hw/nvram/fw_cfg.h"
 #include "hw/mem/nvdimm.h"
 
+static gint nvdimm_addr_sort(gconstpointer a, gconstpointer b)
+{
+    uint64_t addr0 = object_property_get_uint(OBJECT(NVDIMM(a)),
+                                              PC_DIMM_ADDR_PROP, NULL);
+    uint64_t addr1 = object_property_get_uint(OBJECT(NVDIMM(b)),
+                                              PC_DIMM_ADDR_PROP, NULL);
+
+    return addr0 < addr1 ? -1 :
+           addr0 > addr1 ?  1 : 0;
+}
+
 static int nvdimm_device_list(Object *obj, void *opaque)
 {
     GSList **list = opaque;
 
     if (object_dynamic_cast(obj, TYPE_NVDIMM)) {
-        *list = g_slist_append(*list, DEVICE(obj));
+        *list = g_slist_insert_sorted(*list, DEVICE(obj), nvdimm_addr_sort);
     }
 
     object_child_foreach(obj, nvdimm_device_list, opaque);
@@ -52,7 +63,7 @@ static int nvdimm_device_list(Object *obj, void *opaque)
  * Note: it is the caller's responsibility to free the list to avoid
  * memory leak.
  */
-static GSList *nvdimm_get_device_list(void)
+GSList *nvdimm_get_device_list(void)
 {
     GSList *list = NULL;
 
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index deb440f286..637ac3a8f0 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2323,6 +2323,46 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog)
 #define HOLE_640K_START  (640 * 1024)
 #define HOLE_640K_END   (1024 * 1024)
 
+static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base,
+                                           uint64_t len, int default_node)
+{
+    GSList *nvdimms = nvdimm_get_device_list();
+    GSList *ent = nvdimms;
+    NVDIMMDevice *dev;
+    uint64_t end = base + len, addr, size;
+    int node;
+    AcpiSratMemoryAffinity *numamem;
+
+    while (base < end) {
+        numamem = acpi_data_push(table_data, sizeof *numamem);
+
+        if (!ent) {
+            build_srat_memory(numamem, base, end - base, default_node,
+                              MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
+            break;
+        }
+
+        dev = NVDIMM(ent->data);
+        addr = object_property_get_uint(OBJECT(dev), PC_DIMM_ADDR_PROP, NULL);
+        size = object_property_get_uint(OBJECT(dev), PC_DIMM_SIZE_PROP, NULL);
+        node = object_property_get_uint(OBJECT(dev), PC_DIMM_NODE_PROP, NULL);
+
+        if (base < addr) {
+            build_srat_memory(numamem, base, addr - base, default_node,
+                              MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
+            numamem = acpi_data_push(table_data, sizeof *numamem);
+        }
+        build_srat_memory(numamem, addr, size, node,
+                          MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED |
+                          MEM_AFFINITY_NON_VOLATILE);
+
+        base = addr + size;
+        ent = ent->next;
+    }
+
+    g_slist_free(nvdimms);
+}
+
 static void
 build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
 {
@@ -2434,10 +2474,9 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
      * providing _PXM method if necessary.
      */
     if (hotplugabble_address_space_size) {
-        numamem = acpi_data_push(table_data, sizeof *numamem);
-        build_srat_memory(numamem, pcms->hotplug_memory.base,
-                          hotplugabble_address_space_size, pcms->numa_nodes - 1,
-                          MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
+        build_srat_hotpluggable_memory(table_data, pcms->hotplug_memory.base,
+                                       hotplugabble_address_space_size,
+                                       pcms->numa_nodes - 1);
     }
 
     build_header(linker, table_data,
diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index 7fd87c4e1c..ca9d6aa714 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -144,4 +144,15 @@ void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data,
                        uint32_t ram_slots);
 void nvdimm_plug(AcpiNVDIMMState *state);
 void nvdimm_acpi_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev);
+
+/*
+ * Inquire NVDIMM devices and link them into the list which is
+ * returned to the caller and sorted in the ascending order of the
+ * base address of NVDIMM devices.
+ *
+ * Note: it is the caller's responsibility to free the list to avoid
+ * memory leak.
+ */
+GSList *nvdimm_get_device_list(void);
+
 #endif
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2018-02-26 14:59 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-02-17  6:31 [Qemu-devel] [PATCH] hw/acpi-build: build SRAT memory affinity structures for NVDIMM Haozhong Zhang
2018-02-20 14:10 ` Igor Mammedov
2018-02-21  1:17   ` Dan Williams
2018-02-21 13:55     ` Igor Mammedov
2018-02-21 14:51       ` Dan Williams
2018-02-26 14:08         ` Igor Mammedov
2018-02-22  1:40       ` Haozhong Zhang
2018-02-26 13:59         ` Igor Mammedov
2018-02-26 14:58           ` Haozhong Zhang
2018-02-24  1:17 ` no-reply
2018-02-24  1:42   ` Haozhong Zhang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).