[PATCH 2/3] hw/core/numa: add attribute to skip creation of MachineState.ram region

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Nicholas Piggin <npiggin@gmail.com>
To: qemu-ppc@nongnu.org
Cc: "Nicholas Piggin" <npiggin@gmail.com>,
	qemu-devel@nongnu.org, "Eduardo Habkost" <eduardo@habkost.net>,
	"Marcel Apfelbaum" <marcel.apfelbaum@gmail.com>,
	"Philippe Mathieu-Daudé" <philmd@linaro.org>,
	"Yanan Wang" <wangyanan55@huawei.com>,
	"Zhao Liu" <zhao1.liu@intel.com>,
	"Frédéric Barrat" <fbarrat@linux.ibm.com>,
	"Igor Mammedov" <imammedo@redhat.com>
Subject: [PATCH 2/3] hw/core/numa: add attribute to skip creation of MachineState.ram region
Date: Mon,  3 Mar 2025 20:07:31 +1000	[thread overview]
Message-ID: <20250303100732.576457-3-npiggin@gmail.com> (raw)
In-Reply-To: <20250303100732.576457-1-npiggin@gmail.com>

NUMA machines with sparse address topologies do not want all NUMA
regions packed densely inside the MachineState.ram container region.
Add a machine class attribute that skips creating this container
region. Individual NUMA memory device regions are recorded in
NodeInfo.node_mr, from where the machine init code can add them to the
system address space itself.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 include/hw/boards.h   |  6 ++++++
 include/system/numa.h |  1 +
 hw/core/numa.c        | 44 +++++++++++++++++++++++++++++++++++--------
 3 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/include/hw/boards.h b/include/hw/boards.h
index 9360d1ce394..9e6654ee9ca 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -233,6 +233,11 @@ typedef struct {
  *    is not needed.
  * @numa_mem_supported:
  *    true if '--numa node.mem' option is supported and false otherwise
+ * @numa_skip_ram_container:
+ *    If false, numa memory init creates the MachineState.ram memory region
+ *    with all numa node regions packed densely within it. If true, the .ram
+ *    region is not created. Machines can use this, e.g., to place NUMA
+ *    regions sparsely within the address space.
  * @hotplug_allowed:
  *    If the hook is provided, then it'll be called for each device
  *    hotplug to check whether the device hotplug is allowed.  Return
@@ -311,6 +316,7 @@ struct MachineClass {
     bool nvdimm_supported;
     bool numa_mem_supported;
     bool auto_enable_numa;
+    bool numa_skip_ram_container;
     bool cpu_cluster_has_numa_boundary;
     SMPCompatProps smp_props;
     const char *default_ram_id;
diff --git a/include/system/numa.h b/include/system/numa.h
index 1044b0eb6e9..001e872d33e 100644
--- a/include/system/numa.h
+++ b/include/system/numa.h
@@ -38,6 +38,7 @@ enum {
 typedef struct NodeInfo {
     uint64_t node_mem;
     struct HostMemoryBackend *node_memdev;
+    MemoryRegion *node_mr;
     bool present;
     bool has_cpu;
     bool has_gi;
diff --git a/hw/core/numa.c b/hw/core/numa.c
index 218576f7455..d84b2d70849 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -623,19 +623,46 @@ static void complete_init_numa_distance(MachineState *ms)
     }
 }
 
-static void numa_init_memdev_container(MachineState *ms, MemoryRegion *ram)
+/*
+ * Consume all NUMA memory backends and store the regions in NodeInfo.node_mr.
+ */
+static void numa_init_memdev(MachineState *ms)
 {
     int i;
-    uint64_t addr = 0;
 
     for (i = 0; i < ms->numa_state->num_nodes; i++) {
-        uint64_t size = ms->numa_state->nodes[i].node_mem;
         HostMemoryBackend *backend = ms->numa_state->nodes[i].node_memdev;
         if (!backend) {
             continue;
         }
         MemoryRegion *seg = machine_consume_memdev(ms, backend);
-        memory_region_add_subregion(ram, addr, seg);
+        ms->numa_state->nodes[i].node_mr = seg;
+    }
+}
+
+/*
+ * Consume all NUMA memory backends as with numa_init_memdev, packing them
+ * densely into a MachineState.ram "container" region.
+ */
+static void numa_init_memdev_container(MachineState *ms)
+{
+    int i;
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+    uint64_t addr = 0;
+
+    ms->ram = g_new(MemoryRegion, 1);
+    memory_region_init(ms->ram, OBJECT(ms), mc->default_ram_id,
+                       ms->ram_size);
+
+    numa_init_memdev(ms);
+
+    for (i = 0; i < ms->numa_state->num_nodes; i++) {
+        uint64_t size = ms->numa_state->nodes[i].node_mem;
+        MemoryRegion *seg = ms->numa_state->nodes[i].node_mr;
+        if (!seg) {
+            continue;
+        }
+        memory_region_add_subregion(ms->ram, addr, seg);
         addr += size;
     }
 }
@@ -706,10 +733,11 @@ void numa_complete_configuration(MachineState *ms)
                              " properties are mutually exclusive");
                 exit(1);
             }
-            ms->ram = g_new(MemoryRegion, 1);
-            memory_region_init(ms->ram, OBJECT(ms), mc->default_ram_id,
-                               ms->ram_size);
-            numa_init_memdev_container(ms, ms->ram);
+            if (mc->numa_skip_ram_container) {
+                numa_init_memdev(ms);
+            } else {
+                numa_init_memdev_container(ms);
+            }
         }
         /* QEMU needs at least all unique node pair distances to build
          * the whole NUMA distance table. QEMU treats the distance table
-- 
2.47.1

next prev parent reply	other threads:[~2025-03-03 10:09 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-03 10:07 [PATCH 0/3] ppc/pnv: Support sparse NUMA memory addresses Nicholas Piggin
2025-03-03 10:07 ` [PATCH 1/3] ppc/pnv: Add support for NUMA configuration Nicholas Piggin
2025-03-03 10:07 ` Nicholas Piggin [this message]
2025-03-03 10:07 ` [PATCH 3/3] ppc/pnv: Enable sparse chip RAM memory addresses Nicholas Piggin

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:9360d1ce39 dfblob:9e6654ee9c dfblob:1044b0eb6e
dfblob:001e872d33 dfblob:218576f745 dfblob:d84b2d7084 )
 OR (
bs:"[PATCH 2/3] hw/core/numa: add attribute to skip creation of MachineState.ram region" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250303100732.576457-3-npiggin@gmail.com \
    --to=npiggin@gmail.com \
    --cc=eduardo@habkost.net \
    --cc=fbarrat@linux.ibm.com \
    --cc=imammedo@redhat.com \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=philmd@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=wangyanan55@huawei.com \
    --cc=zhao1.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).