From: Nicholas Piggin <npiggin@gmail.com>
To: qemu-ppc@nongnu.org
Cc: "Nicholas Piggin" <npiggin@gmail.com>,
qemu-devel@nongnu.org, "Eduardo Habkost" <eduardo@habkost.net>,
"Marcel Apfelbaum" <marcel.apfelbaum@gmail.com>,
"Philippe Mathieu-Daudé" <philmd@linaro.org>,
"Yanan Wang" <wangyanan55@huawei.com>,
"Zhao Liu" <zhao1.liu@intel.com>,
"Frédéric Barrat" <fbarrat@linux.ibm.com>,
"Igor Mammedov" <imammedo@redhat.com>
Subject: [PATCH 3/3] ppc/pnv: Enable sparse chip RAM memory addresses
Date: Mon, 3 Mar 2025 20:07:32 +1000 [thread overview]
Message-ID: <20250303100732.576457-4-npiggin@gmail.com> (raw)
In-Reply-To: <20250303100732.576457-1-npiggin@gmail.com>
Power CPUs place the RAM region of each chip (NUMA node) at a
fixed location in the real address space, resulting in a sparse
(disjoint) RAM address layout.
Use the new NUMA machine class attribute numa_skip_ram_container to
allow pnv machine init to lay out NUMA node memory regions into the
system address space in the proper location rather than packing them
densely from address 0.
With the following options:
-smp 2,sockets=2 -m 4g
-object memory-backend-ram,size=2G,id=mem0
-object memory-backend-ram,size=2G,id=mem1
-numa node,nodeid=0,memdev=mem0,cpus=0
-numa node,nodeid=1,memdev=mem1,cpus=1
Linux (PowerNV) now boots with:
node 0: [mem 0x0000000000000000-0x000000007fffffff]
node 1: [mem 0x0000100000000000-0x000010007fffffff]
Prior to this change:
node 0: [mem 0x0000000000000000-0x000000007fffffff]
node 1: [mem 0x0000000000000000-0x00000000ffffffff]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
hw/ppc/pnv.c | 37 ++++++++++++++++++++++++++++++++++---
1 file changed, 34 insertions(+), 3 deletions(-)
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 5f2041f7f9d..b6308593335 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -966,7 +966,24 @@ static void pnv_init(MachineState *machine)
exit(EXIT_FAILURE);
}
- memory_region_add_subregion(get_system_memory(), 0, machine->ram);
+ if (machine->ram) {
+ memory_region_add_subregion(get_system_memory(), 0, machine->ram);
+ } else if (machine->numa_state) {
+ for (i = 0; i < machine->numa_state->num_nodes; i++) {
+ MemoryRegion *mr = machine->numa_state->nodes[i].node_mr;
+
+ /*
+ * powernv uses numa_mem_align_shift to derive the base RAM address
+ * for each chip: addr = chip number << numa_mem_align_shift.
+ */
+ chip_ram_start = (uint64_t)i << mc->numa_mem_align_shift;
+ if (!mr) {
+ continue;
+ }
+ memory_region_add_subregion(get_system_memory(), chip_ram_start,
+ mr);
+ }
+ }
/*
* Create our simple PNOR device
@@ -1100,20 +1117,30 @@ static void pnv_init(MachineState *machine)
exit(1);
}
+ chip_ram_start = 0;
pnv->chips = g_new0(PnvChip *, pnv->num_chips);
for (i = 0; i < pnv->num_chips; i++) {
char chip_name[32];
Object *chip = OBJECT(qdev_new(chip_typename));
- uint64_t chip_ram_size = pnv_chip_get_ram_size(pnv, i);
+ uint64_t chip_ram_size;
pnv->chips[i] = PNV_CHIP(chip);
+ if (machine->numa_state) {
+ chip_ram_start = (uint64_t)i << mc->numa_mem_align_shift;
+ chip_ram_size = machine->numa_state->nodes[i].node_mem;
+ } else {
+ chip_ram_size = pnv_chip_get_ram_size(pnv, i);
+ }
+
/* Distribute RAM among the chips */
object_property_set_int(chip, "ram-start", chip_ram_start,
&error_fatal);
object_property_set_int(chip, "ram-size", chip_ram_size,
&error_fatal);
- chip_ram_start += chip_ram_size;
+ if (!machine->numa_state) {
+ chip_ram_start += chip_ram_size;
+ }
snprintf(chip_name, sizeof(chip_name), "chip[%d]", i);
object_property_add_child(OBJECT(pnv), chip_name, chip);
@@ -2680,6 +2707,7 @@ static void pnv_machine_power8_class_init(ObjectClass *oc, void *data)
mc->desc = "IBM PowerNV (Non-Virtualized) POWER8";
mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
+ mc->numa_mem_align_shift = 42;
compat_props_add(mc->compat_props, phb_compat, G_N_ELEMENTS(phb_compat));
xic->icp_get = pnv_icp_get;
@@ -2709,6 +2737,7 @@ static void pnv_machine_power9_class_init(ObjectClass *oc, void *data)
mc->desc = "IBM PowerNV (Non-Virtualized) POWER9";
mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.2");
+ mc->numa_mem_align_shift = 42;
compat_props_add(mc->compat_props, phb_compat, G_N_ELEMENTS(phb_compat));
xfc->match_nvt = pnv_match_nvt;
@@ -2747,6 +2776,7 @@ static void pnv_machine_p10_common_class_init(ObjectClass *oc, void *data)
};
mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power10_v2.0");
+ mc->numa_mem_align_shift = 44;
compat_props_add(mc->compat_props, phb_compat, G_N_ELEMENTS(phb_compat));
mc->alias = "powernv";
@@ -2951,6 +2981,7 @@ static void pnv_machine_class_init(ObjectClass *oc, void *data)
mc->numa_mem_supported = true;
mc->auto_enable_numa = true;
+ mc->numa_skip_ram_container = true;
mc->cpu_index_to_instance_props = pnv_cpu_index_to_props;
mc->get_default_cpu_node_id = pnv_get_default_cpu_node_id;
--
2.47.1
prev parent reply other threads:[~2025-03-03 10:09 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-03 10:07 [PATCH 0/3] ppc/pnv: Support sparse NUMA memory addresses Nicholas Piggin
2025-03-03 10:07 ` [PATCH 1/3] ppc/pnv: Add support for NUMA configuration Nicholas Piggin
2025-03-03 10:07 ` [PATCH 2/3] hw/core/numa: add attribute to skip creation of MachineState.ram region Nicholas Piggin
2025-03-03 10:07 ` Nicholas Piggin [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250303100732.576457-4-npiggin@gmail.com \
--to=npiggin@gmail.com \
--cc=eduardo@habkost.net \
--cc=fbarrat@linux.ibm.com \
--cc=imammedo@redhat.com \
--cc=marcel.apfelbaum@gmail.com \
--cc=philmd@linaro.org \
--cc=qemu-devel@nongnu.org \
--cc=qemu-ppc@nongnu.org \
--cc=wangyanan55@huawei.com \
--cc=zhao1.liu@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).