* [Qemu-devel] [PATCH 2.12] spapr: replace numa_get_node() with lookup in pc-dimm list
@ 2017-12-05 15:41 Igor Mammedov
2017-12-06 0:14 ` David Gibson
From: Igor Mammedov @ 2017-12-05 15:41 UTC
To: qemu-devel; +Cc: david, agraf, ehabkost, qemu-ppc
SPAPR is the last user of numa_get_node() and of a bunch of
supporting code that maintains the numa_info[x].addr list.

Get the LMB node id from the pc-dimm list instead, which allows
removing ~80 LOC that maintained the dynamic address-range
lookup list.

It also removes the pc-dimm dependency on numa_[un]set_mem_node_id(),
makes each pc-dimm the sole source of information about which
node it belongs to, and removes duplicate data from the global
numa_info.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
Besides making the code simpler, my interest in this simplification
lies in allowing parse_numa_opts() to be called multiple times,
without complex cleanups in case the NUMA config has changed
since startup.

PS:
build tested only
---
include/sysemu/numa.h | 10 ------
hw/mem/pc-dimm.c | 2 --
hw/ppc/spapr.c | 29 +++++++++++++++-
numa.c | 94 ---------------------------------------------------
4 files changed, 28 insertions(+), 107 deletions(-)
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index 5c6df28..b354521 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -10,17 +10,10 @@
extern int nb_numa_nodes; /* Number of NUMA nodes */
extern bool have_numa_distance;
-struct numa_addr_range {
- ram_addr_t mem_start;
- ram_addr_t mem_end;
- QLIST_ENTRY(numa_addr_range) entry;
-};
-
struct node_info {
uint64_t node_mem;
struct HostMemoryBackend *node_memdev;
bool present;
- QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */
uint8_t distance[MAX_NODES];
};
@@ -33,9 +26,6 @@ extern NodeInfo numa_info[MAX_NODES];
void parse_numa_opts(MachineState *ms);
void query_numa_node_mem(NumaNodeMem node_mem[]);
extern QemuOptsList qemu_numa_opts;
-void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
-void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node);
-uint32_t numa_get_node(ram_addr_t addr, Error **errp);
void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
int nb_nodes, ram_addr_t size);
void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 66eace5..6e74b61 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -109,7 +109,6 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr);
vmstate_register_ram(vmstate_mr, dev);
- numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node);
out:
error_propagate(errp, local_err);
@@ -122,7 +121,6 @@ void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm);
- numa_unset_mem_node_id(dimm->addr, memory_region_size(mr), dimm->node);
memory_region_del_subregion(&hpms->mr, mr);
vmstate_unregister_ram(vmstate_mr, dev);
}
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 9efddea..8de0b5b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -641,6 +641,26 @@ static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr)
}
+static uint32_t spapr_pc_dimm_node(MemoryDeviceInfoList *list, ram_addr_t addr)
+{
+ MemoryDeviceInfoList *info;
+
+ for (info = list; info; info = info->next) {
+ MemoryDeviceInfo *value = info->value;
+
+ if (value && value->type == MEMORY_DEVICE_INFO_KIND_DIMM) {
+ PCDIMMDeviceInfo *pcdimm_info = value->u.dimm.data;
+
+ if (addr >= pcdimm_info->addr &&
+ addr < (pcdimm_info->addr + pcdimm_info->size)) {
+ return pcdimm_info->node;
+ }
+ }
+ }
+
+ return -1;
+}
+
/*
* Adds ibm,dynamic-reconfiguration-memory node.
* Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation
@@ -658,6 +678,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
lmb_size;
uint32_t *int_buf, *cur_index, buf_len;
int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
+ MemoryDeviceInfoList *dimms = NULL;
/*
* Don't create the node if there is no hotpluggable memory
@@ -692,6 +713,11 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
goto out;
}
+ if (hotplug_lmb_start) {
+ MemoryDeviceInfoList **prev = &dimms;
+ qmp_pc_dimm_device_list(qdev_get_machine(), &prev);
+ }
+
/* ibm,dynamic-memory */
int_buf[0] = cpu_to_be32(nr_lmbs);
cur_index++;
@@ -709,7 +735,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff);
dynamic_memory[2] = cpu_to_be32(spapr_drc_index(drc));
dynamic_memory[3] = cpu_to_be32(0); /* reserved */
- dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL));
+ dynamic_memory[4] = cpu_to_be32(spapr_pc_dimm_node(dimms, addr));
if (memory_region_present(get_system_memory(), addr)) {
dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
} else {
@@ -732,6 +758,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE;
}
+ qapi_free_MemoryDeviceInfoList(dimms);
ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len);
if (ret < 0) {
goto out;
diff --git a/numa.c b/numa.c
index 7151b24..98fa9a4 100644
--- a/numa.c
+++ b/numa.c
@@ -55,92 +55,6 @@ int nb_numa_nodes;
bool have_numa_distance;
NodeInfo numa_info[MAX_NODES];
-void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
-{
- struct numa_addr_range *range;
-
- /*
- * Memory-less nodes can come here with 0 size in which case,
- * there is nothing to do.
- */
- if (!size) {
- return;
- }
-
- range = g_malloc0(sizeof(*range));
- range->mem_start = addr;
- range->mem_end = addr + size - 1;
- QLIST_INSERT_HEAD(&numa_info[node].addr, range, entry);
-}
-
-void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
-{
- struct numa_addr_range *range, *next;
-
- QLIST_FOREACH_SAFE(range, &numa_info[node].addr, entry, next) {
- if (addr == range->mem_start && (addr + size - 1) == range->mem_end) {
- QLIST_REMOVE(range, entry);
- g_free(range);
- return;
- }
- }
-}
-
-static void numa_set_mem_ranges(void)
-{
- int i;
- ram_addr_t mem_start = 0;
-
- /*
- * Deduce start address of each node and use it to store
- * the address range info in numa_info address range list
- */
- for (i = 0; i < nb_numa_nodes; i++) {
- numa_set_mem_node_id(mem_start, numa_info[i].node_mem, i);
- mem_start += numa_info[i].node_mem;
- }
-}
-
-/*
- * Check if @addr falls under NUMA @node.
- */
-static bool numa_addr_belongs_to_node(ram_addr_t addr, uint32_t node)
-{
- struct numa_addr_range *range;
-
- QLIST_FOREACH(range, &numa_info[node].addr, entry) {
- if (addr >= range->mem_start && addr <= range->mem_end) {
- return true;
- }
- }
- return false;
-}
-
-/*
- * Given an address, return the index of the NUMA node to which the
- * address belongs to.
- */
-uint32_t numa_get_node(ram_addr_t addr, Error **errp)
-{
- uint32_t i;
-
- /* For non NUMA configurations, check if the addr falls under node 0 */
- if (!nb_numa_nodes) {
- if (numa_addr_belongs_to_node(addr, 0)) {
- return 0;
- }
- }
-
- for (i = 0; i < nb_numa_nodes; i++) {
- if (numa_addr_belongs_to_node(addr, i)) {
- return i;
- }
- }
-
- error_setg(errp, "Address 0x" RAM_ADDR_FMT " doesn't belong to any "
- "NUMA node", addr);
- return -1;
-}
static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
Error **errp)
@@ -497,12 +411,6 @@ void parse_numa_opts(MachineState *ms)
exit(1);
}
- for (i = 0; i < nb_numa_nodes; i++) {
- QLIST_INIT(&numa_info[i].addr);
- }
-
- numa_set_mem_ranges();
-
/* QEMU needs at least all unique node pair distances to build
* the whole NUMA distance table. QEMU treats the distance table
* as symmetric by default, i.e. distance A->B == distance B->A.
@@ -522,8 +430,6 @@ void parse_numa_opts(MachineState *ms)
/* Validation succeeded, now fill in any missing distances. */
complete_init_numa_distance();
}
- } else {
- numa_set_mem_node_id(0, ram_size, 0);
}
}
--
2.7.4
* Re: [Qemu-devel] [PATCH 2.12] spapr: replace numa_get_node() with lookup in pc-dimm list
2017-12-05 15:41 [Qemu-devel] [PATCH 2.12] spapr: replace numa_get_node() with lookup in pc-dimm list Igor Mammedov
@ 2017-12-06 0:14 ` David Gibson
2017-12-06 9:57 ` Igor Mammedov
From: David Gibson @ 2017-12-06 0:14 UTC
To: Igor Mammedov; +Cc: qemu-devel, agraf, ehabkost, qemu-ppc
On Tue, Dec 05, 2017 at 04:41:17PM +0100, Igor Mammedov wrote:
> SPAPR is the last user of numa_get_node() and of a bunch of
> supporting code that maintains the numa_info[x].addr list.
>
> Get the LMB node id from the pc-dimm list instead, which allows
> removing ~80 LOC that maintained the dynamic address-range
> lookup list.
>
> It also removes the pc-dimm dependency on numa_[un]set_mem_node_id(),
> makes each pc-dimm the sole source of information about which
> node it belongs to, and removes duplicate data from the global
> numa_info.
>
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> ---
> Besides making the code simpler, my interest in this simplification
> lies in allowing parse_numa_opts() to be called multiple times,
> without complex cleanups in case the NUMA config has changed
> since startup.
>
> PS:
> build tested only
> ---
> include/sysemu/numa.h | 10 ------
> hw/mem/pc-dimm.c | 2 --
> hw/ppc/spapr.c | 29 +++++++++++++++-
> numa.c | 94 ---------------------------------------------------
> 4 files changed, 28 insertions(+), 107 deletions(-)
Applied to ppc-for-2.12.
It definitely seems like an improvement over what we have. Looking
back at the DIMM list from QMP in the loop seems a little roundabout
though. Maybe we'd be better stepping through the DIMMs, then
stepping through the LMBs within each DIMM, rather than just stepping
through the LMBs directly.
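
Roughly, that DIMM-first shape could look like the sketch below (illustrative
only, not code from this patch; spapr_set_lmb_node() is a made-up placeholder
for the per-LMB fdt population that spapr_populate_drconf_memory() currently
does inline):

    /*
     * Sketch: walk each pc-dimm once and assign the node for every LMB
     * it covers, instead of scanning the whole DIMM list once per LMB.
     */
    static void spapr_populate_dimm_lmbs(MemoryDeviceInfoList *dimms,
                                         uint64_t lmb_size)
    {
        MemoryDeviceInfoList *info;

        for (info = dimms; info; info = info->next) {
            MemoryDeviceInfo *value = info->value;
            PCDIMMDeviceInfo *di;
            uint64_t addr;

            if (!value || value->type != MEMORY_DEVICE_INFO_KIND_DIMM) {
                continue;
            }
            di = value->u.dimm.data;

            /* step through the LMBs covered by this DIMM */
            for (addr = di->addr; addr < di->addr + di->size; addr += lmb_size) {
                spapr_set_lmb_node(addr, di->node);   /* hypothetical helper */
            }
        }
    }

That would turn the per-LMB linear scan of the DIMM list into a single pass
over each DIMM's LMB range.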
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
* Re: [Qemu-devel] [PATCH 2.12] spapr: replace numa_get_node() with lookup in pc-dimm list
2017-12-06 0:14 ` David Gibson
@ 2017-12-06 9:57 ` Igor Mammedov
2017-12-06 10:03 ` David Gibson
From: Igor Mammedov @ 2017-12-06 9:57 UTC
To: David Gibson; +Cc: qemu-devel, agraf, ehabkost, qemu-ppc
On Wed, 6 Dec 2017 11:14:06 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:
> On Tue, Dec 05, 2017 at 04:41:17PM +0100, Igor Mammedov wrote:
> > SPAPR is the last user of numa_get_node() and of a bunch of
> > supporting code that maintains the numa_info[x].addr list.
> >
> > Get the LMB node id from the pc-dimm list instead, which allows
> > removing ~80 LOC that maintained the dynamic address-range
> > lookup list.
> >
> > It also removes the pc-dimm dependency on numa_[un]set_mem_node_id(),
> > makes each pc-dimm the sole source of information about which
> > node it belongs to, and removes duplicate data from the global
> > numa_info.
> >
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > ---
> > Besides making the code simpler, my interest in this simplification
> > lies in allowing parse_numa_opts() to be called multiple times,
> > without complex cleanups in case the NUMA config has changed
> > since startup.
> >
> > PS:
> > build tested only
> > ---
> > include/sysemu/numa.h | 10 ------
> > hw/mem/pc-dimm.c | 2 --
> > hw/ppc/spapr.c | 29 +++++++++++++++-
> > numa.c | 94 ---------------------------------------------------
> > 4 files changed, 28 insertions(+), 107 deletions(-)
>
> Applied to ppc-for-2.12.
Thanks
>
> It definitely seems like an improvement over what we have. Looking
> back at the DIMM list from QMP in the loop seems a little roundabout
> though. Maybe we'd be better stepping through the DIMMs, then
> stepping through the LMBs within each DIMM, rather than just stepping
> through the LMBs directly.
Surely that would be better; maybe someone from the ppc side would take
care of it.
* Re: [Qemu-devel] [PATCH 2.12] spapr: replace numa_get_node() with lookup in pc-dimm list
2017-12-06 9:57 ` Igor Mammedov
@ 2017-12-06 10:03 ` David Gibson
From: David Gibson @ 2017-12-06 10:03 UTC
To: Igor Mammedov; +Cc: qemu-devel, agraf, ehabkost, qemu-ppc
On Wed, Dec 06, 2017 at 10:57:32AM +0100, Igor Mammedov wrote:
> On Wed, 6 Dec 2017 11:14:06 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
>
> > On Tue, Dec 05, 2017 at 04:41:17PM +0100, Igor Mammedov wrote:
> > > SPAPR is the last user of numa_get_node() and of a bunch of
> > > supporting code that maintains the numa_info[x].addr list.
> > >
> > > Get the LMB node id from the pc-dimm list instead, which allows
> > > removing ~80 LOC that maintained the dynamic address-range
> > > lookup list.
> > >
> > > It also removes the pc-dimm dependency on numa_[un]set_mem_node_id(),
> > > makes each pc-dimm the sole source of information about which
> > > node it belongs to, and removes duplicate data from the global
> > > numa_info.
> > >
> > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > ---
> > > Besides making the code simpler, my interest in this simplification
> > > lies in allowing parse_numa_opts() to be called multiple times,
> > > without complex cleanups in case the NUMA config has changed
> > > since startup.
> > >
> > > PS:
> > > build tested only
> > > ---
> > > include/sysemu/numa.h | 10 ------
> > > hw/mem/pc-dimm.c | 2 --
> > > hw/ppc/spapr.c | 29 +++++++++++++++-
> > > numa.c | 94 ---------------------------------------------------
> > > 4 files changed, 28 insertions(+), 107 deletions(-)
> >
> > Applied to ppc-for-2.12.
> Thanks
>
> >
> > It definitely seems like an improvement over what we have. Looking
> > back at the DIMM list from QMP in the loop seems a little roundabout
> > though. Maybe we'd be better stepping through the DIMMs, then
> > stepping through the LMBs within each DIMM, rather than just stepping
> > through the LMBs directly.
> Surely that would be better; maybe someone from the ppc side would take
> care of it.
Well, it's now on my vast list of things to look at if I ever have
time...
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson