* [Qemu-devel] [PATCH 1/2] Rename nb_numa_nodes to num_numa_nodes
From: Nishanth Aravamudan @ 2014-07-01 20:11 UTC (permalink / raw)
To: Eduardo Habkost
Cc: Michael S. Tsirkin, Alexey Kardashevskiy, Hu Tao, qemu-devel,
qemu-ppc, Anton Blanchard, David Rientjes, Igor Mammedov
As suggested by Eduardo Habkost, rename nb_numa_nodes to num_numa_nodes
throughout the code, as that reflects the use of the variable more
clearly.
Signed-off-by: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 2cf22b1..12472c6 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -698,23 +698,23 @@ static FWCfgState *bochs_bios_init(void)
* of nodes, one word for each VCPU->node and one word for each node to
* hold the amount of memory.
*/
- numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes);
- numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
+ numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + num_numa_nodes);
+ numa_fw_cfg[0] = cpu_to_le64(num_numa_nodes);
for (i = 0; i < max_cpus; i++) {
unsigned int apic_id = x86_cpu_apic_id_from_index(i);
assert(apic_id < apic_id_limit);
- for (j = 0; j < nb_numa_nodes; j++) {
+ for (j = 0; j < num_numa_nodes; j++) {
if (test_bit(i, numa_info[j].node_cpu)) {
numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
break;
}
}
}
- for (i = 0; i < nb_numa_nodes; i++) {
+ for (i = 0; i < num_numa_nodes; i++) {
numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem);
}
fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
- (1 + apic_id_limit + nb_numa_nodes) *
+ (1 + apic_id_limit + num_numa_nodes) *
sizeof(*numa_fw_cfg));
return fw_cfg;
@@ -1121,10 +1121,10 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
guest_info->ram_size = below_4g_mem_size + above_4g_mem_size;
guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
guest_info->apic_xrupt_override = kvm_allows_irq0_override();
- guest_info->numa_nodes = nb_numa_nodes;
+ guest_info->numa_nodes = num_numa_nodes;
guest_info->node_mem = g_malloc0(guest_info->numa_nodes *
sizeof *guest_info->node_mem);
- for (i = 0; i < nb_numa_nodes; i++) {
+ for (i = 0; i < num_numa_nodes; i++) {
guest_info->node_mem[i] = numa_info[i].node_mem;
}
@@ -1134,7 +1134,7 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
for (i = 0; i < max_cpus; i++) {
unsigned int apic_id = x86_cpu_apic_id_from_index(i);
assert(apic_id < guest_info->apic_id_limit);
- for (j = 0; j < nb_numa_nodes; j++) {
+ for (j = 0; j < num_numa_nodes; j++) {
if (test_bit(i, numa_info[j].node_cpu)) {
guest_info->node_cpu[apic_id] = j;
break;
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a8ba916..4b74fd6 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -226,7 +226,7 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
}
}
- if (nb_numa_nodes > 1) {
+ if (num_numa_nodes > 1) {
ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
sizeof(associativity));
if (ret < 0) {
@@ -608,7 +608,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
int i, off;
/* memory node(s) */
- if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) {
+ if (num_numa_nodes > 1 && numa_info[0].node_mem < ram_size) {
node0_size = numa_info[0].node_mem;
} else {
node0_size = ram_size;
@@ -642,7 +642,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
/* RAM: Node 1 and beyond */
mem_start = node0_size;
- for (i = 1; i < nb_numa_nodes; i++) {
+ for (i = 1; i < num_numa_nodes; i++) {
mem_reg_property[0] = cpu_to_be64(mem_start);
if (mem_start >= ram_size) {
node_size = 0;
@@ -792,7 +792,7 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
/* Update the RMA size if necessary */
if (spapr->vrma_adjust) {
- hwaddr node0_size = (nb_numa_nodes > 1) ?
+ hwaddr node0_size = (num_numa_nodes > 1) ?
numa_info[0].node_mem : ram_size;
spapr->rma_size = kvmppc_rma_size(node0_size, spapr->htab_shift);
}
@@ -1225,7 +1225,7 @@ static void ppc_spapr_init(MachineState *machine)
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *ram = g_new(MemoryRegion, 1);
hwaddr rma_alloc_size;
- hwaddr node0_size = (nb_numa_nodes > 1) ? numa_info[0].node_mem : ram_size;
+ hwaddr node0_size = (num_numa_nodes > 1) ? numa_info[0].node_mem : ram_size;
uint32_t initrd_base = 0;
long kernel_size = 0, initrd_size = 0;
long load_limit, rtas_limit, fw_size;
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index d8539fd..d17bdbe 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -146,7 +146,7 @@ extern int mem_prealloc;
*/
#define MAX_CPUMASK_BITS 255
-extern int nb_numa_nodes; /* Number of NUMA nodes */
+extern int num_numa_nodes; /* Number of NUMA nodes */
extern int max_numa_nodeid; /* Highest specified NUMA node ID, plus one.
* For all nodes, nodeid < max_numa_nodeid
*/
diff --git a/monitor.c b/monitor.c
index 5bc70a6..392677a 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1948,8 +1948,8 @@ static void do_info_numa(Monitor *mon, const QDict *qdict)
int i;
CPUState *cpu;
- monitor_printf(mon, "%d nodes\n", nb_numa_nodes);
- for (i = 0; i < nb_numa_nodes; i++) {
+ monitor_printf(mon, "%d nodes\n", num_numa_nodes);
+ for (i = 0; i < num_numa_nodes; i++) {
monitor_printf(mon, "node %d cpus:", i);
CPU_FOREACH(cpu) {
if (cpu->numa_node == i) {
diff --git a/numa.c b/numa.c
index 2fde740..5930df0 100644
--- a/numa.c
+++ b/numa.c
@@ -53,7 +53,7 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
if (node->has_nodeid) {
nodenr = node->nodeid;
} else {
- nodenr = nb_numa_nodes;
+ nodenr = num_numa_nodes;
}
if (nodenr >= MAX_NODES) {
@@ -136,7 +136,7 @@ int numa_init_func(QemuOpts *opts, void *opaque)
if (err) {
goto error;
}
- nb_numa_nodes++;
+ num_numa_nodes++;
break;
default:
abort();
@@ -174,31 +174,31 @@ void set_numa_nodes(void)
}
/* This must be always true if all nodes are present: */
- assert(nb_numa_nodes == max_numa_nodeid);
+ assert(num_numa_nodes == max_numa_nodeid);
- if (nb_numa_nodes > 0) {
+ if (num_numa_nodes > 0) {
uint64_t numa_total;
- if (nb_numa_nodes > MAX_NODES) {
- nb_numa_nodes = MAX_NODES;
+ if (num_numa_nodes > MAX_NODES) {
+ num_numa_nodes = MAX_NODES;
}
/* If no memory size is given for any node, assume the default case
* and distribute the available memory equally across all nodes
*/
- for (i = 0; i < nb_numa_nodes; i++) {
+ for (i = 0; i < num_numa_nodes; i++) {
if (numa_info[i].node_mem != 0) {
break;
}
}
- if (i == nb_numa_nodes) {
+ if (i == num_numa_nodes) {
uint64_t usedmem = 0;
/* On Linux, each node's border has to be 8MB aligned,
* the final node gets the rest.
*/
- for (i = 0; i < nb_numa_nodes - 1; i++) {
- numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
+ for (i = 0; i < num_numa_nodes - 1; i++) {
+ numa_info[i].node_mem = (ram_size / num_numa_nodes) &
~((1 << 23UL) - 1);
usedmem += numa_info[i].node_mem;
}
@@ -206,7 +206,7 @@ void set_numa_nodes(void)
}
numa_total = 0;
- for (i = 0; i < nb_numa_nodes; i++) {
+ for (i = 0; i < num_numa_nodes; i++) {
numa_total += numa_info[i].node_mem;
}
if (numa_total != ram_size) {
@@ -216,7 +216,7 @@ void set_numa_nodes(void)
exit(1);
}
- for (i = 0; i < nb_numa_nodes; i++) {
+ for (i = 0; i < num_numa_nodes; i++) {
if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) {
break;
}
@@ -225,9 +225,9 @@ void set_numa_nodes(void)
* must cope with this anyway, because there are BIOSes out there in
* real machines which also use this scheme.
*/
- if (i == nb_numa_nodes) {
+ if (i == num_numa_nodes) {
for (i = 0; i < max_cpus; i++) {
- set_bit(i, numa_info[i % nb_numa_nodes].node_cpu);
+ set_bit(i, numa_info[i % num_numa_nodes].node_cpu);
}
}
}
@@ -239,7 +239,7 @@ void set_numa_modes(void)
int i;
CPU_FOREACH(cpu) {
- for (i = 0; i < nb_numa_nodes; i++) {
+ for (i = 0; i < num_numa_nodes; i++) {
if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
cpu->numa_node = i;
}
@@ -282,7 +282,7 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
uint64_t addr = 0;
int i;
- if (nb_numa_nodes == 0 || !have_memdevs) {
+ if (num_numa_nodes == 0 || !have_memdevs) {
allocate_system_memory_nonnuma(mr, owner, name, ram_size);
return;
}
diff --git a/vl.c b/vl.c
index 6e084c2..0de54c5 100644
--- a/vl.c
+++ b/vl.c
@@ -195,7 +195,7 @@ struct FWBootEntry {
static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
QTAILQ_HEAD_INITIALIZER(fw_boot_order);
-int nb_numa_nodes;
+int num_numa_nodes;
int max_numa_nodeid;
NodeInfo numa_info[MAX_NODES];
@@ -2991,7 +2991,7 @@ int main(int argc, char **argv, char **envp)
bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS);
}
- nb_numa_nodes = 0;
+ num_numa_nodes = 0;
max_numa_nodeid = 0;
nb_nics = 0;
* [Qemu-devel] [PATCH 2/2 v5] numa: enable sparse node numbering on ppc
From: Nishanth Aravamudan @ 2014-07-01 20:13 UTC (permalink / raw)
To: Eduardo Habkost
Cc: Michael S. Tsirkin, Alexey Kardashevskiy, Hu Tao, qemu-devel,
qemu-ppc, Anton Blanchard, David Rientjes, Igor Mammedov
Sparse node numbering occurs in practice on powerpc under PowerVM. In
order to emulate the same NUMA topology under qemu, the assumption that
NUMA node IDs are contiguous has to be removed; qemu was recently
modified to reject requests for sparse NUMA node numbering.
Leverage the "present" field in NodeInfo, which indicates whether a
given nodeid was specified on the command line. Adjust the various NUMA
loops to iterate up to the maximum known NUMA node ID rather than the
number of NUMA nodes.
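Purely as an illustration of the resulting loop shape (not a verbatim
hunk from the diff below), the adjusted iterations walk every ID up to
the highest known one and skip the holes:

    for (i = 0; i < max_numa_nodeid; i++) {
        if (!numa_info[i].present) {
            continue;  /* gap in a sparse numbering, nothing configured */
        }
        /* ... operate on numa_info[i] as before ... */
    }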
numa.c::set_numa_nodes() becomes a bit more involved, since it now has
to round-robin the CPUs over only the present nodes when the user does
not specify a CPU-to-node mapping.
Note that the x86 code needs changes for both sparse node numbering and
memoryless nodes, and the ppc code needs changes for memoryless nodes.
ppc also currently requires node 0 to be present on the command line,
or else it fails with: "qemu: pSeries SLOF firmware requires >= 128M
guest RMA (Real Mode Area memory)". Alexey has a series to address the latter.
Examples:
Before:
mlock=off -numa node,nodeid=3 -numa node,nodeid=0 -smp 4
qemu-system-ppc64-base: numa: Node ID missing: 2
After:
(qemu) info numa
2 nodes
node 0 cpus: 1 3
node 0 size: 2048 MB
node 3 cpus: 0 2
node 3 size: 2048 MB
available: 2 nodes (0,3)
node 0 cpus: 1 3
node 0 size: 2030 MB
node 0 free: 1934 MB
node 3 cpus: 0 2
node 3 size: 2045 MB
node 3 free: 1957 MB
node distances:
node   0   3
  0:  10  40
  3:  40  10
Signed-off-by: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Hu Tao <hutao@cn.fujitsu.com>
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: qemu-devel@nongnu.org
Cc: qemu-ppc@nongnu.org
---
I understand that 2.1 is in freeze, so I'm not requesting this be
applied, but would like feedback to have this ready to queue up for the
next release.
v1 -> v2:
Modify set_numa_nodes loop for round-robin'ing CPUs.
v2 -> v3:
Updated changelog to indicate problem being solved.
Updated memory_region_allocate_system_memory based upon feedback from
Hu.
Updated set_numa_nodes loop again to be simpler based upon feedback
from Hu.
Fixed bug with a mix of nodes with nodeid specified and without, where
the same nodeid would be used by the explicit specification and the
auto-numbering code.
v3 -> v4:
Rename nb_numa_nodes to num_numa_nodes to help catch usage.
Rebased to master/origin as Eduardo's patches to support the Present
flag have gone upstream.
Disable sparse node numbering on i386, enable it on ppc.
v4 -> v5:
Split rename of nb_numa_nodes to separate patch.
Fix checkpatch warnings.
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 12472c6..cdefafe 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1121,6 +1121,18 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
guest_info->ram_size = below_4g_mem_size + above_4g_mem_size;
guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
guest_info->apic_xrupt_override = kvm_allows_irq0_override();
+ /* No support for sparse NUMA node IDs yet: */
+ for (i = max_numa_nodeid - 1; i >= 0; i--) {
+ /* Report large node IDs first, to make mistakes easier to spot */
+ if (!numa_info[i].present) {
+ error_report("numa: Node ID missing: %d", i);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ /* This must be always true if all nodes are present */
+ assert(num_numa_nodes == max_numa_nodeid);
+
guest_info->numa_nodes = num_numa_nodes;
guest_info->node_mem = g_malloc0(guest_info->numa_nodes *
sizeof *guest_info->node_mem);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 4b74fd6..9a247f8 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -642,7 +642,10 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
/* RAM: Node 1 and beyond */
mem_start = node0_size;
- for (i = 1; i < num_numa_nodes; i++) {
+ for (i = 1; i < max_numa_nodeid; i++) {
+ if (!numa_info[i].present) {
+ continue;
+ }
mem_reg_property[0] = cpu_to_be64(mem_start);
if (mem_start >= ram_size) {
node_size = 0;
diff --git a/monitor.c b/monitor.c
index 392677a..53955e4 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1949,7 +1949,10 @@ static void do_info_numa(Monitor *mon, const QDict *qdict)
CPUState *cpu;
monitor_printf(mon, "%d nodes\n", num_numa_nodes);
- for (i = 0; i < num_numa_nodes; i++) {
+ for (i = 0; i < max_numa_nodeid; i++) {
+ if (!numa_info[i].present) {
+ continue;
+ }
monitor_printf(mon, "node %d cpus:", i);
CPU_FOREACH(cpu) {
if (cpu->numa_node == i) {
diff --git a/numa.c b/numa.c
index 5930df0..a689e52 100644
--- a/numa.c
+++ b/numa.c
@@ -53,7 +53,10 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
if (node->has_nodeid) {
nodenr = node->nodeid;
} else {
- nodenr = num_numa_nodes;
+ nodenr = 0;
+ while (numa_info[nodenr].present) {
+ nodenr++;
+ }
}
if (nodenr >= MAX_NODES) {
@@ -160,22 +163,10 @@ error:
void set_numa_nodes(void)
{
- int i;
+ int i, j;
assert(max_numa_nodeid <= MAX_NODES);
- /* No support for sparse NUMA node IDs yet: */
- for (i = max_numa_nodeid - 1; i >= 0; i--) {
- /* Report large node IDs first, to make mistakes easier to spot */
- if (!numa_info[i].present) {
- error_report("numa: Node ID missing: %d", i);
- exit(1);
- }
- }
-
- /* This must be always true if all nodes are present: */
- assert(num_numa_nodes == max_numa_nodeid);
-
if (num_numa_nodes > 0) {
uint64_t numa_total;
@@ -186,27 +177,30 @@ void set_numa_nodes(void)
/* If no memory size is given for any node, assume the default case
* and distribute the available memory equally across all nodes
*/
- for (i = 0; i < num_numa_nodes; i++) {
- if (numa_info[i].node_mem != 0) {
+ for (i = 0; i < max_numa_nodeid; i++) {
+ if (numa_info[i].present && numa_info[i].node_mem != 0) {
break;
}
}
- if (i == num_numa_nodes) {
+ if (i == max_numa_nodeid) {
uint64_t usedmem = 0;
/* On Linux, each node's border has to be 8MB aligned,
* the final node gets the rest.
*/
- for (i = 0; i < num_numa_nodes - 1; i++) {
- numa_info[i].node_mem = (ram_size / num_numa_nodes) &
- ~((1 << 23UL) - 1);
- usedmem += numa_info[i].node_mem;
+ for (i = 0; i < max_numa_nodeid - 1; i++) {
+ if (numa_info[i].present) {
+ numa_info[i].node_mem = (ram_size / num_numa_nodes) &
+ ~((1 << 23UL) - 1);
+ usedmem += numa_info[i].node_mem;
+ }
}
+ assert(numa_info[i].present);
numa_info[i].node_mem = ram_size - usedmem;
}
numa_total = 0;
- for (i = 0; i < num_numa_nodes; i++) {
+ for (i = 0; i < max_numa_nodeid; i++) {
numa_total += numa_info[i].node_mem;
}
if (numa_total != ram_size) {
@@ -216,8 +210,9 @@ void set_numa_nodes(void)
exit(1);
}
- for (i = 0; i < num_numa_nodes; i++) {
- if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) {
+ for (i = 0; i < max_numa_nodeid; i++) {
+ if (numa_info[i].present &&
+ !bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) {
break;
}
}
@@ -225,9 +220,12 @@ void set_numa_nodes(void)
* must cope with this anyway, because there are BIOSes out there in
* real machines which also use this scheme.
*/
- if (i == num_numa_nodes) {
- for (i = 0; i < max_cpus; i++) {
- set_bit(i, numa_info[i % num_numa_nodes].node_cpu);
+ if (i == max_numa_nodeid) {
+ for (i = 0, j = 0; i < max_cpus; i++) {
+ do {
+ j = (j + 1) % (max_numa_nodeid);
+ } while (!numa_info[j].present);
+ set_bit(i, numa_info[j].node_cpu);
}
}
}
@@ -239,8 +237,9 @@ void set_numa_modes(void)
int i;
CPU_FOREACH(cpu) {
- for (i = 0; i < num_numa_nodes; i++) {
- if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
+ for (i = 0; i < max_numa_nodeid; i++) {
+ if (numa_info[i].present &&
+ test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
cpu->numa_node = i;
}
}
@@ -288,10 +287,13 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
}
memory_region_init(mr, owner, name, ram_size);
- for (i = 0; i < MAX_NODES; i++) {
+ for (i = 0; i < max_numa_nodeid; i++) {
Error *local_err = NULL;
uint64_t size = numa_info[i].node_mem;
HostMemoryBackend *backend = numa_info[i].node_memdev;
+ if (!numa_info[i].present) {
+ continue;
+ }
if (!backend) {
continue;
}
* Re: [Qemu-devel] [PATCH 2/2 v5] numa: enable sparse node numbering on ppc
From: Eduardo Habkost @ 2014-07-01 20:39 UTC (permalink / raw)
To: Nishanth Aravamudan
Cc: Michael S. Tsirkin, Alexey Kardashevskiy, Hu Tao, qemu-devel,
qemu-ppc, Anton Blanchard, David Rientjes, Igor Mammedov
On Tue, Jul 01, 2014 at 01:13:28PM -0700, Nishanth Aravamudan wrote:
[...]
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 12472c6..cdefafe 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1121,6 +1121,18 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
> guest_info->ram_size = below_4g_mem_size + above_4g_mem_size;
> guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
> guest_info->apic_xrupt_override = kvm_allows_irq0_override();
> + /* No support for sparse NUMA node IDs yet: */
> + for (i = max_numa_nodeid - 1; i >= 0; i--) {
> + /* Report large node IDs first, to make mistakes easier to spot */
> + if (!numa_info[i].present) {
> + error_report("numa: Node ID missing: %d", i);
> + exit(EXIT_FAILURE);
> + }
> + }
> +
> + /* This must be always true if all nodes are present */
> + assert(num_numa_nodes == max_numa_nodeid);
> +
I wonder if there's a better place where we could put this check.
> guest_info->numa_nodes = num_numa_nodes;
> guest_info->node_mem = g_malloc0(guest_info->numa_nodes *
> sizeof *guest_info->node_mem);
[...]
> diff --git a/numa.c b/numa.c
> index 5930df0..a689e52 100644
> --- a/numa.c
> +++ b/numa.c
[...]
> @@ -225,9 +220,12 @@ void set_numa_nodes(void)
> * must cope with this anyway, because there are BIOSes out there in
> * real machines which also use this scheme.
> */
> - if (i == num_numa_nodes) {
> - for (i = 0; i < max_cpus; i++) {
> - set_bit(i, numa_info[i % num_numa_nodes].node_cpu);
> + if (i == max_numa_nodeid) {
> + for (i = 0, j = 0; i < max_cpus; i++) {
Doesn't j need to be initialized to -1, here?
Except for that, patch looks good to me. But I would be more comfortable
with it if we had automated tests to help ensure we are not breaking
compatibility of existing NUMA command-line combinations with these
changes.
> + do {
> + j = (j + 1) % (max_numa_nodeid);
> + } while (!numa_info[j].present);
> + set_bit(i, numa_info[j].node_cpu);
> }
> }
> }
[...]
>
--
Eduardo
* Re: [Qemu-devel] [PATCH 1/2] Rename nb_numa_nodes to num_numa_nodes
From: Eduardo Habkost @ 2014-07-01 20:42 UTC (permalink / raw)
To: Nishanth Aravamudan
Cc: Michael S. Tsirkin, Alexey Kardashevskiy, Hu Tao, qemu-devel,
qemu-ppc, Anton Blanchard, David Rientjes, Igor Mammedov
On Tue, Jul 01, 2014 at 01:11:08PM -0700, Nishanth Aravamudan wrote:
> As suggested by Eduardo Habkost, rename nb_numa_nodes to num_numa_nodes
> throughout the code, as that reflects the use of the variable more
> clearly.
>
> Signed-off-by: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
--
Eduardo
* Re: [Qemu-devel] [PATCH 2/2 v5] numa: enable sparse node numbering on ppc
From: Nishanth Aravamudan @ 2014-07-01 20:50 UTC (permalink / raw)
To: Eduardo Habkost
Cc: Michael S. Tsirkin, Alexey Kardashevskiy, Hu Tao, qemu-devel,
qemu-ppc, Anton Blanchard, David Rientjes, Igor Mammedov
On 01.07.2014 [17:39:57 -0300], Eduardo Habkost wrote:
> On Tue, Jul 01, 2014 at 01:13:28PM -0700, Nishanth Aravamudan wrote:
> [...]
> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > index 12472c6..cdefafe 100644
> > --- a/hw/i386/pc.c
> > +++ b/hw/i386/pc.c
> > @@ -1121,6 +1121,18 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
> > guest_info->ram_size = below_4g_mem_size + above_4g_mem_size;
> > guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
> > guest_info->apic_xrupt_override = kvm_allows_irq0_override();
> > + /* No support for sparse NUMA node IDs yet: */
> > + for (i = max_numa_nodeid - 1; i >= 0; i--) {
> > + /* Report large node IDs first, to make mistakes easier to spot */
> > + if (!numa_info[i].present) {
> > + error_report("numa: Node ID missing: %d", i);
> > + exit(EXIT_FAILURE);
> > + }
> > + }
> > +
> > + /* This must be always true if all nodes are present */
> > + assert(num_numa_nodes == max_numa_nodeid);
> > +
>
> I wonder if there's a better place where we could put this check.
Well, only i386 and ppc support NUMA, afaict. So I'm not sure where it
makes sense to put it. I guess we could have a flag that the
architectures set that indicates sparse NUMA support or not, and put
this in the generic code.
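Something like this, purely as a sketch with invented names (neither
the flag nor its wiring exists today):

    /* hypothetical per-machine flag, e.g. in MachineClass */
    bool numa_sparse_node_ids;              /* invented field name */

    /* ...and the x86-only check from this patch, moved into generic
     * code (e.g. set_numa_nodes()) and guarded by that flag:
     */
    if (!mc->numa_sparse_node_ids) {
        for (i = max_numa_nodeid - 1; i >= 0; i--) {
            if (!numa_info[i].present) {
                error_report("numa: Node ID missing: %d", i);
                exit(EXIT_FAILURE);
            }
        }
        assert(num_numa_nodes == max_numa_nodeid);
    }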
Or do you mean putting this check somewhere else in the PC init code?
> > guest_info->numa_nodes = num_numa_nodes;
> > guest_info->node_mem = g_malloc0(guest_info->numa_nodes *
> > sizeof *guest_info->node_mem);
> [...]
> > diff --git a/numa.c b/numa.c
> > index 5930df0..a689e52 100644
> > --- a/numa.c
> > +++ b/numa.c
> [...]
> > @@ -225,9 +220,12 @@ void set_numa_nodes(void)
> > * must cope with this anyway, because there are BIOSes out there in
> > * real machines which also use this scheme.
> > */
> > - if (i == num_numa_nodes) {
> > - for (i = 0; i < max_cpus; i++) {
> > - set_bit(i, numa_info[i % num_numa_nodes].node_cpu);
> > + if (i == max_numa_nodeid) {
> > + for (i = 0, j = 0; i < max_cpus; i++) {
>
> Doesn't j need to be initialized to -1, here?
Arrgh, sorry, I had been messing with your suggestion to use a while loop.
You're right, it needs to be -1 here.
> Except for that, patch looks good to me. But I would be more comfortable
> with it if we had automated tests to help ensure we are not breaking
> compatibility of existing NUMA command-line combinations with these
> changes.
Is that the test target in the qemu source? Are there examples of any
such NUMA tests already?
Thanks,
Nish
* Re: [Qemu-devel] [PATCH 2/2 v5] numa: enable sparse node numbering on ppc
From: Eduardo Habkost @ 2014-07-02 18:21 UTC (permalink / raw)
To: Nishanth Aravamudan
Cc: Michael S. Tsirkin, Alexey Kardashevskiy, Hu Tao, qemu-devel,
qemu-ppc, Anton Blanchard, David Rientjes, Igor Mammedov
On Tue, Jul 01, 2014 at 01:50:06PM -0700, Nishanth Aravamudan wrote:
> On 01.07.2014 [17:39:57 -0300], Eduardo Habkost wrote:
> > On Tue, Jul 01, 2014 at 01:13:28PM -0700, Nishanth Aravamudan wrote:
> > [...]
> > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > > index 12472c6..cdefafe 100644
> > > --- a/hw/i386/pc.c
> > > +++ b/hw/i386/pc.c
> > > @@ -1121,6 +1121,18 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
> > > guest_info->ram_size = below_4g_mem_size + above_4g_mem_size;
> > > guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
> > > guest_info->apic_xrupt_override = kvm_allows_irq0_override();
> > > + /* No support for sparse NUMA node IDs yet: */
> > > + for (i = max_numa_nodeid - 1; i >= 0; i--) {
> > > + /* Report large node IDs first, to make mistakes easier to spot */
> > > + if (!numa_info[i].present) {
> > > + error_report("numa: Node ID missing: %d", i);
> > > + exit(EXIT_FAILURE);
> > > + }
> > > + }
> > > +
> > > + /* This must be always true if all nodes are present */
> > > + assert(num_numa_nodes == max_numa_nodeid);
> > > +
> >
> > I wonder if there's a better place where we could put this check.
>
> Well, only i386 and ppc support NUMA, afaict. So I'm not sure where it
> makes sense to put it. I guess we could have a flag that the
> architectures set that indicates sparse NUMA support or not, and put
> this in the generic code.
>
> Or do you mean putting this check somewhere else in the PC init code?
I mean somewhere else in the PC init code. But as today the code that
calls pc_guest_info_init() and pc_memory_init() is duplicated in both
pc_piix.c and pc_q35.c, this looks like the best place we have.
>
> > > guest_info->numa_nodes = num_numa_nodes;
> > > guest_info->node_mem = g_malloc0(guest_info->numa_nodes *
> > > sizeof *guest_info->node_mem);
> > [...]
> > > diff --git a/numa.c b/numa.c
> > > index 5930df0..a689e52 100644
> > > --- a/numa.c
> > > +++ b/numa.c
> > [...]
> > > @@ -225,9 +220,12 @@ void set_numa_nodes(void)
> > > * must cope with this anyway, because there are BIOSes out there in
> > > * real machines which also use this scheme.
> > > */
> > > - if (i == num_numa_nodes) {
> > > - for (i = 0; i < max_cpus; i++) {
> > > - set_bit(i, numa_info[i % num_numa_nodes].node_cpu);
> > > + if (i == max_numa_nodeid) {
> > > + for (i = 0, j = 0; i < max_cpus; i++) {
> >
> > Doesn't j need to be initialized to -1, here?
>
> > Arrgh, sorry, I had been messing with your suggestion to use a while loop.
> You're right, it needs to be -1 here.
>
> > Except for that, patch looks good to me. But I would be more comfortable
> > with it if we had automated tests to help ensure we are not breaking
> > > compatibility of existing NUMA command-line combinations with these
> > changes.
>
> Is that the test target in the qemu source? Are there examples of any
> such NUMA tests already?
I use 'make check' to run them, they are in the tests/ directory.
I am not aware of any NUMA-related test, but I see two possible ways of
testing it: using qtest and asking for the NUMA node info through
the monitor, or a unit test for numa.c that simply calls
numa_node_parse() and set_numa_nodes(), and then checks the result on
numa_info[] directly.
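For the unit-test route, a very rough and untested sketch -- assuming
numa.o and the globals it touches (numa_info[], num_numa_nodes,
max_numa_nodeid, ram_size, max_cpus) can be linked into a test binary,
and glossing over numa_node_parse() being static -- might look like:

    #include <glib.h>
    /* plus whatever headers end up declaring numa_info[],
     * set_numa_nodes() and the related globals */

    static void test_sparse_node_ids(void)
    {
        /* pretend "-numa node,nodeid=0 -numa node,nodeid=3" was parsed */
        ram_size = 4ULL << 30;
        numa_info[0].present = true;
        numa_info[3].present = true;
        num_numa_nodes = 2;
        max_numa_nodeid = 4;

        set_numa_nodes();

        /* the default split should cover exactly ram_size and leave the
         * non-present IDs empty */
        g_assert(numa_info[0].node_mem + numa_info[3].node_mem == ram_size);
        g_assert(numa_info[1].node_mem == 0);
        g_assert(numa_info[2].node_mem == 0);
    }

    int main(int argc, char **argv)
    {
        g_test_init(&argc, &argv, NULL);
        g_test_add_func("/numa/sparse-node-ids", test_sparse_node_ids);
        return g_test_run();
    }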
A third option may be using qtest and checking the resulting ACPI tables
directly. It would cover even more code, but would be specific to PC.
The tests won't be a requirement to me, but they would surely be welcome
(and would have detected the j=0 mistake above).
--
Eduardo
* Re: [Qemu-devel] [PATCH 2/2 v5] numa: enable sparse node numbering on ppc
From: Nishanth Aravamudan @ 2014-07-02 21:02 UTC (permalink / raw)
To: Eduardo Habkost
Cc: Michael S. Tsirkin, Alexey Kardashevskiy, Hu Tao, qemu-devel,
qemu-ppc, Anton Blanchard, David Rientjes, Igor Mammedov
On 02.07.2014 [15:21:38 -0300], Eduardo Habkost wrote:
> On Tue, Jul 01, 2014 at 01:50:06PM -0700, Nishanth Aravamudan wrote:
> > On 01.07.2014 [17:39:57 -0300], Eduardo Habkost wrote:
> > > On Tue, Jul 01, 2014 at 01:13:28PM -0700, Nishanth Aravamudan wrote:
> > > [...]
> > > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > > > index 12472c6..cdefafe 100644
> > > > --- a/hw/i386/pc.c
> > > > +++ b/hw/i386/pc.c
> > > > @@ -1121,6 +1121,18 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
> > > > guest_info->ram_size = below_4g_mem_size + above_4g_mem_size;
> > > > guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
> > > > guest_info->apic_xrupt_override = kvm_allows_irq0_override();
> > > > + /* No support for sparse NUMA node IDs yet: */
> > > > + for (i = max_numa_nodeid - 1; i >= 0; i--) {
> > > > + /* Report large node IDs first, to make mistakes easier to spot */
> > > > + if (!numa_info[i].present) {
> > > > + error_report("numa: Node ID missing: %d", i);
> > > > + exit(EXIT_FAILURE);
> > > > + }
> > > > + }
> > > > +
> > > > + /* This must be always true if all nodes are present */
> > > > + assert(num_numa_nodes == max_numa_nodeid);
> > > > +
> > >
> > > I wonder if there's a better place where we could put this check.
> >
> > Well, only i386 and ppc support NUMA, afaict. So I'm not sure where it
> > makes sense to put it. I guess we could have a flag that the
> > architectures set that indicates sparse NUMA support or not, and put
> > this in the generic code.
> >
> > Or do you mean putting this check somewhere else in the PC init code?
>
> I mean somewhere else in the PC init code. But as today the code that
> calls pc_guest_info_init() and pc_memory_init() is duplicated in both
> pc_piix.c and pc_q35.c, this looks like the best place we have.
Ok, so if I send out another revision with the fixed j initialization
below, is there anything else in my changes that you would like fixed?
> > > > guest_info->numa_nodes = num_numa_nodes;
> > > > guest_info->node_mem = g_malloc0(guest_info->numa_nodes *
> > > > sizeof *guest_info->node_mem);
> > > [...]
> > > > diff --git a/numa.c b/numa.c
> > > > index 5930df0..a689e52 100644
> > > > --- a/numa.c
> > > > +++ b/numa.c
> > > [...]
> > > > @@ -225,9 +220,12 @@ void set_numa_nodes(void)
> > > > * must cope with this anyway, because there are BIOSes out there in
> > > > * real machines which also use this scheme.
> > > > */
> > > > - if (i == num_numa_nodes) {
> > > > - for (i = 0; i < max_cpus; i++) {
> > > > - set_bit(i, numa_info[i % num_numa_nodes].node_cpu);
> > > > + if (i == max_numa_nodeid) {
> > > > + for (i = 0, j = 0; i < max_cpus; i++) {
> > >
> > > Doesn't j need to be initialized to -1, here?
> >
> > Arrgh, sorry, I had been messing with your suggestion to use a while loop.
> > You're right, it needs to be -1 here.
> >
> > > Except for that, patch looks good to me. But I would be more comfortable
> > > with it if we had automated tests to help ensure we are not breaking
> > > compatibility of existing NUMA command-line combinations with these
> > > changes.
> >
> > Is that the test target in the qemu source? Are there examples of any
> > such NUMA tests already?
>
> I use 'make check' to run them, they are in the tests/ directory.
Got it, thanks.
> I am not aware of any NUMA-related test, but I see two possible ways of
> testing it: using qtest and asking for the NUMA node info through
> the monitor, or a unit test for numa.c that simply calls
> numa_node_parse() and set_numa_nodes(), and then checks the result on
> numa_info[] directly.
Do you have a preference for which of these to do?
> A third option may be using qtest and checking the resulting ACPI tables
> directly. It would cover even more code, but would be specific to PC.
I'm not comfortable saying I can get to this, as I still don't really
know the ACPI code, but I can put it on my todo list, at least.
> The tests won't be a requirement to me, but they would surely be welcome
> (and would have detected the j=0 mistake above).
I think it makes sense to put this in now, as it would have caught the
original issue(s) with sparse node numbering as well.
Thanks,
Nish
* Re: [Qemu-devel] [PATCH 2/2 v5] numa: enable sparse node numbering on ppc
From: Eduardo Habkost @ 2014-07-02 21:52 UTC (permalink / raw)
To: Nishanth Aravamudan
Cc: Michael S. Tsirkin, Alexey Kardashevskiy, Hu Tao, qemu-devel,
qemu-ppc, Anton Blanchard, David Rientjes, Igor Mammedov
On Wed, Jul 02, 2014 at 02:02:14PM -0700, Nishanth Aravamudan wrote:
> On 02.07.2014 [15:21:38 -0300], Eduardo Habkost wrote:
> > On Tue, Jul 01, 2014 at 01:50:06PM -0700, Nishanth Aravamudan wrote:
> > > On 01.07.2014 [17:39:57 -0300], Eduardo Habkost wrote:
> > > > On Tue, Jul 01, 2014 at 01:13:28PM -0700, Nishanth Aravamudan wrote:
> > > > [...]
> > > > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > > > > index 12472c6..cdefafe 100644
> > > > > --- a/hw/i386/pc.c
> > > > > +++ b/hw/i386/pc.c
> > > > > @@ -1121,6 +1121,18 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
> > > > > guest_info->ram_size = below_4g_mem_size + above_4g_mem_size;
> > > > > guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
> > > > > guest_info->apic_xrupt_override = kvm_allows_irq0_override();
> > > > > + /* No support for sparse NUMA node IDs yet: */
> > > > > + for (i = max_numa_nodeid - 1; i >= 0; i--) {
> > > > > + /* Report large node IDs first, to make mistakes easier to spot */
> > > > > + if (!numa_info[i].present) {
> > > > > + error_report("numa: Node ID missing: %d", i);
> > > > > + exit(EXIT_FAILURE);
> > > > > + }
> > > > > + }
> > > > > +
> > > > > + /* This must be always true if all nodes are present */
> > > > > + assert(num_numa_nodes == max_numa_nodeid);
> > > > > +
> > > >
> > > > I wonder if there's a better place where we could put this check.
> > >
> > > Well, only i386 and ppc support NUMA, afaict. So I'm not sure where it
> > > makes sense to put it. I guess we could have a flag that the
> > > architectures set that indicates sparse NUMA support or not, and put
> > > this in the generic code.
> > >
> > > Or do you mean putting this check somewhere else in the PC init code?
> >
> > I mean somewhere else in the PC init code. But as today the code that
> > calls pc_guest_info_init() and pc_memory_init() is duplicated in both
> > pc_piix.c and pc_q35.c, this looks like the best place we have.
>
> Ok, so if I send out another revision with the fixed j initialization
> below, is there anything else in my changes that you would like fixed?
I don't see any additional issues.
>
[...]
> > > > Except for that, patch looks good to me. But I would be more comfortable
> > > > with it if we had automated tests to help ensure we are not breaking
> > > > compatibility of existing NUMA command-line combinations with these
> > > > changes.
> > >
> > > Is that the test target in the qemu source? Are there examples of any
> > > such NUMA tests already?
> >
> > I use 'make check' to run them, they are in the tests/ directory.
>
> Got it, thanks.
>
> > I am not aware of any NUMA-related test, but I see two possible ways of
> > testing it: using qtest and asking for the NUMA node info through
> > the monitor, or a unit test for numa.c that simply calls
> > numa_node_parse() and set_numa_nodes(), and then checks the result on
> > numa_info[] directly.
>
> Do you have a preference for which of these to do?
The one we find to be easier. :)
A unit test may require untangling numa.o dependencies. qtest will
probably require parsing the "info numa" output.
A qtest case would cover more code (not just numa.c, but command-line
handling in vl.c, and monitor code).
>
> > A third option may be using qtest and checking the resulting ACPI tables
> > directly. It would cover even more code, but would be specific to PC.
>
> I'm not comfortable saying I can get to this, as I still don't really
> know the ACPI code, but I can put it on my todo list, at least.
>
> > The tests won't be a requirement to me, but they would surely be welcome
> > (and would have detected the j=0 mistake above).
>
> I think it makes sense to put this in now, as it would have caught the
> original issue(s) with sparse node numbering as well.
>
> Thanks,
> Nish
>
--
Eduardo
* Re: [Qemu-devel] [PATCH 1/2] Rename nb_numa_nodes to num_numa_nodes
From: Michael S. Tsirkin @ 2014-07-03 7:03 UTC (permalink / raw)
To: Nishanth Aravamudan
Cc: Eduardo Habkost, Alexey Kardashevskiy, Hu Tao, qemu-devel,
qemu-ppc, Anton Blanchard, David Rientjes, Igor Mammedov
On Tue, Jul 01, 2014 at 01:11:08PM -0700, Nishanth Aravamudan wrote:
> As suggested by Eduardo Habkost, rename nb_numa_nodes to num_numa_nodes
> throughout the code, as that reflects the use of the variable more
> clearly.
>
> Signed-off-by: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
So far so good but we are past soft freeze. So please keep reposting as
RFC to avoid noise, and repost without RFC tag after 2.1 is out.
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 2cf22b1..12472c6 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -698,23 +698,23 @@ static FWCfgState *bochs_bios_init(void)
> * of nodes, one word for each VCPU->node and one word for each node to
> * hold the amount of memory.
> */
> - numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes);
> - numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
> + numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + num_numa_nodes);
> + numa_fw_cfg[0] = cpu_to_le64(num_numa_nodes);
> for (i = 0; i < max_cpus; i++) {
> unsigned int apic_id = x86_cpu_apic_id_from_index(i);
> assert(apic_id < apic_id_limit);
> - for (j = 0; j < nb_numa_nodes; j++) {
> + for (j = 0; j < num_numa_nodes; j++) {
> if (test_bit(i, numa_info[j].node_cpu)) {
> numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
> break;
> }
> }
> }
> - for (i = 0; i < nb_numa_nodes; i++) {
> + for (i = 0; i < num_numa_nodes; i++) {
> numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem);
> }
> fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
> - (1 + apic_id_limit + nb_numa_nodes) *
> + (1 + apic_id_limit + num_numa_nodes) *
> sizeof(*numa_fw_cfg));
>
> return fw_cfg;
> @@ -1121,10 +1121,10 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
> guest_info->ram_size = below_4g_mem_size + above_4g_mem_size;
> guest_info->apic_id_limit = pc_apic_id_limit(max_cpus);
> guest_info->apic_xrupt_override = kvm_allows_irq0_override();
> - guest_info->numa_nodes = nb_numa_nodes;
> + guest_info->numa_nodes = num_numa_nodes;
> guest_info->node_mem = g_malloc0(guest_info->numa_nodes *
> sizeof *guest_info->node_mem);
> - for (i = 0; i < nb_numa_nodes; i++) {
> + for (i = 0; i < num_numa_nodes; i++) {
> guest_info->node_mem[i] = numa_info[i].node_mem;
> }
>
> @@ -1134,7 +1134,7 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
> for (i = 0; i < max_cpus; i++) {
> unsigned int apic_id = x86_cpu_apic_id_from_index(i);
> assert(apic_id < guest_info->apic_id_limit);
> - for (j = 0; j < nb_numa_nodes; j++) {
> + for (j = 0; j < num_numa_nodes; j++) {
> if (test_bit(i, numa_info[j].node_cpu)) {
> guest_info->node_cpu[apic_id] = j;
> break;
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index a8ba916..4b74fd6 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -226,7 +226,7 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
> }
> }
>
> - if (nb_numa_nodes > 1) {
> + if (num_numa_nodes > 1) {
> ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
> sizeof(associativity));
> if (ret < 0) {
> @@ -608,7 +608,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
> int i, off;
>
> /* memory node(s) */
> - if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) {
> + if (num_numa_nodes > 1 && numa_info[0].node_mem < ram_size) {
> node0_size = numa_info[0].node_mem;
> } else {
> node0_size = ram_size;
> @@ -642,7 +642,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
>
> /* RAM: Node 1 and beyond */
> mem_start = node0_size;
> - for (i = 1; i < nb_numa_nodes; i++) {
> + for (i = 1; i < num_numa_nodes; i++) {
> mem_reg_property[0] = cpu_to_be64(mem_start);
> if (mem_start >= ram_size) {
> node_size = 0;
> @@ -792,7 +792,7 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
>
> /* Update the RMA size if necessary */
> if (spapr->vrma_adjust) {
> - hwaddr node0_size = (nb_numa_nodes > 1) ?
> + hwaddr node0_size = (num_numa_nodes > 1) ?
> numa_info[0].node_mem : ram_size;
> spapr->rma_size = kvmppc_rma_size(node0_size, spapr->htab_shift);
> }
> @@ -1225,7 +1225,7 @@ static void ppc_spapr_init(MachineState *machine)
> MemoryRegion *sysmem = get_system_memory();
> MemoryRegion *ram = g_new(MemoryRegion, 1);
> hwaddr rma_alloc_size;
> - hwaddr node0_size = (nb_numa_nodes > 1) ? numa_info[0].node_mem : ram_size;
> + hwaddr node0_size = (num_numa_nodes > 1) ? numa_info[0].node_mem : ram_size;
> uint32_t initrd_base = 0;
> long kernel_size = 0, initrd_size = 0;
> long load_limit, rtas_limit, fw_size;
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index d8539fd..d17bdbe 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -146,7 +146,7 @@ extern int mem_prealloc;
> */
> #define MAX_CPUMASK_BITS 255
>
> -extern int nb_numa_nodes; /* Number of NUMA nodes */
> +extern int num_numa_nodes; /* Number of NUMA nodes */
> extern int max_numa_nodeid; /* Highest specified NUMA node ID, plus one.
> * For all nodes, nodeid < max_numa_nodeid
> */
> diff --git a/monitor.c b/monitor.c
> index 5bc70a6..392677a 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -1948,8 +1948,8 @@ static void do_info_numa(Monitor *mon, const QDict *qdict)
> int i;
> CPUState *cpu;
>
> - monitor_printf(mon, "%d nodes\n", nb_numa_nodes);
> - for (i = 0; i < nb_numa_nodes; i++) {
> + monitor_printf(mon, "%d nodes\n", num_numa_nodes);
> + for (i = 0; i < num_numa_nodes; i++) {
> monitor_printf(mon, "node %d cpus:", i);
> CPU_FOREACH(cpu) {
> if (cpu->numa_node == i) {
> diff --git a/numa.c b/numa.c
> index 2fde740..5930df0 100644
> --- a/numa.c
> +++ b/numa.c
> @@ -53,7 +53,7 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
> if (node->has_nodeid) {
> nodenr = node->nodeid;
> } else {
> - nodenr = nb_numa_nodes;
> + nodenr = num_numa_nodes;
> }
>
> if (nodenr >= MAX_NODES) {
> @@ -136,7 +136,7 @@ int numa_init_func(QemuOpts *opts, void *opaque)
> if (err) {
> goto error;
> }
> - nb_numa_nodes++;
> + num_numa_nodes++;
> break;
> default:
> abort();
> @@ -174,31 +174,31 @@ void set_numa_nodes(void)
> }
>
> /* This must be always true if all nodes are present: */
> - assert(nb_numa_nodes == max_numa_nodeid);
> + assert(num_numa_nodes == max_numa_nodeid);
>
> - if (nb_numa_nodes > 0) {
> + if (num_numa_nodes > 0) {
> uint64_t numa_total;
>
> - if (nb_numa_nodes > MAX_NODES) {
> - nb_numa_nodes = MAX_NODES;
> + if (num_numa_nodes > MAX_NODES) {
> + num_numa_nodes = MAX_NODES;
> }
>
> /* If no memory size is given for any node, assume the default case
> * and distribute the available memory equally across all nodes
> */
> - for (i = 0; i < nb_numa_nodes; i++) {
> + for (i = 0; i < num_numa_nodes; i++) {
> if (numa_info[i].node_mem != 0) {
> break;
> }
> }
> - if (i == nb_numa_nodes) {
> + if (i == num_numa_nodes) {
> uint64_t usedmem = 0;
>
> /* On Linux, each node's border has to be 8MB aligned,
> * the final node gets the rest.
> */
> - for (i = 0; i < nb_numa_nodes - 1; i++) {
> - numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
> + for (i = 0; i < num_numa_nodes - 1; i++) {
> + numa_info[i].node_mem = (ram_size / num_numa_nodes) &
> ~((1 << 23UL) - 1);
> usedmem += numa_info[i].node_mem;
> }
> @@ -206,7 +206,7 @@ void set_numa_nodes(void)
> }
>
> numa_total = 0;
> - for (i = 0; i < nb_numa_nodes; i++) {
> + for (i = 0; i < num_numa_nodes; i++) {
> numa_total += numa_info[i].node_mem;
> }
> if (numa_total != ram_size) {
> @@ -216,7 +216,7 @@ void set_numa_nodes(void)
> exit(1);
> }
>
> - for (i = 0; i < nb_numa_nodes; i++) {
> + for (i = 0; i < num_numa_nodes; i++) {
> if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) {
> break;
> }
> @@ -225,9 +225,9 @@ void set_numa_nodes(void)
> * must cope with this anyway, because there are BIOSes out there in
> * real machines which also use this scheme.
> */
> - if (i == nb_numa_nodes) {
> + if (i == num_numa_nodes) {
> for (i = 0; i < max_cpus; i++) {
> - set_bit(i, numa_info[i % nb_numa_nodes].node_cpu);
> + set_bit(i, numa_info[i % num_numa_nodes].node_cpu);
> }
> }
> }
> @@ -239,7 +239,7 @@ void set_numa_modes(void)
> int i;
>
> CPU_FOREACH(cpu) {
> - for (i = 0; i < nb_numa_nodes; i++) {
> + for (i = 0; i < num_numa_nodes; i++) {
> if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
> cpu->numa_node = i;
> }
> @@ -282,7 +282,7 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
> uint64_t addr = 0;
> int i;
>
> - if (nb_numa_nodes == 0 || !have_memdevs) {
> + if (num_numa_nodes == 0 || !have_memdevs) {
> allocate_system_memory_nonnuma(mr, owner, name, ram_size);
> return;
> }
> diff --git a/vl.c b/vl.c
> index 6e084c2..0de54c5 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -195,7 +195,7 @@ struct FWBootEntry {
> static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
> QTAILQ_HEAD_INITIALIZER(fw_boot_order);
>
> -int nb_numa_nodes;
> +int num_numa_nodes;
> int max_numa_nodeid;
> NodeInfo numa_info[MAX_NODES];
>
> @@ -2991,7 +2991,7 @@ int main(int argc, char **argv, char **envp)
> bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS);
> }
>
> - nb_numa_nodes = 0;
> + num_numa_nodes = 0;
> max_numa_nodeid = 0;
> nb_nics = 0;
>
* Re: [Qemu-devel] [PATCH 1/2] Rename nb_numa_nodes to num_numa_nodes
From: Nishanth Aravamudan @ 2014-07-03 16:31 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Eduardo Habkost, Alexey Kardashevskiy, Hu Tao, qemu-devel,
qemu-ppc, Anton Blanchard, David Rientjes, Igor Mammedov
On 03.07.2014 [10:03:10 +0300], Michael S. Tsirkin wrote:
> On Tue, Jul 01, 2014 at 01:11:08PM -0700, Nishanth Aravamudan wrote:
> > As suggested by Eduardo Habkost, rename nb_numa_nodes to num_numa_nodes
> > throughout the code, as that reflects the use of the variable more
> > clearly.
> >
> > Signed-off-by: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
>
> So far so good but we are past soft freeze. So please keep reposting as
> RFC to avoid noise, and repost without RFC tag after 2.1 is out.
So sorry about that! Will keep RFC for the next post.
Thanks,
Nish