* [PATCH v2 1/2] perf/bench/numa: Add functions to detect sparse numa nodes
2017-08-21 9:27 [PATCH v2 0/2] Fixup for discontiguous/sparse numa nodes sathnaga
@ 2017-08-21 9:27 ` sathnaga
2017-08-21 9:28 ` [PATCH v2 2/2] perf/bench/numa: Handle discontiguous/sparse " sathnaga
1 sibling, 0 replies; 3+ messages in thread
From: sathnaga @ 2017-08-21 9:27 UTC (permalink / raw)
To: acme, mingo, linux-kernel, linux-perf-users
Cc: srikar, bala24, Satheesh Rajendran
From: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Add functions 1) to get a count of all nodes that are exposed to
userspace (these nodes could be memoryless cpu nodes or cpuless memory
nodes), 2) to check whether a given node is present and 3) to check
whether a given node has cpus.
This information can be used to handle sparse/discontiguous nodes.
Reviewed-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Signed-off-by: Balamuruhan S <bala24@linux.vnet.ibm.com>
---
tools/perf/bench/numa.c | 35 +++++++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 469d65b..300faba1 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -215,6 +215,41 @@ static const char * const numa_usage[] = {
NULL
};
+// Count the numa nodes exposed to userspace; node ids may be sparse, so each
+// index below g->p.nr_nodes is tested against numa_nodes_ptr individually.
+static int nr_numa_nodes(void)
+{
+	int node, present = 0;
+
+	for (node = 0; node < g->p.nr_nodes; node++)
+		present += numa_bitmask_isbitset(numa_nodes_ptr, node) ? 1 : 0;
+
+	return present;
+}
+
+// Check if given numa node is present: nonzero when @node's bit is set in numa_nodes_ptr.
+static int is_node_present(int node)
+{
+ return numa_bitmask_isbitset(numa_nodes_ptr, node);
+}
+
+// Return true when numa node @node has at least one cpu, false otherwise.
+static bool node_has_cpus(int node)
+{
+	struct bitmask *cpu = numa_allocate_cpumask();
+	bool has_cpus = false; // fall back to nocpus if the lookup fails
+	unsigned int i;
+
+	if (cpu == NULL)
+		return false; // nothing was allocated, so nothing to free
+	if (numa_node_to_cpus(node, cpu) == 0) {
+		for (i = 0; i < cpu->size && !has_cpus; i++)
+			has_cpus = numa_bitmask_isbitset(cpu, i) != 0;
+	}
+	numa_free_cpumask(cpu); // release the mask on every path past allocation
+	return has_cpus;
+}
+
static cpu_set_t bind_to_cpu(int target_cpu)
{
cpu_set_t orig_mask, mask;
--
2.7.4
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH v2 2/2] perf/bench/numa: Handle discontiguous/sparse numa nodes
2017-08-21 9:27 [PATCH v2 0/2] Fixup for discontiguous/sparse numa nodes sathnaga
2017-08-21 9:27 ` [PATCH v2 1/2] perf/bench/numa: Add functions to detect sparse " sathnaga
@ 2017-08-21 9:28 ` sathnaga
1 sibling, 0 replies; 3+ messages in thread
From: sathnaga @ 2017-08-21 9:28 UTC (permalink / raw)
To: acme, mingo, linux-kernel, linux-perf-users
Cc: srikar, bala24, Satheesh Rajendran
From: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Certain systems are designed to have sparse/discontiguous nodes.
On such systems, perf bench numa hangs, shows a wrong number of nodes
and shows values for non-existent nodes. Handle this by only
taking nodes that are exposed by the kernel to userspace.
Reviewed-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Signed-off-by: Balamuruhan S <bala24@linux.vnet.ibm.com>
---
tools/perf/bench/numa.c | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 300faba1..a3deee2 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -278,12 +278,12 @@ static cpu_set_t bind_to_cpu(int target_cpu)
static cpu_set_t bind_to_node(int target_node)
{
- int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes;
+ int cpus_per_node = g->p.nr_cpus/nr_numa_nodes();
cpu_set_t orig_mask, mask;
int cpu;
int ret;
- BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus);
+ BUG_ON(cpus_per_node*nr_numa_nodes() != g->p.nr_cpus);
BUG_ON(!cpus_per_node);
ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
@@ -683,7 +683,7 @@ static int parse_setup_node_list(void)
int i;
for (i = 0; i < mul; i++) {
- if (t >= g->p.nr_tasks) {
+ if (t >= g->p.nr_tasks || !node_has_cpus(bind_node)) {
printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);
goto out;
}
@@ -964,6 +964,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
int node;
int cpu;
int t;
+ int processes;
if (!g->p.show_convergence && !g->p.measure_convergence)
return;
@@ -998,13 +999,14 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
sum = 0;
for (node = 0; node < g->p.nr_nodes; node++) {
+ if (!is_node_present(node))
+ continue;
nr = nodes[node];
nr_min = min(nr, nr_min);
nr_max = max(nr, nr_max);
sum += nr;
}
BUG_ON(nr_min > nr_max);
-
BUG_ON(sum > g->p.nr_tasks);
if (0 && (sum < g->p.nr_tasks))
@@ -1018,8 +1020,9 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
process_groups = 0;
for (node = 0; node < g->p.nr_nodes; node++) {
- int processes = count_node_processes(node);
-
+ if (!is_node_present(node))
+ continue;
+ processes = count_node_processes(node);
nr = nodes[node];
tprintf(" %2d/%-2d", nr, processes);
@@ -1325,7 +1328,7 @@ static void print_summary(void)
printf("\n ###\n");
printf(" # %d %s will execute (on %d nodes, %d CPUs):\n",
- g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", g->p.nr_nodes, g->p.nr_cpus);
+ g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", nr_numa_nodes(), g->p.nr_cpus);
printf(" # %5dx %5ldMB global shared mem operations\n",
g->p.nr_loops, g->p.bytes_global/1024/1024);
printf(" # %5dx %5ldMB process shared mem operations\n",
--
2.7.4
^ permalink raw reply related [flat|nested] 3+ messages in thread