From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-bn3nam01on0104.outbound.protection.outlook.com ([104.47.33.104]:34771 "EHLO NAM01-BN3-obe.outbound.protection.outlook.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S932420AbeA1W3u (ORCPT ); Sun, 28 Jan 2018 17:29:50 -0500 From: Sasha Levin To: "linux-kernel@vger.kernel.org" , "stable@vger.kernel.org" CC: Satheesh Rajendran , Balamuruhan S , Arnaldo Carvalho de Melo , Sasha Levin Subject: [PATCH AUTOSEL for 4.4 07/36] perf bench numa: Fixup discontiguous/sparse numa nodes Date: Sun, 28 Jan 2018 22:28:26 +0000 Message-ID: <20180128222815.29479-7-alexander.levin@microsoft.com> References: <20180128222815.29479-1-alexander.levin@microsoft.com> In-Reply-To: <20180128222815.29479-1-alexander.levin@microsoft.com> Content-Language: en-US Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Sender: stable-owner@vger.kernel.org List-ID: From: Satheesh Rajendran [ Upstream commit 321a7c35c90cc834851ceda18a8ee18f1d032b92 ] Certain systems are designed to have sparse/discontiguous nodes. On such systems, 'perf bench numa' hangs, shows wrong number of nodes and shows values for non-existent nodes. Handle this by only taking nodes that are exposed by kernel to userspace. Signed-off-by: Satheesh Rajendran Reviewed-by: Srikar Dronamraju Acked-by: Naveen N. 
Rao Link: http://lkml.kernel.org/r/1edbcd353c009e109e93d78f2f46381930c340fe.1511368645.git.sathnaga@linux.vnet.ibm.com Signed-off-by: Balamuruhan S Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/bench/numa.c | 56 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index b4eb5b679081..73d192f57dc3 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -208,6 +208,47 @@ static const char * const numa_usage[] =3D { NULL }; =20 +/* + * To get number of numa nodes present. + */ +static int nr_numa_nodes(void) +{ + int i, nr_nodes =3D 0; + + for (i =3D 0; i < g->p.nr_nodes; i++) { + if (numa_bitmask_isbitset(numa_nodes_ptr, i)) + nr_nodes++; + } + + return nr_nodes; +} + +/* + * To check if given numa node is present. + */ +static int is_node_present(int node) +{ + return numa_bitmask_isbitset(numa_nodes_ptr, node); +} + +/* + * To check given numa node has cpus. 
+ */ +static bool node_has_cpus(int node) +{ + struct bitmask *cpu =3D numa_allocate_cpumask(); + unsigned int i; + + if (cpu && !numa_node_to_cpus(node, cpu)) { + for (i =3D 0; i < cpu->size; i++) { + if (numa_bitmask_isbitset(cpu, i)) + return true; + } + } + + return false; /* lets fall back to nocpus safely */ +} + static cpu_set_t bind_to_cpu(int target_cpu) { cpu_set_t orig_mask, mask; @@ -236,12 +277,12 @@ static cpu_set_t bind_to_cpu(int target_cpu) =20 static cpu_set_t bind_to_node(int target_node) { - int cpus_per_node =3D g->p.nr_cpus/g->p.nr_nodes; + int cpus_per_node =3D g->p.nr_cpus / nr_numa_nodes(); cpu_set_t orig_mask, mask; int cpu; int ret; =20 - BUG_ON(cpus_per_node*g->p.nr_nodes !=3D g->p.nr_cpus); + BUG_ON(cpus_per_node * nr_numa_nodes() !=3D g->p.nr_cpus); BUG_ON(!cpus_per_node); =20 ret =3D sched_getaffinity(0, sizeof(orig_mask), &orig_mask); @@ -641,7 +682,7 @@ static int parse_setup_node_list(void) int i; =20 for (i =3D 0; i < mul; i++) { - if (t >=3D g->p.nr_tasks) { + if (t >=3D g->p.nr_tasks || !node_has_cpus(bind_node)) { printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node); goto out; } @@ -956,6 +997,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence) sum =3D 0; =20 for (node =3D 0; node < g->p.nr_nodes; node++) { + if (!is_node_present(node)) + continue; nr =3D nodes[node]; nr_min =3D min(nr, nr_min); nr_max =3D max(nr, nr_max); @@ -976,8 +1019,11 @@ static void calc_convergence(double runtime_ns_max, double *convergence) process_groups =3D 0; =20 for (node =3D 0; node < g->p.nr_nodes; node++) { - int processes =3D count_node_processes(node); + int processes; =20 + if (!is_node_present(node)) + continue; + processes =3D count_node_processes(node); nr =3D nodes[node]; tprintf(" %2d/%-2d", nr, processes); =20 @@ -1283,7 +1329,7 @@ static void print_summary(void) =20 printf("\n ###\n"); printf(" # %d %s will execute (on %d nodes, %d CPUs):\n", - g->p.nr_tasks, g->p.nr_tasks =3D=3D 1 ? 
"task" : "tasks", g->p.nr_nodes, g->p.nr_cpus); + g->p.nr_tasks, g->p.nr_tasks =3D=3D 1 ? "task" : "tasks", nr_numa_nodes(), g->p.nr_cpus); printf(" # %5dx %5ldMB global shared mem operations\n", g->p.nr_loops, g->p.bytes_global/1024/1024); printf(" # %5dx %5ldMB process shared mem operations\n", --=20 2.11.0