* Re: [PATCH] tools/perf/tests: Update perf record testcase to fix usage of affinity for machines with #CPUs > 1K
2025-08-14 11:49 [PATCH] tools/perf/tests: Update perf record testcase to fix usage of affinity for machines with #CPUs > 1K Athira Rajeev
@ 2025-08-14 12:23 ` Venkat
2025-08-14 20:23 ` Namhyung Kim
2025-08-19 11:56 ` tejas05
2 siblings, 0 replies; 6+ messages in thread
From: Venkat @ 2025-08-14 12:23 UTC (permalink / raw)
To: Athira Rajeev
Cc: Arnaldo Carvalho de Melo, Jiri Olsa, Adrian Hunter, Ian Rogers,
Namhyung Kim, linux-perf-users, maddy, kjain, hbathini,
Aditya.Bodkhe1, Tejas Manhas, venkat88
> On 14 Aug 2025, at 5:19 PM, Athira Rajeev <atrajeev@linux.ibm.com> wrote:
>
> The perf record testcase fails on systems with more than 1K CPUs.
>
> Testcase: perf test -vv "PERF_RECORD_* events & perf_sample fields"
>
> PERF_RECORD_* events & perf_sample fields :
> --- start ---
> test child forked, pid 272482
> sched_getaffinity: Invalid argument
> sched__get_first_possible_cpu: Invalid argument
> test child finished with -1
> ---- end ----
> PERF_RECORD_* events & perf_sample fields: FAILED!
>
> sched__get_first_possible_cpu uses "sched_getaffinity" to get the
> cpumask, and this call returns EINVAL (Invalid argument).
> This happens because the default mask size in glibc is 1024. To
> overcome this 1024-CPU mask size limitation of cpu_set_t, change the
> mask size using the CPU_*_S macros, i.e., use CPU_ALLOC to allocate the
> cpumask and CPU_ALLOC_SIZE for its size. The same fix is needed for the
> mask used with sched_setaffinity so that the mask size is large enough
> to represent the number of possible CPUs in the system.
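For reference, the dynamically sized cpu_set_t interface that the fix switches to works roughly as below. This is a minimal standalone sketch, not code from the patch: the nrcpus value of 2048 is an arbitrary assumption standing in for cpu__max_cpu().cpu, and error handling is trimmed.

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
        int nrcpus = 2048;                      /* assumed possible-CPU count (> 1024) */
        cpu_set_t *mask = CPU_ALLOC(nrcpus);    /* heap-allocated mask sized for nrcpus */
        size_t size = CPU_ALLOC_SIZE(nrcpus);   /* bytes actually allocated for the mask */

        if (!mask)
                return 1;

        CPU_ZERO_S(size, mask);                 /* the _S variants take the size explicitly */

        /*
         * pid 0 queries the calling thread; with a fixed-size cpu_set_t,
         * this is the call that fails with EINVAL on systems that have
         * more than 1024 possible CPUs.
         */
        if (sched_getaffinity(0, size, mask) == -1) {
                perror("sched_getaffinity");
                CPU_FREE(mask);
                return 1;
        }

        for (int i = 0; i < nrcpus; i++) {
                if (CPU_ISSET_S(i, size, mask)) {
                        printf("first possible cpu: %d\n", i);
                        break;
                }
        }

        CPU_FREE(mask);
        return 0;
}

The patch keeps the existing retry loop on top of this: if sched_getaffinity still returns EINVAL, nrcpus is widened (nrcpus << 2) and the call is retried.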
>
> Reported-by: Tejas Manhas <tejas05@linux.ibm.com>
> Signed-off-by: Athira Rajeev <atrajeev@linux.ibm.com>
> ---
> tools/perf/tests/perf-record.c | 36 ++++++++++++++++++++++++----------
> 1 file changed, 26 insertions(+), 10 deletions(-)
I tested this patch by applying it on top of the mainline kernel, and it fixes the reported issue. Hence,
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Before this patch:
7: PERF_RECORD_* events & perf_sample fields:
--- start ---
test child forked, pid 78426
Using CPUID 0x00820200
sched_getaffinity: Invalid argument
sched__get_first_possible_cpu: Invalid argument
---- end(-1) ----
Leak of file descriptor 6 that opened: 'pipe:[476656]'
---- unexpected signal (6) ----
Failed to read build ID for //anon
Failed to read build ID for //anon
Failed to read build ID for //anon
Failed to read build ID for //anon
Failed to read build ID for //anon
#0 0x101871fc in child_test_sig_handler builtin-test.c:0
Failed to open [vdso], continuing without symbols
#1 0x7fffae7f04d8 [vdso][4d8]
#2 0x7fffade396f8 in raise libc-2.28.so[496f8]
#3 0x7fffade13ff4 in abort libc-2.28.so[23ff4]
#4 0x101879c4 in check_leaks builtin-test.c:0
#5 0x10187c20 in run_test_child builtin-test.c:0
#6 0x100fe7dc in start_command run-command.c:128
#7 0x10188574 in __cmd_test builtin-test.c:0
#8 0x10188c4c in cmd_test ??:0
#9 0x100ea3ac in run_builtin perf.c:0
#10 0x100eaa34 in handle_internal_command perf.c:0
#11 0x10036aa8 in main ??:0
#12 0x7fffade19f5c in generic_start_main.isra.0 libc-2.28.so[29f5c]
#13 0x7fffade1a0f4 in __libc_start_main libc-2.28.so[2a0f4]
7: PERF_RECORD_* events & perf_sample fields : FAILED!
After this patch:
0x7fffad7bf168: mmap mask[0]: 0x7fffad7bf258: mmap mask[0]: 0x7fffad7bf348: mmap mask[0]: 0x7fffad7bf438: mmap mask[0]: 0x7fffad7bf528: mmap mask[0]: 0x7fffad7bf618: mmap mask[0]: 0x7fffad7bf708: mmap mask[0]: 0x7fffad7bf7f8: mmap mask[0]: 0x7fffad7bf8e8: mmap mask[0]: 0x7fffad7bf9d8: mmap mask[0]: 151627927388184 0 PERF_RECORD_COMM exec: sleep:130633/130633
151627927461320 0 PERF_RECORD_MMAP2 130633/130633: [0x124a50000(0x30000) @ 0 fd:00 251660138 919860811]: r-xp /usr/bin/sleep
151627927510848 0 PERF_RECORD_MMAP2 130633/130633: [0x7fff9c430000(0x60000) @ 0 fd:00 134289888 563052989]: r-xp /usr/lib64/ld-2.28.so
151627927559440 0 PERF_RECORD_MMAP2 130633/130633: [0x7fff9c410000(0x20000) @ 0 00:00 0 0]: r-xp [vdso]
151627927751224 0 PERF_RECORD_MMAP2 130633/130633: [0x7fff9c1f0000(0x210000) @ 0 fd:00 100663849 3568210255]: r-xp /usr/lib64/glibc-hwcaps/power9/libc-2.28.so
151628928449218 0 PERF_RECORD_EXIT(130633:130633):(130632:130632)
---- end(0) ----
7: PERF_RECORD_* events & perf_sample fields : Ok
Regards,
Venkat.
>
> diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
> index 0b3c37e66871..d895df037707 100644
> --- a/tools/perf/tests/perf-record.c
> +++ b/tools/perf/tests/perf-record.c
> @@ -13,15 +13,19 @@
> #include "tests.h"
> #include "util/mmap.h"
> #include "util/sample.h"
> +#include "util/cpumap.h"
>
> static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
> {
> - int i, cpu = -1, nrcpus = 1024;
> + int i, cpu = -1;
> + int nrcpus = cpu__max_cpu().cpu;
> + size_t size = CPU_ALLOC_SIZE(nrcpus);
> +
> realloc:
> - CPU_ZERO(maskp);
> + CPU_ZERO_S(size, maskp);
>
> - if (sched_getaffinity(pid, sizeof(*maskp), maskp) == -1) {
> - if (errno == EINVAL && nrcpus < (1024 << 8)) {
> + if (sched_getaffinity(pid, size, maskp) == -1) {
> + if (errno == EINVAL && nrcpus < (cpu__max_cpu().cpu << 8)) {
> nrcpus = nrcpus << 2;
> goto realloc;
> }
> @@ -30,11 +34,11 @@ static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
> }
>
> for (i = 0; i < nrcpus; i++) {
> - if (CPU_ISSET(i, maskp)) {
> + if (CPU_ISSET_S(i, size, maskp)) {
> if (cpu == -1)
> cpu = i;
> else
> - CPU_CLR(i, maskp);
> + CPU_CLR_S(i, size, maskp);
> }
> }
>
> @@ -50,8 +54,9 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> .no_buffering = true,
> .mmap_pages = 256,
> };
> - cpu_set_t cpu_mask;
> - size_t cpu_mask_size = sizeof(cpu_mask);
> + int nrcpus = cpu__max_cpu().cpu;
> + cpu_set_t *cpu_mask;
> + size_t cpu_mask_size;
> struct evlist *evlist = evlist__new_dummy();
> struct evsel *evsel;
> struct perf_sample sample;
> @@ -69,12 +74,22 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
> char sbuf[STRERR_BUFSIZE];
>
> + cpu_mask = CPU_ALLOC(nrcpus);
> + if (!cpu_mask) {
> + pr_debug("failed to create cpumask\n");
> + goto out;
> + }
> +
> + cpu_mask_size = CPU_ALLOC_SIZE(nrcpus);
> + CPU_ZERO_S(cpu_mask_size, cpu_mask);
> +
> perf_sample__init(&sample, /*all=*/false);
> if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
> evlist = evlist__new_default();
>
> if (evlist == NULL) {
> pr_debug("Not enough memory to create evlist\n");
> + CPU_FREE(cpu_mask);
> goto out;
> }
>
> @@ -111,7 +126,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> evsel__set_sample_bit(evsel, TIME);
> evlist__config(evlist, &opts, NULL);
>
> - err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
> + err = sched__get_first_possible_cpu(evlist->workload.pid, cpu_mask);
> if (err < 0) {
> pr_debug("sched__get_first_possible_cpu: %s\n",
> str_error_r(errno, sbuf, sizeof(sbuf)));
> @@ -123,7 +138,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> /*
> * So that we can check perf_sample.cpu on all the samples.
> */
> - if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) {
> + if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, cpu_mask) < 0) {
> pr_debug("sched_setaffinity: %s\n",
> str_error_r(errno, sbuf, sizeof(sbuf)));
> goto out_delete_evlist;
> @@ -328,6 +343,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> ++errs;
> }
> out_delete_evlist:
> + CPU_FREE(cpu_mask);
> evlist__delete(evlist);
> out:
> perf_sample__exit(&sample);
> --
> 2.43.7
>
>
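The sched_setaffinity side of the fix follows the same pattern: the mask handed to the kernel must be allocated and sized with the same CPU_*_S machinery. Below is a hedged sketch of pinning a process to one CPU; pin_to_cpu is a hypothetical helper, and sysconf(_SC_NPROCESSORS_CONF) stands in for the cpu__max_cpu().cpu that the patch uses.

#define _GNU_SOURCE
#include <sched.h>
#include <unistd.h>

/* Hypothetical helper: allow `pid` to run only on `cpu`. */
static int pin_to_cpu(pid_t pid, int cpu)
{
        int nrcpus = (int)sysconf(_SC_NPROCESSORS_CONF);
        cpu_set_t *mask = CPU_ALLOC(nrcpus);
        size_t size = CPU_ALLOC_SIZE(nrcpus);
        int ret;

        if (!mask)
                return -1;

        CPU_ZERO_S(size, mask);
        CPU_SET_S(cpu, size, mask);     /* leave only this CPU set in the mask */

        /* the size passed to the kernel matches the allocation, so the
         * call works however many CPUs the system has */
        ret = sched_setaffinity(pid, size, mask);
        CPU_FREE(mask);
        return ret;
}

In the test itself, sched__get_first_possible_cpu reduces the mask to the first possible CPU before it is passed to sched_setaffinity, so perf_sample.cpu is predictable for every sample.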
* Re: [PATCH] tools/perf/tests: Update perf record testcase to fix usage of affinity for machines with #CPUs > 1K
2025-08-14 11:49 [PATCH] tools/perf/tests: Update perf record testcase to fix usage of affinity for machines with #CPUs > 1K Athira Rajeev
2025-08-14 12:23 ` Venkat
@ 2025-08-14 20:23 ` Namhyung Kim
2025-08-20 17:20 ` Athira Rajeev
2025-08-19 11:56 ` tejas05
2 siblings, 1 reply; 6+ messages in thread
From: Namhyung Kim @ 2025-08-14 20:23 UTC (permalink / raw)
To: Athira Rajeev
Cc: acme, jolsa, adrian.hunter, irogers, linux-perf-users, maddy,
kjain, hbathini, Aditya.Bodkhe1, Tejas Manhas
Hello,
On Thu, Aug 14, 2025 at 05:19:08PM +0530, Athira Rajeev wrote:
> The perf record testcase fails on systems with more than 1K CPUs.
>
> Testcase: perf test -vv "PERF_RECORD_* events & perf_sample fields"
>
> PERF_RECORD_* events & perf_sample fields :
> --- start ---
> test child forked, pid 272482
> sched_getaffinity: Invalid argument
> sched__get_first_possible_cpu: Invalid argument
> test child finished with -1
> ---- end ----
> PERF_RECORD_* events & perf_sample fields: FAILED!
>
> sched__get_first_possible_cpu uses "sched_getaffinity" to get the
> cpumask, and this call returns EINVAL (Invalid argument).
> This happens because the default mask size in glibc is 1024. To
> overcome this 1024-CPU mask size limitation of cpu_set_t, change the
> mask size using the CPU_*_S macros, i.e., use CPU_ALLOC to allocate the
> cpumask and CPU_ALLOC_SIZE for its size. The same fix is needed for the
> mask used with sched_setaffinity so that the mask size is large enough
> to represent the number of possible CPUs in the system.
>
> Reported-by: Tejas Manhas <tejas05@linux.ibm.com>
> Signed-off-by: Athira Rajeev <atrajeev@linux.ibm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Thanks,
Namhyung
> ---
> tools/perf/tests/perf-record.c | 36 ++++++++++++++++++++++++----------
> 1 file changed, 26 insertions(+), 10 deletions(-)
>
> diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
> index 0b3c37e66871..d895df037707 100644
> --- a/tools/perf/tests/perf-record.c
> +++ b/tools/perf/tests/perf-record.c
> @@ -13,15 +13,19 @@
> #include "tests.h"
> #include "util/mmap.h"
> #include "util/sample.h"
> +#include "util/cpumap.h"
>
> static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
> {
> - int i, cpu = -1, nrcpus = 1024;
> + int i, cpu = -1;
> + int nrcpus = cpu__max_cpu().cpu;
> + size_t size = CPU_ALLOC_SIZE(nrcpus);
> +
> realloc:
> - CPU_ZERO(maskp);
> + CPU_ZERO_S(size, maskp);
>
> - if (sched_getaffinity(pid, sizeof(*maskp), maskp) == -1) {
> - if (errno == EINVAL && nrcpus < (1024 << 8)) {
> + if (sched_getaffinity(pid, size, maskp) == -1) {
> + if (errno == EINVAL && nrcpus < (cpu__max_cpu().cpu << 8)) {
> nrcpus = nrcpus << 2;
> goto realloc;
> }
> @@ -30,11 +34,11 @@ static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
> }
>
> for (i = 0; i < nrcpus; i++) {
> - if (CPU_ISSET(i, maskp)) {
> + if (CPU_ISSET_S(i, size, maskp)) {
> if (cpu == -1)
> cpu = i;
> else
> - CPU_CLR(i, maskp);
> + CPU_CLR_S(i, size, maskp);
> }
> }
>
> @@ -50,8 +54,9 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> .no_buffering = true,
> .mmap_pages = 256,
> };
> - cpu_set_t cpu_mask;
> - size_t cpu_mask_size = sizeof(cpu_mask);
> + int nrcpus = cpu__max_cpu().cpu;
> + cpu_set_t *cpu_mask;
> + size_t cpu_mask_size;
> struct evlist *evlist = evlist__new_dummy();
> struct evsel *evsel;
> struct perf_sample sample;
> @@ -69,12 +74,22 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
> char sbuf[STRERR_BUFSIZE];
>
> + cpu_mask = CPU_ALLOC(nrcpus);
> + if (!cpu_mask) {
> + pr_debug("failed to create cpumask\n");
> + goto out;
> + }
> +
> + cpu_mask_size = CPU_ALLOC_SIZE(nrcpus);
> + CPU_ZERO_S(cpu_mask_size, cpu_mask);
> +
> perf_sample__init(&sample, /*all=*/false);
> if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
> evlist = evlist__new_default();
>
> if (evlist == NULL) {
> pr_debug("Not enough memory to create evlist\n");
> + CPU_FREE(cpu_mask);
> goto out;
> }
>
> @@ -111,7 +126,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> evsel__set_sample_bit(evsel, TIME);
> evlist__config(evlist, &opts, NULL);
>
> - err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
> + err = sched__get_first_possible_cpu(evlist->workload.pid, cpu_mask);
> if (err < 0) {
> pr_debug("sched__get_first_possible_cpu: %s\n",
> str_error_r(errno, sbuf, sizeof(sbuf)));
> @@ -123,7 +138,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> /*
> * So that we can check perf_sample.cpu on all the samples.
> */
> - if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) {
> + if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, cpu_mask) < 0) {
> pr_debug("sched_setaffinity: %s\n",
> str_error_r(errno, sbuf, sizeof(sbuf)));
> goto out_delete_evlist;
> @@ -328,6 +343,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> ++errs;
> }
> out_delete_evlist:
> + CPU_FREE(cpu_mask);
> evlist__delete(evlist);
> out:
> perf_sample__exit(&sample);
> --
> 2.43.7
>
* Re: [PATCH] tools/perf/tests: Update perf record testcase to fix usage of affinity for machines with #CPUs > 1K
2025-08-14 20:23 ` Namhyung Kim
@ 2025-08-20 17:20 ` Athira Rajeev
0 siblings, 0 replies; 6+ messages in thread
From: Athira Rajeev @ 2025-08-20 17:20 UTC (permalink / raw)
To: Namhyung Kim
Cc: Arnaldo Carvalho de Melo, Jiri Olsa, adrian.hunter, Ian Rogers,
linux-perf-users, maddy, kjain, hbathini, Aditya.Bodkhe1,
Tejas Manhas
> On 15 Aug 2025, at 1:53 AM, Namhyung Kim <namhyung@kernel.org> wrote:
>
> Hello,
>
> On Thu, Aug 14, 2025 at 05:19:08PM +0530, Athira Rajeev wrote:
>> The perf record testcase fails on systems with more than 1K CPUs.
>>
>> Testcase: perf test -vv "PERF_RECORD_* events & perf_sample fields"
>>
>> PERF_RECORD_* events & perf_sample fields :
>> --- start ---
>> test child forked, pid 272482
>> sched_getaffinity: Invalid argument
>> sched__get_first_possible_cpu: Invalid argument
>> test child finished with -1
>> ---- end ----
>> PERF_RECORD_* events & perf_sample fields: FAILED!
>>
>> sched__get_first_possible_cpu uses "sched_getaffinity" to get the
>> cpumask, and this call returns EINVAL (Invalid argument).
>> This happens because the default mask size in glibc is 1024. To
>> overcome this 1024-CPU mask size limitation of cpu_set_t, change the
>> mask size using the CPU_*_S macros, i.e., use CPU_ALLOC to allocate the
>> cpumask and CPU_ALLOC_SIZE for its size. The same fix is needed for the
>> mask used with sched_setaffinity so that the mask size is large enough
>> to represent the number of possible CPUs in the system.
>>
>> Reported-by: Tejas Manhas <tejas05@linux.ibm.com>
>> Signed-off-by: Athira Rajeev <atrajeev@linux.ibm.com>
>
> Acked-by: Namhyung Kim <namhyung@kernel.org>
>
> Thanks,
> Namhyung
Thanks, Namhyung, for the Ack.
Athira.
>
>> ---
>> tools/perf/tests/perf-record.c | 36 ++++++++++++++++++++++++----------
>> 1 file changed, 26 insertions(+), 10 deletions(-)
>>
>> diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
>> index 0b3c37e66871..d895df037707 100644
>> --- a/tools/perf/tests/perf-record.c
>> +++ b/tools/perf/tests/perf-record.c
>> @@ -13,15 +13,19 @@
>> #include "tests.h"
>> #include "util/mmap.h"
>> #include "util/sample.h"
>> +#include "util/cpumap.h"
>>
>> static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
>> {
>> - int i, cpu = -1, nrcpus = 1024;
>> + int i, cpu = -1;
>> + int nrcpus = cpu__max_cpu().cpu;
>> + size_t size = CPU_ALLOC_SIZE(nrcpus);
>> +
>> realloc:
>> - CPU_ZERO(maskp);
>> + CPU_ZERO_S(size, maskp);
>>
>> - if (sched_getaffinity(pid, sizeof(*maskp), maskp) == -1) {
>> - if (errno == EINVAL && nrcpus < (1024 << 8)) {
>> + if (sched_getaffinity(pid, size, maskp) == -1) {
>> + if (errno == EINVAL && nrcpus < (cpu__max_cpu().cpu << 8)) {
>> nrcpus = nrcpus << 2;
>> goto realloc;
>> }
>> @@ -30,11 +34,11 @@ static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
>> }
>>
>> for (i = 0; i < nrcpus; i++) {
>> - if (CPU_ISSET(i, maskp)) {
>> + if (CPU_ISSET_S(i, size, maskp)) {
>> if (cpu == -1)
>> cpu = i;
>> else
>> - CPU_CLR(i, maskp);
>> + CPU_CLR_S(i, size, maskp);
>> }
>> }
>>
>> @@ -50,8 +54,9 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
>> .no_buffering = true,
>> .mmap_pages = 256,
>> };
>> - cpu_set_t cpu_mask;
>> - size_t cpu_mask_size = sizeof(cpu_mask);
>> + int nrcpus = cpu__max_cpu().cpu;
>> + cpu_set_t *cpu_mask;
>> + size_t cpu_mask_size;
>> struct evlist *evlist = evlist__new_dummy();
>> struct evsel *evsel;
>> struct perf_sample sample;
>> @@ -69,12 +74,22 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
>> int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
>> char sbuf[STRERR_BUFSIZE];
>>
>> + cpu_mask = CPU_ALLOC(nrcpus);
>> + if (!cpu_mask) {
>> + pr_debug("failed to create cpumask\n");
>> + goto out;
>> + }
>> +
>> + cpu_mask_size = CPU_ALLOC_SIZE(nrcpus);
>> + CPU_ZERO_S(cpu_mask_size, cpu_mask);
>> +
>> perf_sample__init(&sample, /*all=*/false);
>> if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
>> evlist = evlist__new_default();
>>
>> if (evlist == NULL) {
>> pr_debug("Not enough memory to create evlist\n");
>> + CPU_FREE(cpu_mask);
>> goto out;
>> }
>>
>> @@ -111,7 +126,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
>> evsel__set_sample_bit(evsel, TIME);
>> evlist__config(evlist, &opts, NULL);
>>
>> - err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
>> + err = sched__get_first_possible_cpu(evlist->workload.pid, cpu_mask);
>> if (err < 0) {
>> pr_debug("sched__get_first_possible_cpu: %s\n",
>> str_error_r(errno, sbuf, sizeof(sbuf)));
>> @@ -123,7 +138,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
>> /*
>> * So that we can check perf_sample.cpu on all the samples.
>> */
>> - if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) {
>> + if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, cpu_mask) < 0) {
>> pr_debug("sched_setaffinity: %s\n",
>> str_error_r(errno, sbuf, sizeof(sbuf)));
>> goto out_delete_evlist;
>> @@ -328,6 +343,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
>> ++errs;
>> }
>> out_delete_evlist:
>> + CPU_FREE(cpu_mask);
>> evlist__delete(evlist);
>> out:
>> perf_sample__exit(&sample);
>> --
>> 2.43.7
* Re: [PATCH] tools/perf/tests: Update perf record testcase to fix usage of affinity for machines with #CPUs > 1K
2025-08-14 11:49 [PATCH] tools/perf/tests: Update perf record testcase to fix usage of affinity for machines with #CPUs > 1K Athira Rajeev
2025-08-14 12:23 ` Venkat
2025-08-14 20:23 ` Namhyung Kim
@ 2025-08-19 11:56 ` tejas05
2025-08-20 17:22 ` Athira Rajeev
2 siblings, 1 reply; 6+ messages in thread
From: tejas05 @ 2025-08-19 11:56 UTC (permalink / raw)
To: Athira Rajeev, acme, jolsa, adrian.hunter, irogers, namhyung
Cc: linux-perf-users, maddy, kjain, hbathini, Aditya.Bodkhe1
On 8/14/25 17:19, Athira Rajeev wrote:
> The perf record testcase fails on systems with more than 1K CPUs.
>
> Testcase: perf test -vv "PERF_RECORD_* events & perf_sample fields"
>
> PERF_RECORD_* events & perf_sample fields :
> --- start ---
> test child forked, pid 272482
> sched_getaffinity: Invalid argument
> sched__get_first_possible_cpu: Invalid argument
> test child finished with -1
> ---- end ----
> PERF_RECORD_* events & perf_sample fields: FAILED!
>
> sched__get_first_possible_cpu uses "sched_getaffinity" to get the
> cpumask, and this call returns EINVAL (Invalid argument).
> This happens because the default mask size in glibc is 1024. To
> overcome this 1024-CPU mask size limitation of cpu_set_t, change the
> mask size using the CPU_*_S macros, i.e., use CPU_ALLOC to allocate the
> cpumask and CPU_ALLOC_SIZE for its size. The same fix is needed for the
> mask used with sched_setaffinity so that the mask size is large enough
> to represent the number of possible CPUs in the system.
>
> Reported-by: Tejas Manhas <tejas05@linux.ibm.com>
> Signed-off-by: Athira Rajeev <atrajeev@linux.ibm.com>
> ---
> tools/perf/tests/perf-record.c | 36 ++++++++++++++++++++++++----------
> 1 file changed, 26 insertions(+), 10 deletions(-)
>
> diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
> index 0b3c37e66871..d895df037707 100644
> --- a/tools/perf/tests/perf-record.c
> +++ b/tools/perf/tests/perf-record.c
> @@ -13,15 +13,19 @@
> #include "tests.h"
> #include "util/mmap.h"
> #include "util/sample.h"
> +#include "util/cpumap.h"
>
> static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
> {
> - int i, cpu = -1, nrcpus = 1024;
> + int i, cpu = -1;
> + int nrcpus = cpu__max_cpu().cpu;
> + size_t size = CPU_ALLOC_SIZE(nrcpus);
> +
> realloc:
> - CPU_ZERO(maskp);
> + CPU_ZERO_S(size, maskp);
>
> - if (sched_getaffinity(pid, sizeof(*maskp), maskp) == -1) {
> - if (errno == EINVAL && nrcpus < (1024 << 8)) {
> + if (sched_getaffinity(pid, size, maskp) == -1) {
> + if (errno == EINVAL && nrcpus < (cpu__max_cpu().cpu << 8)) {
> nrcpus = nrcpus << 2;
> goto realloc;
> }
> @@ -30,11 +34,11 @@ static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
> }
>
> for (i = 0; i < nrcpus; i++) {
> - if (CPU_ISSET(i, maskp)) {
> + if (CPU_ISSET_S(i, size, maskp)) {
> if (cpu == -1)
> cpu = i;
> else
> - CPU_CLR(i, maskp);
> + CPU_CLR_S(i, size, maskp);
> }
> }
>
> @@ -50,8 +54,9 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> .no_buffering = true,
> .mmap_pages = 256,
> };
> - cpu_set_t cpu_mask;
> - size_t cpu_mask_size = sizeof(cpu_mask);
> + int nrcpus = cpu__max_cpu().cpu;
> + cpu_set_t *cpu_mask;
> + size_t cpu_mask_size;
> struct evlist *evlist = evlist__new_dummy();
> struct evsel *evsel;
> struct perf_sample sample;
> @@ -69,12 +74,22 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
> char sbuf[STRERR_BUFSIZE];
>
> + cpu_mask = CPU_ALLOC(nrcpus);
> + if (!cpu_mask) {
> + pr_debug("failed to create cpumask\n");
> + goto out;
> + }
> +
> + cpu_mask_size = CPU_ALLOC_SIZE(nrcpus);
> + CPU_ZERO_S(cpu_mask_size, cpu_mask);
> +
> perf_sample__init(&sample, /*all=*/false);
> if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
> evlist = evlist__new_default();
>
> if (evlist == NULL) {
> pr_debug("Not enough memory to create evlist\n");
> + CPU_FREE(cpu_mask);
> goto out;
> }
>
> @@ -111,7 +126,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> evsel__set_sample_bit(evsel, TIME);
> evlist__config(evlist, &opts, NULL);
>
> - err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
> + err = sched__get_first_possible_cpu(evlist->workload.pid, cpu_mask);
> if (err < 0) {
> pr_debug("sched__get_first_possible_cpu: %s\n",
> str_error_r(errno, sbuf, sizeof(sbuf)));
> @@ -123,7 +138,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> /*
> * So that we can check perf_sample.cpu on all the samples.
> */
> - if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) {
> + if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, cpu_mask) < 0) {
> pr_debug("sched_setaffinity: %s\n",
> str_error_r(errno, sbuf, sizeof(sbuf)));
> goto out_delete_evlist;
> @@ -328,6 +343,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> ++errs;
> }
> out_delete_evlist:
> + CPU_FREE(cpu_mask);
> evlist__delete(evlist);
> out:
> perf_sample__exit(&sample);
Hi,
I have tested the patch on the setup where the issue was seen, and it fixes the issue. Please add the Tested-by tag as below.
Tested-by: Tejas Manhas <tejas05@linux.ibm.com>
Thanks & Regards
Tejas
* Re: [PATCH] tools/perf/tests: Update perf record testcase to fix usage of affinity for machines with #CPUs > 1K
2025-08-19 11:56 ` tejas05
@ 2025-08-20 17:22 ` Athira Rajeev
0 siblings, 0 replies; 6+ messages in thread
From: Athira Rajeev @ 2025-08-20 17:22 UTC (permalink / raw)
To: tejas05, Venkat Rao Bagalkote
Cc: Arnaldo Carvalho de Melo, Jiri Olsa, Adrian Hunter, Ian Rogers,
Namhyung Kim, open list:PERFORMANCE EVENTS SUBSYSTEM,
Madhavan Srinivasan, Kajol Jain, hbathini, Aditya Bodkhe
> On 19 Aug 2025, at 5:26 PM, tejas05 <tejas05@linux.ibm.com> wrote:
>
> On 8/14/25 17:19, Athira Rajeev wrote:
>
>> The perf record testcase fails on systems with more than 1K CPUs.
>>
>> Testcase: perf test -vv "PERF_RECORD_* events & perf_sample fields"
>>
>> PERF_RECORD_* events & perf_sample fields :
>> --- start ---
>> test child forked, pid 272482
>> sched_getaffinity: Invalid argument
>> sched__get_first_possible_cpu: Invalid argument
>> test child finished with -1
>> ---- end ----
>> PERF_RECORD_* events & perf_sample fields: FAILED!
>>
>> sched__get_first_possible_cpu uses "sched_getaffinity" to get the
>> cpumask, and this call returns EINVAL (Invalid argument).
>> This happens because the default mask size in glibc is 1024. To
>> overcome this 1024-CPU mask size limitation of cpu_set_t, change the
>> mask size using the CPU_*_S macros, i.e., use CPU_ALLOC to allocate the
>> cpumask and CPU_ALLOC_SIZE for its size. The same fix is needed for the
>> mask used with sched_setaffinity so that the mask size is large enough
>> to represent the number of possible CPUs in the system.
>>
>> Reported-by: Tejas Manhas <tejas05@linux.ibm.com>
>> Signed-off-by: Athira Rajeev <atrajeev@linux.ibm.com>
>> ---
>> tools/perf/tests/perf-record.c | 36 ++++++++++++++++++++++++----------
>> 1 file changed, 26 insertions(+), 10 deletions(-)
>>
>> diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
>> index 0b3c37e66871..d895df037707 100644
>> --- a/tools/perf/tests/perf-record.c
>> +++ b/tools/perf/tests/perf-record.c
>> @@ -13,15 +13,19 @@
>> #include "tests.h"
>> #include "util/mmap.h"
>> #include "util/sample.h"
>> +#include "util/cpumap.h"
>> static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
>> {
>> - int i, cpu = -1, nrcpus = 1024;
>> + int i, cpu = -1;
>> + int nrcpus = cpu__max_cpu().cpu;
>> + size_t size = CPU_ALLOC_SIZE(nrcpus);
>> +
>> realloc:
>> - CPU_ZERO(maskp);
>> + CPU_ZERO_S(size, maskp);
>> - if (sched_getaffinity(pid, sizeof(*maskp), maskp) == -1) {
>> - if (errno == EINVAL && nrcpus < (1024 << 8)) {
>> + if (sched_getaffinity(pid, size, maskp) == -1) {
>> + if (errno == EINVAL && nrcpus < (cpu__max_cpu().cpu << 8)) {
>> nrcpus = nrcpus << 2;
>> goto realloc;
>> }
>> @@ -30,11 +34,11 @@ static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
>> }
>> for (i = 0; i < nrcpus; i++) {
>> - if (CPU_ISSET(i, maskp)) {
>> + if (CPU_ISSET_S(i, size, maskp)) {
>> if (cpu == -1)
>> cpu = i;
>> else
>> - CPU_CLR(i, maskp);
>> + CPU_CLR_S(i, size, maskp);
>> }
>> }
>> @@ -50,8 +54,9 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
>> .no_buffering = true,
>> .mmap_pages = 256,
>> };
>> - cpu_set_t cpu_mask;
>> - size_t cpu_mask_size = sizeof(cpu_mask);
>> + int nrcpus = cpu__max_cpu().cpu;
>> + cpu_set_t *cpu_mask;
>> + size_t cpu_mask_size;
>> struct evlist *evlist = evlist__new_dummy();
>> struct evsel *evsel;
>> struct perf_sample sample;
>> @@ -69,12 +74,22 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
>> int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
>> char sbuf[STRERR_BUFSIZE];
>> + cpu_mask = CPU_ALLOC(nrcpus);
>> + if (!cpu_mask) {
>> + pr_debug("failed to create cpumask\n");
>> + goto out;
>> + }
>> +
>> + cpu_mask_size = CPU_ALLOC_SIZE(nrcpus);
>> + CPU_ZERO_S(cpu_mask_size, cpu_mask);
>> +
>> perf_sample__init(&sample, /*all=*/false);
>> if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
>> evlist = evlist__new_default();
>> if (evlist == NULL) {
>> pr_debug("Not enough memory to create evlist\n");
>> + CPU_FREE(cpu_mask);
>> goto out;
>> }
>> @@ -111,7 +126,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
>> evsel__set_sample_bit(evsel, TIME);
>> evlist__config(evlist, &opts, NULL);
>> - err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
>> + err = sched__get_first_possible_cpu(evlist->workload.pid, cpu_mask);
>> if (err < 0) {
>> pr_debug("sched__get_first_possible_cpu: %s\n",
>> str_error_r(errno, sbuf, sizeof(sbuf)));
>> @@ -123,7 +138,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
>> /*
>> * So that we can check perf_sample.cpu on all the samples.
>> */
>> - if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) {
>> + if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, cpu_mask) < 0) {
>> pr_debug("sched_setaffinity: %s\n",
>> str_error_r(errno, sbuf, sizeof(sbuf)));
>> goto out_delete_evlist;
>> @@ -328,6 +343,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
>> ++errs;
>> }
>> out_delete_evlist:
>> + CPU_FREE(cpu_mask);
>> evlist__delete(evlist);
>> out:
>> perf_sample__exit(&sample);
>
>
> Hi,
>
> I have tested the patch on the setup where the issue was seen, the patch fixes the issue. Please add the tested-by tag as below.
> Tested-by: Tejas Manhas<tejas05@linux.ibm.com>
>
> Thanks & Regards
> Tejas
Hi Tejas, Venkat
Thanks for testing the patch
Athira.