[PATCH] perf tests: Don't retest sections in "Object code reading"

linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] perf tests: Don't retest sections in "Object code reading"
@ 2025-10-06 13:11 James Clark
  2025-10-06 15:21 ` Ian Rogers
  0 siblings, 1 reply; 11+ messages in thread
From: James Clark @ 2025-10-06 13:11 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, Leo Yan
  Cc: linux-perf-users, linux-kernel, James Clark

We already only test each kcore map once, but on slow systems
(particularly with network filesystems) even the non-kcore maps are
slow. The test can test the same objump output over and over which only
wastes time. Generalize the skipping mechanism to track all DSOs and
addresses so that each section is only tested once.

On a fully loaded Arm Juno (simulating a parallel Perf test run) with a
network filesystem, the original runtime is:

  real  1m51.126s
  user  0m19.445s
  sys   1m15.431s

And the new runtime is:

  real  0m48.873s
  user  0m8.031s
  sys   0m32.353s

Signed-off-by: James Clark <james.clark@linaro.org>
---
 tools/perf/tests/code-reading.c | 119 ++++++++++++++++++++++++++++------------
 1 file changed, 85 insertions(+), 34 deletions(-)

diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 9c2091310191..4c9fbf6965c4 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -2,6 +2,7 @@
 #include <errno.h>
 #include <linux/kconfig.h>
 #include <linux/kernel.h>
+#include <linux/rbtree.h>
 #include <linux/types.h>
 #include <inttypes.h>
 #include <stdlib.h>
@@ -39,11 +40,64 @@
 #define BUFSZ	1024
 #define READLEN	128
 
-struct state {
-	u64 done[1024];
-	size_t done_cnt;
+struct tested_section {
+	struct rb_node rb_node;
+	u64 addr;
+	char path[PATH_MAX];
 };
 
+static bool tested_code_insert_or_exists(const char *path, u64 addr,
+					 struct rb_root *tested_sections)
+{
+	struct rb_node **node = &tested_sections->rb_node;
+	struct rb_node *parent = NULL;
+	struct tested_section *data;
+
+	while (*node) {
+		int cmp;
+
+		parent = *node;
+		data = rb_entry(*node, struct tested_section, rb_node);
+		cmp = strcmp(path, data->path);
+		if (!cmp) {
+			if (addr < data->addr)
+				cmp = -1;
+			else if (addr > data->addr)
+				cmp = 1;
+			else
+				return true; /* already tested */
+		}
+
+		if (cmp < 0)
+			node = &(*node)->rb_left;
+		else
+			node = &(*node)->rb_right;
+	}
+
+	data = zalloc(sizeof(*data));
+	if (!data)
+		return true;
+
+	data->addr = addr;
+	strlcpy(data->path, path, sizeof(data->path));
+	rb_link_node(&data->rb_node, parent, node);
+	rb_insert_color(&data->rb_node, tested_sections);
+	return false;
+}
+
+static void tested_sections__free(struct rb_root *root)
+{
+	while (!RB_EMPTY_ROOT(root)) {
+		struct rb_node *node = rb_first(root);
+		struct tested_section *ts = rb_entry(node,
+						     struct tested_section,
+						     rb_node);
+
+		rb_erase(node, root);
+		free(ts);
+	}
+}
+
 static size_t read_objdump_chunk(const char **line, unsigned char **buf,
 				 size_t *buf_len)
 {
@@ -316,13 +370,15 @@ static void dump_buf(unsigned char *buf, size_t len)
 }
 
 static int read_object_code(u64 addr, size_t len, u8 cpumode,
-			    struct thread *thread, struct state *state)
+			    struct thread *thread,
+			    struct rb_root *tested_sections)
 {
 	struct addr_location al;
 	unsigned char buf1[BUFSZ] = {0};
 	unsigned char buf2[BUFSZ] = {0};
 	size_t ret_len;
 	u64 objdump_addr;
+	u64 skip_addr;
 	const char *objdump_name;
 	char decomp_name[KMOD_DECOMP_LEN];
 	bool decomp = false;
@@ -350,6 +406,18 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 		goto out;
 	}
 
+	/*
+	 * Don't retest the same addresses. objdump struggles with kcore - try
+	 * each map only once even if the address is different.
+	 */
+	skip_addr = dso__is_kcore(dso) ? map__start(al.map) : al.addr;
+	if (tested_code_insert_or_exists(dso__long_name(dso), skip_addr,
+					 tested_sections)) {
+		pr_debug("Already tested %s @ %#"PRIx64" - skipping\n",
+			 dso__long_name(dso), skip_addr);
+		goto out;
+	}
+
 	pr_debug("On file address is: %#"PRIx64"\n", al.addr);
 
 	if (len > BUFSZ)
@@ -387,24 +455,6 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 		goto out;
 	}
 
-	/* objdump struggles with kcore - try each map only once */
-	if (dso__is_kcore(dso)) {
-		size_t d;
-
-		for (d = 0; d < state->done_cnt; d++) {
-			if (state->done[d] == map__start(al.map)) {
-				pr_debug("kcore map tested already");
-				pr_debug(" - skipping\n");
-				goto out;
-			}
-		}
-		if (state->done_cnt >= ARRAY_SIZE(state->done)) {
-			pr_debug("Too many kcore maps - skipping\n");
-			goto out;
-		}
-		state->done[state->done_cnt++] = map__start(al.map);
-	}
-
 	objdump_name = dso__long_name(dso);
 	if (dso__needs_decompress(dso)) {
 		if (dso__decompress_kmodule_path(dso, objdump_name,
@@ -471,9 +521,9 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 	return err;
 }
 
-static int process_sample_event(struct machine *machine,
-				struct evlist *evlist,
-				union perf_event *event, struct state *state)
+static int process_sample_event(struct machine *machine, struct evlist *evlist,
+				union perf_event *event,
+				struct rb_root *tested_sections)
 {
 	struct perf_sample sample;
 	struct thread *thread;
@@ -494,7 +544,8 @@ static int process_sample_event(struct machine *machine,
 		goto out;
 	}
 
-	ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state);
+	ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread,
+			       tested_sections);
 	thread__put(thread);
 out:
 	perf_sample__exit(&sample);
@@ -502,10 +553,11 @@ static int process_sample_event(struct machine *machine,
 }
 
 static int process_event(struct machine *machine, struct evlist *evlist,
-			 union perf_event *event, struct state *state)
+			 union perf_event *event, struct rb_root *tested_sections)
 {
 	if (event->header.type == PERF_RECORD_SAMPLE)
-		return process_sample_event(machine, evlist, event, state);
+		return process_sample_event(machine, evlist, event,
+					    tested_sections);
 
 	if (event->header.type == PERF_RECORD_THROTTLE ||
 	    event->header.type == PERF_RECORD_UNTHROTTLE)
@@ -525,7 +577,7 @@ static int process_event(struct machine *machine, struct evlist *evlist,
 }
 
 static int process_events(struct machine *machine, struct evlist *evlist,
-			  struct state *state)
+			  struct rb_root *tested_sections)
 {
 	union perf_event *event;
 	struct mmap *md;
@@ -537,7 +589,7 @@ static int process_events(struct machine *machine, struct evlist *evlist,
 			continue;
 
 		while ((event = perf_mmap__read_event(&md->core)) != NULL) {
-			ret = process_event(machine, evlist, event, state);
+			ret = process_event(machine, evlist, event, tested_sections);
 			perf_mmap__consume(&md->core);
 			if (ret < 0)
 				return ret;
@@ -637,9 +689,7 @@ static int do_test_code_reading(bool try_kcore)
 			.uses_mmap   = true,
 		},
 	};
-	struct state state = {
-		.done_cnt = 0,
-	};
+	struct rb_root tested_sections = RB_ROOT;
 	struct perf_thread_map *threads = NULL;
 	struct perf_cpu_map *cpus = NULL;
 	struct evlist *evlist = NULL;
@@ -773,7 +823,7 @@ static int do_test_code_reading(bool try_kcore)
 
 	evlist__disable(evlist);
 
-	ret = process_events(machine, evlist, &state);
+	ret = process_events(machine, evlist, &tested_sections);
 	if (ret < 0)
 		goto out_put;
 
@@ -793,6 +843,7 @@ static int do_test_code_reading(bool try_kcore)
 	perf_thread_map__put(threads);
 	machine__delete(machine);
 	perf_env__exit(&host_env);
+	tested_sections__free(&tested_sections);
 
 	return err;
 }

---
base-commit: a22d167ed82505f770340c3a7c257c04ba24dac9
change-id: 20251006-james-perf-object-code-reading-9646e486d595

Best regards,
-- 
James Clark <james.clark@linaro.org>


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tests: Don't retest sections in "Object code reading"
  2025-10-06 13:11 [PATCH] perf tests: Don't retest sections in "Object code reading" James Clark
@ 2025-10-06 15:21 ` Ian Rogers
  2025-10-06 20:03   ` Arnaldo Carvalho de Melo
  2025-10-07  9:10   ` James Clark
  0 siblings, 2 replies; 11+ messages in thread
From: Ian Rogers @ 2025-10-06 15:21 UTC (permalink / raw)
  To: James Clark
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Adrian Hunter, Leo Yan, linux-perf-users, linux-kernel

On Mon, Oct 6, 2025 at 6:11 AM James Clark <james.clark@linaro.org> wrote:
>
> We already only test each kcore map once, but on slow systems
> (particularly with network filesystems) even the non-kcore maps are
> slow. The test can test the same objump output over and over which only
> wastes time. Generalize the skipping mechanism to track all DSOs and
> addresses so that each section is only tested once.
>
> On a fully loaded Arm Juno (simulating a parallel Perf test run) with a
> network filesystem, the original runtime is:
>
>   real  1m51.126s
>   user  0m19.445s
>   sys   1m15.431s
>
> And the new runtime is:
>
>   real  0m48.873s
>   user  0m8.031s
>   sys   0m32.353s
>
> Signed-off-by: James Clark <james.clark@linaro.org>

Reviewed-by: Ian Rogers <irogers@google.com>

> ---
>  tools/perf/tests/code-reading.c | 119 ++++++++++++++++++++++++++++------------
>  1 file changed, 85 insertions(+), 34 deletions(-)
>
> diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
> index 9c2091310191..4c9fbf6965c4 100644
> --- a/tools/perf/tests/code-reading.c
> +++ b/tools/perf/tests/code-reading.c
> @@ -2,6 +2,7 @@
>  #include <errno.h>
>  #include <linux/kconfig.h>
>  #include <linux/kernel.h>
> +#include <linux/rbtree.h>
>  #include <linux/types.h>
>  #include <inttypes.h>
>  #include <stdlib.h>
> @@ -39,11 +40,64 @@
>  #define BUFSZ  1024
>  #define READLEN        128
>
> -struct state {
> -       u64 done[1024];
> -       size_t done_cnt;
> +struct tested_section {
> +       struct rb_node rb_node;
> +       u64 addr;
> +       char path[PATH_MAX];
>  };
>
> +static bool tested_code_insert_or_exists(const char *path, u64 addr,
> +                                        struct rb_root *tested_sections)
> +{
> +       struct rb_node **node = &tested_sections->rb_node;
> +       struct rb_node *parent = NULL;
> +       struct tested_section *data;
> +
> +       while (*node) {
> +               int cmp;
> +
> +               parent = *node;
> +               data = rb_entry(*node, struct tested_section, rb_node);
> +               cmp = strcmp(path, data->path);
> +               if (!cmp) {
> +                       if (addr < data->addr)
> +                               cmp = -1;
> +                       else if (addr > data->addr)
> +                               cmp = 1;
> +                       else
> +                               return true; /* already tested */
> +               }
> +
> +               if (cmp < 0)
> +                       node = &(*node)->rb_left;
> +               else
> +                       node = &(*node)->rb_right;
> +       }
> +
> +       data = zalloc(sizeof(*data));
> +       if (!data)
> +               return true;
> +
> +       data->addr = addr;
> +       strlcpy(data->path, path, sizeof(data->path));

nit: perhaps strdup rather than having 4kb per tested_section.

Thanks,
Ian

> +       rb_link_node(&data->rb_node, parent, node);
> +       rb_insert_color(&data->rb_node, tested_sections);
> +       return false;
> +}
> +
> +static void tested_sections__free(struct rb_root *root)
> +{
> +       while (!RB_EMPTY_ROOT(root)) {
> +               struct rb_node *node = rb_first(root);
> +               struct tested_section *ts = rb_entry(node,
> +                                                    struct tested_section,
> +                                                    rb_node);
> +
> +               rb_erase(node, root);
> +               free(ts);
> +       }
> +}
> +
>  static size_t read_objdump_chunk(const char **line, unsigned char **buf,
>                                  size_t *buf_len)
>  {
> @@ -316,13 +370,15 @@ static void dump_buf(unsigned char *buf, size_t len)
>  }
>
>  static int read_object_code(u64 addr, size_t len, u8 cpumode,
> -                           struct thread *thread, struct state *state)
> +                           struct thread *thread,
> +                           struct rb_root *tested_sections)
>  {
>         struct addr_location al;
>         unsigned char buf1[BUFSZ] = {0};
>         unsigned char buf2[BUFSZ] = {0};
>         size_t ret_len;
>         u64 objdump_addr;
> +       u64 skip_addr;
>         const char *objdump_name;
>         char decomp_name[KMOD_DECOMP_LEN];
>         bool decomp = false;
> @@ -350,6 +406,18 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
>                 goto out;
>         }
>
> +       /*
> +        * Don't retest the same addresses. objdump struggles with kcore - try
> +        * each map only once even if the address is different.
> +        */
> +       skip_addr = dso__is_kcore(dso) ? map__start(al.map) : al.addr;
> +       if (tested_code_insert_or_exists(dso__long_name(dso), skip_addr,
> +                                        tested_sections)) {
> +               pr_debug("Already tested %s @ %#"PRIx64" - skipping\n",
> +                        dso__long_name(dso), skip_addr);
> +               goto out;
> +       }
> +
>         pr_debug("On file address is: %#"PRIx64"\n", al.addr);
>
>         if (len > BUFSZ)
> @@ -387,24 +455,6 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
>                 goto out;
>         }
>
> -       /* objdump struggles with kcore - try each map only once */
> -       if (dso__is_kcore(dso)) {
> -               size_t d;
> -
> -               for (d = 0; d < state->done_cnt; d++) {
> -                       if (state->done[d] == map__start(al.map)) {
> -                               pr_debug("kcore map tested already");
> -                               pr_debug(" - skipping\n");
> -                               goto out;
> -                       }
> -               }
> -               if (state->done_cnt >= ARRAY_SIZE(state->done)) {
> -                       pr_debug("Too many kcore maps - skipping\n");
> -                       goto out;
> -               }
> -               state->done[state->done_cnt++] = map__start(al.map);
> -       }
> -
>         objdump_name = dso__long_name(dso);
>         if (dso__needs_decompress(dso)) {
>                 if (dso__decompress_kmodule_path(dso, objdump_name,
> @@ -471,9 +521,9 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
>         return err;
>  }
>
> -static int process_sample_event(struct machine *machine,
> -                               struct evlist *evlist,
> -                               union perf_event *event, struct state *state)
> +static int process_sample_event(struct machine *machine, struct evlist *evlist,
> +                               union perf_event *event,
> +                               struct rb_root *tested_sections)
>  {
>         struct perf_sample sample;
>         struct thread *thread;
> @@ -494,7 +544,8 @@ static int process_sample_event(struct machine *machine,
>                 goto out;
>         }
>
> -       ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state);
> +       ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread,
> +                              tested_sections);
>         thread__put(thread);
>  out:
>         perf_sample__exit(&sample);
> @@ -502,10 +553,11 @@ static int process_sample_event(struct machine *machine,
>  }
>
>  static int process_event(struct machine *machine, struct evlist *evlist,
> -                        union perf_event *event, struct state *state)
> +                        union perf_event *event, struct rb_root *tested_sections)
>  {
>         if (event->header.type == PERF_RECORD_SAMPLE)
> -               return process_sample_event(machine, evlist, event, state);
> +               return process_sample_event(machine, evlist, event,
> +                                           tested_sections);
>
>         if (event->header.type == PERF_RECORD_THROTTLE ||
>             event->header.type == PERF_RECORD_UNTHROTTLE)
> @@ -525,7 +577,7 @@ static int process_event(struct machine *machine, struct evlist *evlist,
>  }
>
>  static int process_events(struct machine *machine, struct evlist *evlist,
> -                         struct state *state)
> +                         struct rb_root *tested_sections)
>  {
>         union perf_event *event;
>         struct mmap *md;
> @@ -537,7 +589,7 @@ static int process_events(struct machine *machine, struct evlist *evlist,
>                         continue;
>
>                 while ((event = perf_mmap__read_event(&md->core)) != NULL) {
> -                       ret = process_event(machine, evlist, event, state);
> +                       ret = process_event(machine, evlist, event, tested_sections);
>                         perf_mmap__consume(&md->core);
>                         if (ret < 0)
>                                 return ret;
> @@ -637,9 +689,7 @@ static int do_test_code_reading(bool try_kcore)
>                         .uses_mmap   = true,
>                 },
>         };
> -       struct state state = {
> -               .done_cnt = 0,
> -       };
> +       struct rb_root tested_sections = RB_ROOT;
>         struct perf_thread_map *threads = NULL;
>         struct perf_cpu_map *cpus = NULL;
>         struct evlist *evlist = NULL;
> @@ -773,7 +823,7 @@ static int do_test_code_reading(bool try_kcore)
>
>         evlist__disable(evlist);
>
> -       ret = process_events(machine, evlist, &state);
> +       ret = process_events(machine, evlist, &tested_sections);
>         if (ret < 0)
>                 goto out_put;
>
> @@ -793,6 +843,7 @@ static int do_test_code_reading(bool try_kcore)
>         perf_thread_map__put(threads);
>         machine__delete(machine);
>         perf_env__exit(&host_env);
> +       tested_sections__free(&tested_sections);
>
>         return err;
>  }
>
> ---
> base-commit: a22d167ed82505f770340c3a7c257c04ba24dac9
> change-id: 20251006-james-perf-object-code-reading-9646e486d595
>
> Best regards,
> --
> James Clark <james.clark@linaro.org>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tests: Don't retest sections in "Object code reading"
  2025-10-06 15:21 ` Ian Rogers
@ 2025-10-06 20:03   ` Arnaldo Carvalho de Melo
  2025-10-07  9:10   ` James Clark
  1 sibling, 0 replies; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2025-10-06 20:03 UTC (permalink / raw)
  To: Ian Rogers
  Cc: James Clark, Peter Zijlstra, Ingo Molnar, Namhyung Kim,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Adrian Hunter,
	Leo Yan, linux-perf-users, linux-kernel

On Mon, Oct 06, 2025 at 08:21:12AM -0700, Ian Rogers wrote:
> On Mon, Oct 6, 2025 at 6:11 AM James Clark <james.clark@linaro.org> wrote:
> > We already only test each kcore map once, but on slow systems
> > (particularly with network filesystems) even the non-kcore maps are
> > slow. The test can test the same objump output over and over which only
> > wastes time. Generalize the skipping mechanism to track all DSOs and
> > addresses so that each section is only tested once.
> >
> > On a fully loaded Arm Juno (simulating a parallel Perf test run) with a
> > network filesystem, the original runtime is:
> >
> >   real  1m51.126s
> >   user  0m19.445s
> >   sys   1m15.431s
> >
> > And the new runtime is:
> >
> >   real  0m48.873s
> >   user  0m8.031s
> >   sys   0m32.353s
> >
> > Signed-off-by: James Clark <james.clark@linaro.org>
> 
> Reviewed-by: Ian Rogers <irogers@google.com>

Thanks, applied to perf-tools-next,

- Arnaldo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tests: Don't retest sections in "Object code reading"
  2025-10-06 15:21 ` Ian Rogers
  2025-10-06 20:03   ` Arnaldo Carvalho de Melo
@ 2025-10-07  9:10   ` James Clark
  2025-10-07 13:16     ` Ian Rogers
  2025-10-07 20:01     ` Arnaldo Carvalho de Melo
  1 sibling, 2 replies; 11+ messages in thread
From: James Clark @ 2025-10-07  9:10 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Adrian Hunter, Leo Yan, linux-perf-users, linux-kernel



On 06/10/2025 4:21 pm, Ian Rogers wrote:
> On Mon, Oct 6, 2025 at 6:11 AM James Clark <james.clark@linaro.org> wrote:
>>
>> We already only test each kcore map once, but on slow systems
>> (particularly with network filesystems) even the non-kcore maps are
>> slow. The test can test the same objump output over and over which only
>> wastes time. Generalize the skipping mechanism to track all DSOs and
>> addresses so that each section is only tested once.
>>
>> On a fully loaded Arm Juno (simulating a parallel Perf test run) with a
>> network filesystem, the original runtime is:
>>
>>    real  1m51.126s
>>    user  0m19.445s
>>    sys   1m15.431s
>>
>> And the new runtime is:
>>
>>    real  0m48.873s
>>    user  0m8.031s
>>    sys   0m32.353s
>>
>> Signed-off-by: James Clark <james.clark@linaro.org>
> 
> Reviewed-by: Ian Rogers <irogers@google.com>
> 
>> ---
>>   tools/perf/tests/code-reading.c | 119 ++++++++++++++++++++++++++++------------
>>   1 file changed, 85 insertions(+), 34 deletions(-)
>>
>> diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
>> index 9c2091310191..4c9fbf6965c4 100644
>> --- a/tools/perf/tests/code-reading.c
>> +++ b/tools/perf/tests/code-reading.c
>> @@ -2,6 +2,7 @@
>>   #include <errno.h>
>>   #include <linux/kconfig.h>
>>   #include <linux/kernel.h>
>> +#include <linux/rbtree.h>
>>   #include <linux/types.h>
>>   #include <inttypes.h>
>>   #include <stdlib.h>
>> @@ -39,11 +40,64 @@
>>   #define BUFSZ  1024
>>   #define READLEN        128
>>
>> -struct state {
>> -       u64 done[1024];
>> -       size_t done_cnt;
>> +struct tested_section {
>> +       struct rb_node rb_node;
>> +       u64 addr;
>> +       char path[PATH_MAX];
>>   };
>>
>> +static bool tested_code_insert_or_exists(const char *path, u64 addr,
>> +                                        struct rb_root *tested_sections)
>> +{
>> +       struct rb_node **node = &tested_sections->rb_node;
>> +       struct rb_node *parent = NULL;
>> +       struct tested_section *data;
>> +
>> +       while (*node) {
>> +               int cmp;
>> +
>> +               parent = *node;
>> +               data = rb_entry(*node, struct tested_section, rb_node);
>> +               cmp = strcmp(path, data->path);
>> +               if (!cmp) {
>> +                       if (addr < data->addr)
>> +                               cmp = -1;
>> +                       else if (addr > data->addr)
>> +                               cmp = 1;
>> +                       else
>> +                               return true; /* already tested */
>> +               }
>> +
>> +               if (cmp < 0)
>> +                       node = &(*node)->rb_left;
>> +               else
>> +                       node = &(*node)->rb_right;
>> +       }
>> +
>> +       data = zalloc(sizeof(*data));
>> +       if (!data)
>> +               return true;
>> +
>> +       data->addr = addr;
>> +       strlcpy(data->path, path, sizeof(data->path));
> 
> nit: perhaps strdup rather than having 4kb per tested_section.
> 
> Thanks,
> Ian
> 

Oh yeah that would have been better, not sure why I didn't do it that 
way. Although the max sections I saw was around 50, and it's usually a 
lot less so it's probably not worth the churn to change it now that 
Arnaldo's applied it?

Thanks
James

>> +       rb_link_node(&data->rb_node, parent, node);
>> +       rb_insert_color(&data->rb_node, tested_sections);
>> +       return false;
>> +}
>> +
>> +static void tested_sections__free(struct rb_root *root)
>> +{
>> +       while (!RB_EMPTY_ROOT(root)) {
>> +               struct rb_node *node = rb_first(root);
>> +               struct tested_section *ts = rb_entry(node,
>> +                                                    struct tested_section,
>> +                                                    rb_node);
>> +
>> +               rb_erase(node, root);
>> +               free(ts);
>> +       }
>> +}
>> +
>>   static size_t read_objdump_chunk(const char **line, unsigned char **buf,
>>                                   size_t *buf_len)
>>   {
>> @@ -316,13 +370,15 @@ static void dump_buf(unsigned char *buf, size_t len)
>>   }
>>
>>   static int read_object_code(u64 addr, size_t len, u8 cpumode,
>> -                           struct thread *thread, struct state *state)
>> +                           struct thread *thread,
>> +                           struct rb_root *tested_sections)
>>   {
>>          struct addr_location al;
>>          unsigned char buf1[BUFSZ] = {0};
>>          unsigned char buf2[BUFSZ] = {0};
>>          size_t ret_len;
>>          u64 objdump_addr;
>> +       u64 skip_addr;
>>          const char *objdump_name;
>>          char decomp_name[KMOD_DECOMP_LEN];
>>          bool decomp = false;
>> @@ -350,6 +406,18 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
>>                  goto out;
>>          }
>>
>> +       /*
>> +        * Don't retest the same addresses. objdump struggles with kcore - try
>> +        * each map only once even if the address is different.
>> +        */
>> +       skip_addr = dso__is_kcore(dso) ? map__start(al.map) : al.addr;
>> +       if (tested_code_insert_or_exists(dso__long_name(dso), skip_addr,
>> +                                        tested_sections)) {
>> +               pr_debug("Already tested %s @ %#"PRIx64" - skipping\n",
>> +                        dso__long_name(dso), skip_addr);
>> +               goto out;
>> +       }
>> +
>>          pr_debug("On file address is: %#"PRIx64"\n", al.addr);
>>
>>          if (len > BUFSZ)
>> @@ -387,24 +455,6 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
>>                  goto out;
>>          }
>>
>> -       /* objdump struggles with kcore - try each map only once */
>> -       if (dso__is_kcore(dso)) {
>> -               size_t d;
>> -
>> -               for (d = 0; d < state->done_cnt; d++) {
>> -                       if (state->done[d] == map__start(al.map)) {
>> -                               pr_debug("kcore map tested already");
>> -                               pr_debug(" - skipping\n");
>> -                               goto out;
>> -                       }
>> -               }
>> -               if (state->done_cnt >= ARRAY_SIZE(state->done)) {
>> -                       pr_debug("Too many kcore maps - skipping\n");
>> -                       goto out;
>> -               }
>> -               state->done[state->done_cnt++] = map__start(al.map);
>> -       }
>> -
>>          objdump_name = dso__long_name(dso);
>>          if (dso__needs_decompress(dso)) {
>>                  if (dso__decompress_kmodule_path(dso, objdump_name,
>> @@ -471,9 +521,9 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
>>          return err;
>>   }
>>
>> -static int process_sample_event(struct machine *machine,
>> -                               struct evlist *evlist,
>> -                               union perf_event *event, struct state *state)
>> +static int process_sample_event(struct machine *machine, struct evlist *evlist,
>> +                               union perf_event *event,
>> +                               struct rb_root *tested_sections)
>>   {
>>          struct perf_sample sample;
>>          struct thread *thread;
>> @@ -494,7 +544,8 @@ static int process_sample_event(struct machine *machine,
>>                  goto out;
>>          }
>>
>> -       ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state);
>> +       ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread,
>> +                              tested_sections);
>>          thread__put(thread);
>>   out:
>>          perf_sample__exit(&sample);
>> @@ -502,10 +553,11 @@ static int process_sample_event(struct machine *machine,
>>   }
>>
>>   static int process_event(struct machine *machine, struct evlist *evlist,
>> -                        union perf_event *event, struct state *state)
>> +                        union perf_event *event, struct rb_root *tested_sections)
>>   {
>>          if (event->header.type == PERF_RECORD_SAMPLE)
>> -               return process_sample_event(machine, evlist, event, state);
>> +               return process_sample_event(machine, evlist, event,
>> +                                           tested_sections);
>>
>>          if (event->header.type == PERF_RECORD_THROTTLE ||
>>              event->header.type == PERF_RECORD_UNTHROTTLE)
>> @@ -525,7 +577,7 @@ static int process_event(struct machine *machine, struct evlist *evlist,
>>   }
>>
>>   static int process_events(struct machine *machine, struct evlist *evlist,
>> -                         struct state *state)
>> +                         struct rb_root *tested_sections)
>>   {
>>          union perf_event *event;
>>          struct mmap *md;
>> @@ -537,7 +589,7 @@ static int process_events(struct machine *machine, struct evlist *evlist,
>>                          continue;
>>
>>                  while ((event = perf_mmap__read_event(&md->core)) != NULL) {
>> -                       ret = process_event(machine, evlist, event, state);
>> +                       ret = process_event(machine, evlist, event, tested_sections);
>>                          perf_mmap__consume(&md->core);
>>                          if (ret < 0)
>>                                  return ret;
>> @@ -637,9 +689,7 @@ static int do_test_code_reading(bool try_kcore)
>>                          .uses_mmap   = true,
>>                  },
>>          };
>> -       struct state state = {
>> -               .done_cnt = 0,
>> -       };
>> +       struct rb_root tested_sections = RB_ROOT;
>>          struct perf_thread_map *threads = NULL;
>>          struct perf_cpu_map *cpus = NULL;
>>          struct evlist *evlist = NULL;
>> @@ -773,7 +823,7 @@ static int do_test_code_reading(bool try_kcore)
>>
>>          evlist__disable(evlist);
>>
>> -       ret = process_events(machine, evlist, &state);
>> +       ret = process_events(machine, evlist, &tested_sections);
>>          if (ret < 0)
>>                  goto out_put;
>>
>> @@ -793,6 +843,7 @@ static int do_test_code_reading(bool try_kcore)
>>          perf_thread_map__put(threads);
>>          machine__delete(machine);
>>          perf_env__exit(&host_env);
>> +       tested_sections__free(&tested_sections);
>>
>>          return err;
>>   }
>>
>> ---
>> base-commit: a22d167ed82505f770340c3a7c257c04ba24dac9
>> change-id: 20251006-james-perf-object-code-reading-9646e486d595
>>
>> Best regards,
>> --
>> James Clark <james.clark@linaro.org>
>>


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tests: Don't retest sections in "Object code reading"
  2025-10-07  9:10   ` James Clark
@ 2025-10-07 13:16     ` Ian Rogers
  2025-10-07 20:01     ` Arnaldo Carvalho de Melo
  1 sibling, 0 replies; 11+ messages in thread
From: Ian Rogers @ 2025-10-07 13:16 UTC (permalink / raw)
  To: James Clark
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Adrian Hunter, Leo Yan, linux-perf-users, linux-kernel

On Tue, Oct 7, 2025 at 2:10 AM James Clark <james.clark@linaro.org> wrote:
>
>
>
> On 06/10/2025 4:21 pm, Ian Rogers wrote:
> > On Mon, Oct 6, 2025 at 6:11 AM James Clark <james.clark@linaro.org> wrote:
> >>
> >> We already only test each kcore map once, but on slow systems
> >> (particularly with network filesystems) even the non-kcore maps are
> >> slow. The test can test the same objump output over and over which only
> >> wastes time. Generalize the skipping mechanism to track all DSOs and
> >> addresses so that each section is only tested once.
> >>
> >> On a fully loaded Arm Juno (simulating a parallel Perf test run) with a
> >> network filesystem, the original runtime is:
> >>
> >>    real  1m51.126s
> >>    user  0m19.445s
> >>    sys   1m15.431s
> >>
> >> And the new runtime is:
> >>
> >>    real  0m48.873s
> >>    user  0m8.031s
> >>    sys   0m32.353s
> >>
> >> Signed-off-by: James Clark <james.clark@linaro.org>
> >
> > Reviewed-by: Ian Rogers <irogers@google.com>
> >
> >> ---
> >>   tools/perf/tests/code-reading.c | 119 ++++++++++++++++++++++++++++------------
> >>   1 file changed, 85 insertions(+), 34 deletions(-)
> >>
> >> diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
> >> index 9c2091310191..4c9fbf6965c4 100644
> >> --- a/tools/perf/tests/code-reading.c
> >> +++ b/tools/perf/tests/code-reading.c
> >> @@ -2,6 +2,7 @@
> >>   #include <errno.h>
> >>   #include <linux/kconfig.h>
> >>   #include <linux/kernel.h>
> >> +#include <linux/rbtree.h>
> >>   #include <linux/types.h>
> >>   #include <inttypes.h>
> >>   #include <stdlib.h>
> >> @@ -39,11 +40,64 @@
> >>   #define BUFSZ  1024
> >>   #define READLEN        128
> >>
> >> -struct state {
> >> -       u64 done[1024];
> >> -       size_t done_cnt;
> >> +struct tested_section {
> >> +       struct rb_node rb_node;
> >> +       u64 addr;
> >> +       char path[PATH_MAX];
> >>   };
> >>
> >> +static bool tested_code_insert_or_exists(const char *path, u64 addr,
> >> +                                        struct rb_root *tested_sections)
> >> +{
> >> +       struct rb_node **node = &tested_sections->rb_node;
> >> +       struct rb_node *parent = NULL;
> >> +       struct tested_section *data;
> >> +
> >> +       while (*node) {
> >> +               int cmp;
> >> +
> >> +               parent = *node;
> >> +               data = rb_entry(*node, struct tested_section, rb_node);
> >> +               cmp = strcmp(path, data->path);
> >> +               if (!cmp) {
> >> +                       if (addr < data->addr)
> >> +                               cmp = -1;
> >> +                       else if (addr > data->addr)
> >> +                               cmp = 1;
> >> +                       else
> >> +                               return true; /* already tested */
> >> +               }
> >> +
> >> +               if (cmp < 0)
> >> +                       node = &(*node)->rb_left;
> >> +               else
> >> +                       node = &(*node)->rb_right;
> >> +       }
> >> +
> >> +       data = zalloc(sizeof(*data));
> >> +       if (!data)
> >> +               return true;
> >> +
> >> +       data->addr = addr;
> >> +       strlcpy(data->path, path, sizeof(data->path));
> >
> > nit: perhaps strdup rather than having 4kb per tested_section.
> >
> > Thanks,
> > Ian
> >
>
> Oh yeah that would have been better, not sure why I didn't do it that
> way. Although the max sections I saw was around 50, and it's usually a
> lot less so it's probably not worth the churn to change it now that
> Arnaldo's applied it?

I'm easy. It is one of those things I'd probably eye-ball at some
later date and send a driveby patch to change. You could send a patch
and let Arnaldo decide whether to squash it or add a follow up.

Thanks,
Ian

> Thanks
> James
>
> >> +       rb_link_node(&data->rb_node, parent, node);
> >> +       rb_insert_color(&data->rb_node, tested_sections);
> >> +       return false;
> >> +}
> >> +
> >> +static void tested_sections__free(struct rb_root *root)
> >> +{
> >> +       while (!RB_EMPTY_ROOT(root)) {
> >> +               struct rb_node *node = rb_first(root);
> >> +               struct tested_section *ts = rb_entry(node,
> >> +                                                    struct tested_section,
> >> +                                                    rb_node);
> >> +
> >> +               rb_erase(node, root);
> >> +               free(ts);
> >> +       }
> >> +}
> >> +
> >>   static size_t read_objdump_chunk(const char **line, unsigned char **buf,
> >>                                   size_t *buf_len)
> >>   {
> >> @@ -316,13 +370,15 @@ static void dump_buf(unsigned char *buf, size_t len)
> >>   }
> >>
> >>   static int read_object_code(u64 addr, size_t len, u8 cpumode,
> >> -                           struct thread *thread, struct state *state)
> >> +                           struct thread *thread,
> >> +                           struct rb_root *tested_sections)
> >>   {
> >>          struct addr_location al;
> >>          unsigned char buf1[BUFSZ] = {0};
> >>          unsigned char buf2[BUFSZ] = {0};
> >>          size_t ret_len;
> >>          u64 objdump_addr;
> >> +       u64 skip_addr;
> >>          const char *objdump_name;
> >>          char decomp_name[KMOD_DECOMP_LEN];
> >>          bool decomp = false;
> >> @@ -350,6 +406,18 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
> >>                  goto out;
> >>          }
> >>
> >> +       /*
> >> +        * Don't retest the same addresses. objdump struggles with kcore - try
> >> +        * each map only once even if the address is different.
> >> +        */
> >> +       skip_addr = dso__is_kcore(dso) ? map__start(al.map) : al.addr;
> >> +       if (tested_code_insert_or_exists(dso__long_name(dso), skip_addr,
> >> +                                        tested_sections)) {
> >> +               pr_debug("Already tested %s @ %#"PRIx64" - skipping\n",
> >> +                        dso__long_name(dso), skip_addr);
> >> +               goto out;
> >> +       }
> >> +
> >>          pr_debug("On file address is: %#"PRIx64"\n", al.addr);
> >>
> >>          if (len > BUFSZ)
> >> @@ -387,24 +455,6 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
> >>                  goto out;
> >>          }
> >>
> >> -       /* objdump struggles with kcore - try each map only once */
> >> -       if (dso__is_kcore(dso)) {
> >> -               size_t d;
> >> -
> >> -               for (d = 0; d < state->done_cnt; d++) {
> >> -                       if (state->done[d] == map__start(al.map)) {
> >> -                               pr_debug("kcore map tested already");
> >> -                               pr_debug(" - skipping\n");
> >> -                               goto out;
> >> -                       }
> >> -               }
> >> -               if (state->done_cnt >= ARRAY_SIZE(state->done)) {
> >> -                       pr_debug("Too many kcore maps - skipping\n");
> >> -                       goto out;
> >> -               }
> >> -               state->done[state->done_cnt++] = map__start(al.map);
> >> -       }
> >> -
> >>          objdump_name = dso__long_name(dso);
> >>          if (dso__needs_decompress(dso)) {
> >>                  if (dso__decompress_kmodule_path(dso, objdump_name,
> >> @@ -471,9 +521,9 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
> >>          return err;
> >>   }
> >>
> >> -static int process_sample_event(struct machine *machine,
> >> -                               struct evlist *evlist,
> >> -                               union perf_event *event, struct state *state)
> >> +static int process_sample_event(struct machine *machine, struct evlist *evlist,
> >> +                               union perf_event *event,
> >> +                               struct rb_root *tested_sections)
> >>   {
> >>          struct perf_sample sample;
> >>          struct thread *thread;
> >> @@ -494,7 +544,8 @@ static int process_sample_event(struct machine *machine,
> >>                  goto out;
> >>          }
> >>
> >> -       ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state);
> >> +       ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread,
> >> +                              tested_sections);
> >>          thread__put(thread);
> >>   out:
> >>          perf_sample__exit(&sample);
> >> @@ -502,10 +553,11 @@ static int process_sample_event(struct machine *machine,
> >>   }
> >>
> >>   static int process_event(struct machine *machine, struct evlist *evlist,
> >> -                        union perf_event *event, struct state *state)
> >> +                        union perf_event *event, struct rb_root *tested_sections)
> >>   {
> >>          if (event->header.type == PERF_RECORD_SAMPLE)
> >> -               return process_sample_event(machine, evlist, event, state);
> >> +               return process_sample_event(machine, evlist, event,
> >> +                                           tested_sections);
> >>
> >>          if (event->header.type == PERF_RECORD_THROTTLE ||
> >>              event->header.type == PERF_RECORD_UNTHROTTLE)
> >> @@ -525,7 +577,7 @@ static int process_event(struct machine *machine, struct evlist *evlist,
> >>   }
> >>
> >>   static int process_events(struct machine *machine, struct evlist *evlist,
> >> -                         struct state *state)
> >> +                         struct rb_root *tested_sections)
> >>   {
> >>          union perf_event *event;
> >>          struct mmap *md;
> >> @@ -537,7 +589,7 @@ static int process_events(struct machine *machine, struct evlist *evlist,
> >>                          continue;
> >>
> >>                  while ((event = perf_mmap__read_event(&md->core)) != NULL) {
> >> -                       ret = process_event(machine, evlist, event, state);
> >> +                       ret = process_event(machine, evlist, event, tested_sections);
> >>                          perf_mmap__consume(&md->core);
> >>                          if (ret < 0)
> >>                                  return ret;
> >> @@ -637,9 +689,7 @@ static int do_test_code_reading(bool try_kcore)
> >>                          .uses_mmap   = true,
> >>                  },
> >>          };
> >> -       struct state state = {
> >> -               .done_cnt = 0,
> >> -       };
> >> +       struct rb_root tested_sections = RB_ROOT;
> >>          struct perf_thread_map *threads = NULL;
> >>          struct perf_cpu_map *cpus = NULL;
> >>          struct evlist *evlist = NULL;
> >> @@ -773,7 +823,7 @@ static int do_test_code_reading(bool try_kcore)
> >>
> >>          evlist__disable(evlist);
> >>
> >> -       ret = process_events(machine, evlist, &state);
> >> +       ret = process_events(machine, evlist, &tested_sections);
> >>          if (ret < 0)
> >>                  goto out_put;
> >>
> >> @@ -793,6 +843,7 @@ static int do_test_code_reading(bool try_kcore)
> >>          perf_thread_map__put(threads);
> >>          machine__delete(machine);
> >>          perf_env__exit(&host_env);
> >> +       tested_sections__free(&tested_sections);
> >>
> >>          return err;
> >>   }
> >>
> >> ---
> >> base-commit: a22d167ed82505f770340c3a7c257c04ba24dac9
> >> change-id: 20251006-james-perf-object-code-reading-9646e486d595
> >>
> >> Best regards,
> >> --
> >> James Clark <james.clark@linaro.org>
> >>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tests: Don't retest sections in "Object code reading"
  2025-10-07  9:10   ` James Clark
  2025-10-07 13:16     ` Ian Rogers
@ 2025-10-07 20:01     ` Arnaldo Carvalho de Melo
  2025-10-08  8:32       ` James Clark
  1 sibling, 1 reply; 11+ messages in thread
From: Arnaldo Carvalho de Melo @ 2025-10-07 20:01 UTC (permalink / raw)
  To: James Clark
  Cc: Ian Rogers, Peter Zijlstra, Ingo Molnar, Namhyung Kim,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Adrian Hunter,
	Leo Yan, linux-perf-users, linux-kernel

On Tue, Oct 07, 2025 at 10:10:12AM +0100, James Clark wrote:
> On 06/10/2025 4:21 pm, Ian Rogers wrote:
> > On Mon, Oct 6, 2025 at 6:11 AM James Clark <james.clark@linaro.org> wrote:
> > > +       data = zalloc(sizeof(*data));
> > > +       if (!data)
> > > +               return true;

> > > +       data->addr = addr;
> > > +       strlcpy(data->path, path, sizeof(data->path));
 
> > nit: perhaps strdup rather than having 4kb per tested_section.

> Oh yeah that would have been better, not sure why I didn't do it that way.
> Although the max sections I saw was around 50, and it's usually a lot less
> so it's probably not worth the churn to change it now that Arnaldo's applied
> it?

I see you submitted a patch for using strdup() and then there is a need
for checking the strdup(), etc.

Since at this point this is an improvement on a test and all is sitting
in linux-next and the window is closing for v6.18, lets leave this for
the next window, ok?

These would be good things for some tool to catch, before it gets sent,
but that is another rabbit hole :-)

Thanks,

- Arnaldo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tests: Don't retest sections in "Object code reading"
  2025-10-07 20:01     ` Arnaldo Carvalho de Melo
@ 2025-10-08  8:32       ` James Clark
  2025-10-08 10:21         ` James Clark
  0 siblings, 1 reply; 11+ messages in thread
From: James Clark @ 2025-10-08  8:32 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Ian Rogers, Peter Zijlstra, Ingo Molnar, Namhyung Kim,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Adrian Hunter,
	Leo Yan, linux-perf-users, linux-kernel



On 07/10/2025 9:01 pm, Arnaldo Carvalho de Melo wrote:
> On Tue, Oct 07, 2025 at 10:10:12AM +0100, James Clark wrote:
>> On 06/10/2025 4:21 pm, Ian Rogers wrote:
>>> On Mon, Oct 6, 2025 at 6:11 AM James Clark <james.clark@linaro.org> wrote:
>>>> +       data = zalloc(sizeof(*data));
>>>> +       if (!data)
>>>> +               return true;
> 
>>>> +       data->addr = addr;
>>>> +       strlcpy(data->path, path, sizeof(data->path));
>   
>>> nit: perhaps strdup rather than having 4kb per tested_section.
> 
>> Oh yeah that would have been better, not sure why I didn't do it that way.
>> Although the max sections I saw was around 50, and it's usually a lot less
>> so it's probably not worth the churn to change it now that Arnaldo's applied
>> it?
> 
> I see you submitted a patch for using strdup() and then there is a need
> for checking the strdup(), etc.
> 
> Since at this point this is an improvement on a test and all is sitting
> in linux-next and the window is closing for v6.18, lets leave this for
> the next window, ok?
> 

Makes sense.

> These would be good things for some tool to catch, before it gets sent,
> but that is another rabbit hole :-)
> 
> Thanks,
> 
> - Arnaldo

Does Smatch work on Perf? I imagine it would catch this if it does. Or 
just plain old cppcheck. I'll take a look.

James


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tests: Don't retest sections in "Object code reading"
  2025-10-08  8:32       ` James Clark
@ 2025-10-08 10:21         ` James Clark
  2025-10-08 13:39           ` Dan Carpenter
  0 siblings, 1 reply; 11+ messages in thread
From: James Clark @ 2025-10-08 10:21 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, Dan Carpenter,
	Charlie Jenkins
  Cc: Peter Zijlstra, Ingo Molnar, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Adrian Hunter, Leo Yan,
	linux-perf-users, linux-kernel



On 08/10/2025 9:32 am, James Clark wrote:
> 
> 
> On 07/10/2025 9:01 pm, Arnaldo Carvalho de Melo wrote:
>> On Tue, Oct 07, 2025 at 10:10:12AM +0100, James Clark wrote:
>>> On 06/10/2025 4:21 pm, Ian Rogers wrote:
>>>> On Mon, Oct 6, 2025 at 6:11 AM James Clark <james.clark@linaro.org> 
>>>> wrote:
>>>>> +       data = zalloc(sizeof(*data));
>>>>> +       if (!data)
>>>>> +               return true;
>>
>>>>> +       data->addr = addr;
>>>>> +       strlcpy(data->path, path, sizeof(data->path));
>>>> nit: perhaps strdup rather than having 4kb per tested_section.
>>
>>> Oh yeah that would have been better, not sure why I didn't do it that 
>>> way.
>>> Although the max sections I saw was around 50, and it's usually a lot 
>>> less
>>> so it's probably not worth the churn to change it now that Arnaldo's 
>>> applied
>>> it?
>>
>> I see you submitted a patch for using strdup() and then there is a need
>> for checking the strdup(), etc.
>>
>> Since at this point this is an improvement on a test and all is sitting
>> in linux-next and the window is closing for v6.18, lets leave this for
>> the next window, ok?
>>
> 
> Makes sense.
> 
>> These would be good things for some tool to catch, before it gets sent,
>> but that is another rabbit hole :-)
>>
>> Thanks,
>>
>> - Arnaldo
> 
> Does Smatch work on Perf? I imagine it would catch this if it does. Or 
> just plain old cppcheck. I'll take a look.
> 
> James
> 

Smatch doesn't know about strdup and seems to be more focused on kernel 
so probably isn't a good fit.

Cppcheck struggles with a lot of the kernely style that's used in Perf, 
especially the headers. We'd either have to silence a lot of the 
warnings, or start with almost no warnings enabled.

It doesn't have a warning for usage of a malloc return value without 
NULL checking, so in this case it wouldn't be useful.

I'm not 100% convinced that the effort of integrating one of these into 
the build system would be worth it. I know that once they're in, there 
would be constant maintenance of silencing false positives etc. And a 
lot of the warnings are more style or opinions about undefined behavior 
according to the standard, but aren't real based on what the compiler 
actually does.

As an example, cppcheck on code-reading.c with --enable=all gives 17 
portability, 11 style, 3 warning and 1 error outputs. Out of all of 
these only two of the warnings are significant, from commit 0f9ad973b095 
("perf tests code-reading: Handle change in objdump output from binutils 
 >= 2.41 on riscv"):

   token = strsep(&version, ".");
   version_tmp = atoi(token);
   if (token)
       version_num += version_tmp * 100;

   token = strsep(&version, ".");
   version_tmp = atoi(token);
   if (token)
       version_num += version_tmp;
			
"token" has already been passed to atoi() so can't be NULL in the if 
statement. I think the atoi() needs to come after the null check.

James


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tests: Don't retest sections in "Object code reading"
  2025-10-08 10:21         ` James Clark
@ 2025-10-08 13:39           ` Dan Carpenter
  2025-10-08 14:11             ` James Clark
  0 siblings, 1 reply; 11+ messages in thread
From: Dan Carpenter @ 2025-10-08 13:39 UTC (permalink / raw)
  To: James Clark
  Cc: Arnaldo Carvalho de Melo, Ian Rogers, Charlie Jenkins,
	Peter Zijlstra, Ingo Molnar, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Adrian Hunter, Leo Yan,
	linux-perf-users, linux-kernel

On Wed, Oct 08, 2025 at 11:21:34AM +0100, James Clark wrote:
> 
> 
> On 08/10/2025 9:32 am, James Clark wrote:
> > 
> > 
> > On 07/10/2025 9:01 pm, Arnaldo Carvalho de Melo wrote:
> > > On Tue, Oct 07, 2025 at 10:10:12AM +0100, James Clark wrote:
> > > > On 06/10/2025 4:21 pm, Ian Rogers wrote:
> > > > > On Mon, Oct 6, 2025 at 6:11 AM James Clark
> > > > > <james.clark@linaro.org> wrote:
> > > > > > +       data = zalloc(sizeof(*data));
> > > > > > +       if (!data)
> > > > > > +               return true;
> > > 
> > > > > > +       data->addr = addr;
> > > > > > +       strlcpy(data->path, path, sizeof(data->path));
> > > > > nit: perhaps strdup rather than having 4kb per tested_section.
> > > 
> > > > Oh yeah that would have been better, not sure why I didn't do it
> > > > that way.
> > > > Although the max sections I saw was around 50, and it's usually
> > > > a lot less
> > > > so it's probably not worth the churn to change it now that
> > > > Arnaldo's applied
> > > > it?
> > > 
> > > I see you submitted a patch for using strdup() and then there is a need
> > > for checking the strdup(), etc.
> > > 
> > > Since at this point this is an improvement on a test and all is sitting
> > > in linux-next and the window is closing for v6.18, lets leave this for
> > > the next window, ok?
> > > 
> > 
> > Makes sense.
> > 
> > > These would be good things for some tool to catch, before it gets sent,
> > > but that is another rabbit hole :-)
> > > 
> > > Thanks,
> > > 
> > > - Arnaldo
> > 
> > Does Smatch work on Perf? I imagine it would catch this if it does. Or
> > just plain old cppcheck. I'll take a look.
> > 
> > James
> > 
> 
> Smatch doesn't know about strdup and seems to be more focused on kernel so
> probably isn't a good fit.
> 

No one is going to write a check which tells people when to use a fixed
array vs when to allocate memory dynamically.  Those sorts of decisions
are too complicated.

> Cppcheck struggles with a lot of the kernely style that's used in Perf,
> especially the headers. We'd either have to silence a lot of the warnings,
> or start with almost no warnings enabled.
> 
> It doesn't have a warning for usage of a malloc return value without NULL
> checking, so in this case it wouldn't be useful.

In the kernel we care a lot about missing NULL checks but in userspace
it doesn't really matter in the same way...  It's easy enough to write
a check for this in Smatch.

> 
> I'm not 100% convinced that the effort of integrating one of these into the
> build system would be worth it. I know that once they're in, there would be
> constant maintenance of silencing false positives etc. And a lot of the
> warnings are more style or opinions about undefined behavior according to
> the standard, but aren't real based on what the compiler actually does.
> 

The thing about false positives is that people should just ignore all the
warnings except the new ones.  In the kernel, this works well because we
fix all the real bugs right away.  And if we haven't eventually someone
will look at the code again and after 10 years or so it all either gets
fixed or it doesn't matter.

> As an example, cppcheck on code-reading.c with --enable=all gives 17
> portability, 11 style, 3 warning and 1 error outputs. Out of all of these
> only two of the warnings are significant, from commit 0f9ad973b095 ("perf
> tests code-reading: Handle change in objdump output from binutils >= 2.41 on
> riscv"):
> 
>   token = strsep(&version, ".");
>   version_tmp = atoi(token);
>   if (token)
>       version_num += version_tmp * 100;
> 
>   token = strsep(&version, ".");
>   version_tmp = atoi(token);
>   if (token)
>       version_num += version_tmp;
> 			
> "token" has already been passed to atoi() so can't be NULL in the if
> statement. I think the atoi() needs to come after the null check.
>

Yep.  Smatch does cross function analysis so it would have caught that.
(I haven't tested).

regards,
dan carpenter


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tests: Don't retest sections in "Object code reading"
  2025-10-08 13:39           ` Dan Carpenter
@ 2025-10-08 14:11             ` James Clark
  2025-10-08 15:07               ` Ian Rogers
  0 siblings, 1 reply; 11+ messages in thread
From: James Clark @ 2025-10-08 14:11 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: Arnaldo Carvalho de Melo, Ian Rogers, Charlie Jenkins,
	Peter Zijlstra, Ingo Molnar, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Adrian Hunter, Leo Yan,
	linux-perf-users, linux-kernel



On 08/10/2025 2:39 pm, Dan Carpenter wrote:
> On Wed, Oct 08, 2025 at 11:21:34AM +0100, James Clark wrote:
>>
>>
>> On 08/10/2025 9:32 am, James Clark wrote:
>>>
>>>
>>> On 07/10/2025 9:01 pm, Arnaldo Carvalho de Melo wrote:
>>>> On Tue, Oct 07, 2025 at 10:10:12AM +0100, James Clark wrote:
>>>>> On 06/10/2025 4:21 pm, Ian Rogers wrote:
>>>>>> On Mon, Oct 6, 2025 at 6:11 AM James Clark
>>>>>> <james.clark@linaro.org> wrote:
>>>>>>> +       data = zalloc(sizeof(*data));
>>>>>>> +       if (!data)
>>>>>>> +               return true;
>>>>
>>>>>>> +       data->addr = addr;
>>>>>>> +       strlcpy(data->path, path, sizeof(data->path));
>>>>>> nit: perhaps strdup rather than having 4kb per tested_section.
>>>>
>>>>> Oh yeah that would have been better, not sure why I didn't do it
>>>>> that way.
>>>>> Although the max sections I saw was around 50, and it's usually
>>>>> a lot less
>>>>> so it's probably not worth the churn to change it now that
>>>>> Arnaldo's applied
>>>>> it?
>>>>
>>>> I see you submitted a patch for using strdup() and then there is a need
>>>> for checking the strdup(), etc.
>>>>
>>>> Since at this point this is an improvement on a test and all is sitting
>>>> in linux-next and the window is closing for v6.18, lets leave this for
>>>> the next window, ok?
>>>>
>>>
>>> Makes sense.
>>>
>>>> These would be good things for some tool to catch, before it gets sent,
>>>> but that is another rabbit hole :-)
>>>>
>>>> Thanks,
>>>>
>>>> - Arnaldo
>>>
>>> Does Smatch work on Perf? I imagine it would catch this if it does. Or
>>> just plain old cppcheck. I'll take a look.
>>>
>>> James
>>>
>>
>> Smatch doesn't know about strdup and seems to be more focused on kernel so
>> probably isn't a good fit.
>>
> 
> No one is going to write a check which tells people when to use a fixed
> array vs when to allocate memory dynamically.  Those sorts of decisions
> are too complicated.
> 

I mean "doesn't know about strdup" in that it would have to know that 
it's an allocator and can return NULL which should be checked etc. Not 
that it should know about Ian's original suggestion to use strdup in the 
first place.

>> Cppcheck struggles with a lot of the kernely style that's used in Perf,
>> especially the headers. We'd either have to silence a lot of the warnings,
>> or start with almost no warnings enabled.
>>
>> It doesn't have a warning for usage of a malloc return value without NULL
>> checking, so in this case it wouldn't be useful.
> 
> In the kernel we care a lot about missing NULL checks but in userspace
> it doesn't really matter in the same way...  It's easy enough to write
> a check for this in Smatch.
> 
>>
>> I'm not 100% convinced that the effort of integrating one of these into the
>> build system would be worth it. I know that once they're in, there would be
>> constant maintenance of silencing false positives etc. And a lot of the
>> warnings are more style or opinions about undefined behavior according to
>> the standard, but aren't real based on what the compiler actually does.
>>
> 
> The thing about false positives is that people should just ignore all the
> warnings except the new ones.  In the kernel, this works well because we
> fix all the real bugs right away.  And if we haven't eventually someone
> will look at the code again and after 10 years or so it all either gets
> fixed or it doesn't matter.
> 

This requires some infrastructure though. The easiest way I imagined it 
working would be more like how we have compiler warnings and -Werror 
currently.

Not that we couldn't come up with some kind of infrastructure to track 
new errors. But I think it would be applied too sporadically to block 
people from sending a patch in the first place.

>> As an example, cppcheck on code-reading.c with --enable=all gives 17
>> portability, 11 style, 3 warning and 1 error outputs. Out of all of these
>> only two of the warnings are significant, from commit 0f9ad973b095 ("perf
>> tests code-reading: Handle change in objdump output from binutils >= 2.41 on
>> riscv"):
>>
>>    token = strsep(&version, ".");
>>    version_tmp = atoi(token);
>>    if (token)
>>        version_num += version_tmp * 100;
>>
>>    token = strsep(&version, ".");
>>    version_tmp = atoi(token);
>>    if (token)
>>        version_num += version_tmp;
>> 			
>> "token" has already been passed to atoi() so can't be NULL in the if
>> statement. I think the atoi() needs to come after the null check.
>>
> 
> Yep.  Smatch does cross function analysis so it would have caught that.
> (I haven't tested).
> 
> regards,
> dan carpenter
> 

I ran it on this file and it didn't say much. Although I don't know if 
I'm using it properly:

   smatch --full-path -I. -I../include -I../lib/perf/include -Iutil -I../
     arch/x86/include -I../lib tests/code-reading.c

   tests/code-reading.c: note: in included file:

   /usr/include/x86_64-linux-gnu//sys/param.h:93:10: warning: 
preprocessor token roundup redefined

   tests/code-reading.c: note: in included file (through 
../include/linux/kernel.h):
   ../include/linux/math.h:17:9: this was the original definition



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] perf tests: Don't retest sections in "Object code reading"
  2025-10-08 14:11             ` James Clark
@ 2025-10-08 15:07               ` Ian Rogers
  0 siblings, 0 replies; 11+ messages in thread
From: Ian Rogers @ 2025-10-08 15:07 UTC (permalink / raw)
  To: James Clark
  Cc: Dan Carpenter, Arnaldo Carvalho de Melo, Charlie Jenkins,
	Peter Zijlstra, Ingo Molnar, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Adrian Hunter, Leo Yan,
	linux-perf-users, linux-kernel

On Wed, Oct 8, 2025 at 7:11 AM James Clark <james.clark@linaro.org> wrote:
> On 08/10/2025 2:39 pm, Dan Carpenter wrote:
> > On Wed, Oct 08, 2025 at 11:21:34AM +0100, James Clark wrote:
> >> On 08/10/2025 9:32 am, James Clark wrote:
> >>> On 07/10/2025 9:01 pm, Arnaldo Carvalho de Melo wrote:
> >>>> On Tue, Oct 07, 2025 at 10:10:12AM +0100, James Clark wrote:
> >>>>> On 06/10/2025 4:21 pm, Ian Rogers wrote:
> >>>>>> On Mon, Oct 6, 2025 at 6:11 AM James Clark
> >>>>>> <james.clark@linaro.org> wrote:
> >>>>>>> +       data = zalloc(sizeof(*data));
> >>>>>>> +       if (!data)
> >>>>>>> +               return true;
> >>>>
> >>>>>>> +       data->addr = addr;
> >>>>>>> +       strlcpy(data->path, path, sizeof(data->path));
> >>>>>> nit: perhaps strdup rather than having 4kb per tested_section.
> >>>>
> >>>>> Oh yeah that would have been better, not sure why I didn't do it
> >>>>> that way.
> >>>>> Although the max sections I saw was around 50, and it's usually
> >>>>> a lot less
> >>>>> so it's probably not worth the churn to change it now that
> >>>>> Arnaldo's applied
> >>>>> it?
> >>>>
> >>>> I see you submitted a patch for using strdup() and then there is a need
> >>>> for checking the strdup(), etc.
> >>>>
> >>>> Since at this point this is an improvement on a test and all is sitting
> >>>> in linux-next and the window is closing for v6.18, lets leave this for
> >>>> the next window, ok?
> >>>>
> >>>
> >>> Makes sense.
> >>>
> >>>> These would be good things for some tool to catch, before it gets sent,
> >>>> but that is another rabbit hole :-)
> >>>>
> >>>> Thanks,
> >>>>
> >>>> - Arnaldo
> >>>
> >>> Does Smatch work on Perf? I imagine it would catch this if it does. Or
> >>> just plain old cppcheck. I'll take a look.
> >>>
> >>> James
> >>>
> >>
> >> Smatch doesn't know about strdup and seems to be more focused on kernel so
> >> probably isn't a good fit.
> >>
> >
> > No one is going to write a check which tells people when to use a fixed
> > array vs when to allocate memory dynamically.  Those sorts of decisions
> > are too complicated.
> >
>
> I mean "doesn't know about strdup" in that it would have to know that
> it's an allocator and can return NULL which should be checked etc. Not
> that it should know about Ian's original suggestion to use strdup in the
> first place.

Ooh, is smatch smart about errptrs? perf is using the hashmap code
from libbpf and rather than doing the more conventional C thing of
setting errno and returning NULL from hashmap__new it encodes ENOMEM
as an errptr thereby breaking NULL tests. We seem to always forget
this and eye-ball checks of hashmap__new results being compared to
NULL are reasonable. For example this fix for the issue:
https://lore.kernel.org/lkml/20250917095422.60923-1-wangfushuai@baidu.com/
I believe in the kernel coccinelle will spot this but we don't have it
as part of the perf build.

I think to make the C compilers smart enough to catch this we need to
do something like:
struct ERRNO_OR(hashmap) hashmap__new(...)
where ERRNO_OR would be a macro to add a wrapping struct and then we'd
force users of the hashmap__new result to check if the value were
errno or a pointer.

Another source code analysis tool it'd be interesting to have on the
perf build would be clang-tidy.

Thanks,
Ian

> >> Cppcheck struggles with a lot of the kernely style that's used in Perf,
> >> especially the headers. We'd either have to silence a lot of the warnings,
> >> or start with almost no warnings enabled.
> >>
> >> It doesn't have a warning for usage of a malloc return value without NULL
> >> checking, so in this case it wouldn't be useful.
> >
> > In the kernel we care a lot about missing NULL checks but in userspace
> > it doesn't really matter in the same way...  It's easy enough to write
> > a check for this in Smatch.
> >
> >>
> >> I'm not 100% convinced that the effort of integrating one of these into the
> >> build system would be worth it. I know that once they're in, there would be
> >> constant maintenance of silencing false positives etc. And a lot of the
> >> warnings are more style or opinions about undefined behavior according to
> >> the standard, but aren't real based on what the compiler actually does.
> >>
> >
> > The thing about false positives is that people should just ignore all the
> > warnings except the new ones.  In the kernel, this works well because we
> > fix all the real bugs right away.  And if we haven't eventually someone
> > will look at the code again and after 10 years or so it all either gets
> > fixed or it doesn't matter.
> >
>
> This requires some infrastructure though. The easiest way I imagined it
> working would be more like how we have compiler warnings and -Werror
> currently.
>
> Not that we couldn't come up with some kind of infrastructure to track
> new errors. But I think it would be applied too sporadically to block
> people from sending a patch in the first place.
>
> >> As an example, cppcheck on code-reading.c with --enable=all gives 17
> >> portability, 11 style, 3 warning and 1 error outputs. Out of all of these
> >> only two of the warnings are significant, from commit 0f9ad973b095 ("perf
> >> tests code-reading: Handle change in objdump output from binutils >= 2.41 on
> >> riscv"):
> >>
> >>    token = strsep(&version, ".");
> >>    version_tmp = atoi(token);
> >>    if (token)
> >>        version_num += version_tmp * 100;
> >>
> >>    token = strsep(&version, ".");
> >>    version_tmp = atoi(token);
> >>    if (token)
> >>        version_num += version_tmp;
> >>
> >> "token" has already been passed to atoi() so can't be NULL in the if
> >> statement. I think the atoi() needs to come after the null check.
> >>
> >
> > Yep.  Smatch does cross function analysis so it would have caught that.
> > (I haven't tested).
> >
> > regards,
> > dan carpenter
> >
>
> I ran it on this file and it didn't say much. Although I don't know if
> I'm using it properly:
>
>    smatch --full-path -I. -I../include -I../lib/perf/include -Iutil -I../
>      arch/x86/include -I../lib tests/code-reading.c
>
>    tests/code-reading.c: note: in included file:
>
>    /usr/include/x86_64-linux-gnu//sys/param.h:93:10: warning:
> preprocessor token roundup redefined
>
>    tests/code-reading.c: note: in included file (through
> ../include/linux/kernel.h):
>    ../include/linux/math.h:17:9: this was the original definition
>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2025-10-08 15:07 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-06 13:11 [PATCH] perf tests: Don't retest sections in "Object code reading" James Clark
2025-10-06 15:21 ` Ian Rogers
2025-10-06 20:03   ` Arnaldo Carvalho de Melo
2025-10-07  9:10   ` James Clark
2025-10-07 13:16     ` Ian Rogers
2025-10-07 20:01     ` Arnaldo Carvalho de Melo
2025-10-08  8:32       ` James Clark
2025-10-08 10:21         ` James Clark
2025-10-08 13:39           ` Dan Carpenter
2025-10-08 14:11             ` James Clark
2025-10-08 15:07               ` Ian Rogers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).