* [PATCH 1/4] perf tools: Remove /SYSV from no_dso maps
2025-10-06 17:57 [PATCH 0/4] perf c2c: Detect shared memory cachelines Michael Petlan
@ 2025-10-06 17:57 ` Michael Petlan
2025-10-06 18:01 ` Ian Rogers
2025-10-06 17:57 ` [PATCH 2/4] perf c2c: Add shared mem flag Michael Petlan
` (3 subsequent siblings)
4 siblings, 1 reply; 10+ messages in thread
From: Michael Petlan @ 2025-10-06 17:57 UTC (permalink / raw)
To: linux-perf-users, acme, irogers, namhyung; +Cc: jmario, jolsa
Move the "/SYSV" check out of is_no_dso_memory() into a new
is_shared_memory() helper. We will need it for upcoming commits. Also
adjust the function calls in perf-inject, in order not to break the logic.
Suggested-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Michael Petlan <mpetlan@redhat.com>
---
tools/perf/builtin-inject.c | 4 ++--
tools/perf/util/map.c | 5 +++--
tools/perf/util/map.h | 6 +++++-
3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index a114b3fa1bea..cc090da5c445 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -773,7 +773,7 @@ static int tool__inject_build_id(const struct perf_tool *tool,
if (is_anon_memory(filename) || flags & MAP_HUGETLB)
return 0;
- if (is_no_dso_memory(filename))
+ if (is_no_dso_memory(filename) || is_shared_memory(filename))
return 0;
if (inject->known_build_ids != NULL &&
@@ -813,7 +813,7 @@ static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
/* Return to repipe anonymous maps. */
if (is_anon_memory(filename) || flags & MAP_HUGETLB)
return 1;
- if (is_no_dso_memory(filename))
+ if (is_no_dso_memory(filename) || is_shared_memory(filename))
return 1;
if (dso__read_build_id(dso)) {
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index b46c68c24d1c..6d13b4602970 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -133,12 +133,13 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
if (ADD_RC_CHK(result, map)) {
char newfilename[PATH_MAX];
struct dso *dso;
- int anon, no_dso, vdso, android;
+ int anon, no_dso, vdso, android, shared;
android = is_android_lib(filename);
anon = is_anon_memory(filename) || flags & MAP_HUGETLB;
vdso = is_vdso_map(filename);
no_dso = is_no_dso_memory(filename);
+ shared = is_shared_memory(filename);
nsi = nsinfo__get(thread__nsinfo(thread));
if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
@@ -174,7 +175,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
assert(!dso__kernel(dso));
map__init(result, start, start + len, pgoff, dso, prot, flags);
- if (anon || no_dso) {
+ if (anon || no_dso || shared) {
map->mapping_type = MAPPING_TYPE__IDENTITY;
/*
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 9cadf533a561..616f32504e46 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -258,10 +258,14 @@ static inline int is_anon_memory(const char *filename)
static inline int is_no_dso_memory(const char *filename)
{
return !strncmp(filename, "[stack", 6) ||
- !strncmp(filename, "/SYSV", 5) ||
!strcmp(filename, "[heap]");
}
+static inline int is_shared_memory(const char *filename)
+{
+ return !strncmp(filename, "/SYSV", 5);
+}
+
static inline void map__set_start(struct map *map, u64 start)
{
RC_CHK_ACCESS(map)->start = start;
--
2.47.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 1/4] perf tools: Remove /SYSV from no_dso maps
2025-10-06 17:57 ` [PATCH 1/4] perf tools: Remove /SYSV from no_dso maps Michael Petlan
@ 2025-10-06 18:01 ` Ian Rogers
2025-10-06 18:28 ` Joe Mario
0 siblings, 1 reply; 10+ messages in thread
From: Ian Rogers @ 2025-10-06 18:01 UTC (permalink / raw)
To: Michael Petlan; +Cc: linux-perf-users, acme, namhyung, jmario, jolsa
On Mon, Oct 6, 2025 at 10:57 AM Michael Petlan <mpetlan@redhat.com> wrote:
>
> We will need it for upcoming commits. Also adjust the function
> calls in perf-inject, in order not to break the logic.
>
> Suggested-by: Jiri Olsa <jolsa@kernel.org>
>
> Signed-off-by: Michael Petlan <mpetlan@redhat.com>
> ---
> tools/perf/builtin-inject.c | 4 ++--
> tools/perf/util/map.c | 5 +++--
> tools/perf/util/map.h | 6 +++++-
> 3 files changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index a114b3fa1bea..cc090da5c445 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -773,7 +773,7 @@ static int tool__inject_build_id(const struct perf_tool *tool,
>
> if (is_anon_memory(filename) || flags & MAP_HUGETLB)
> return 0;
> - if (is_no_dso_memory(filename))
> + if (is_no_dso_memory(filename) || is_shared_memory(filename))
> return 0;
>
> if (inject->known_build_ids != NULL &&
> @@ -813,7 +813,7 @@ static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
> /* Return to repipe anonymous maps. */
> if (is_anon_memory(filename) || flags & MAP_HUGETLB)
> return 1;
> - if (is_no_dso_memory(filename))
> + if (is_no_dso_memory(filename) || is_shared_memory(filename))
> return 1;
>
> if (dso__read_build_id(dso)) {
> diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
> index b46c68c24d1c..6d13b4602970 100644
> --- a/tools/perf/util/map.c
> +++ b/tools/perf/util/map.c
> @@ -133,12 +133,13 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
> if (ADD_RC_CHK(result, map)) {
> char newfilename[PATH_MAX];
> struct dso *dso;
> - int anon, no_dso, vdso, android;
> + int anon, no_dso, vdso, android, shared;
>
> android = is_android_lib(filename);
> anon = is_anon_memory(filename) || flags & MAP_HUGETLB;
> vdso = is_vdso_map(filename);
> no_dso = is_no_dso_memory(filename);
> + shared = is_shared_memory(filename);
> nsi = nsinfo__get(thread__nsinfo(thread));
>
> if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
> @@ -174,7 +175,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
> assert(!dso__kernel(dso));
> map__init(result, start, start + len, pgoff, dso, prot, flags);
>
> - if (anon || no_dso) {
> + if (anon || no_dso || shared) {
> map->mapping_type = MAPPING_TYPE__IDENTITY;
>
> /*
> diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
> index 9cadf533a561..616f32504e46 100644
> --- a/tools/perf/util/map.h
> +++ b/tools/perf/util/map.h
> @@ -258,10 +258,14 @@ static inline int is_anon_memory(const char *filename)
> static inline int is_no_dso_memory(const char *filename)
> {
> return !strncmp(filename, "[stack", 6) ||
> - !strncmp(filename, "/SYSV", 5) ||
> !strcmp(filename, "[heap]");
> }
>
> +static inline int is_shared_memory(const char *filename)
> +{
> + return !strncmp(filename, "/SYSV", 5);
> +}
Shared memory could happen because of two mmaps having the same file
and offset. Is the mapping shared within or across processes? Perhaps
there is a clearer and more capable API hiding here.
Thanks,
Ian
> +
> static inline void map__set_start(struct map *map, u64 start)
> {
> RC_CHK_ACCESS(map)->start = start;
> --
> 2.47.3
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/4] perf tools: Remove /SYSV from no_dso maps
2025-10-06 18:01 ` Ian Rogers
@ 2025-10-06 18:28 ` Joe Mario
2025-10-06 19:21 ` Ian Rogers
0 siblings, 1 reply; 10+ messages in thread
From: Joe Mario @ 2025-10-06 18:28 UTC (permalink / raw)
To: Ian Rogers, Michael Petlan; +Cc: linux-perf-users, acme, namhyung, jolsa
On 10/6/25 2:01 PM, Ian Rogers wrote:
> On Mon, Oct 6, 2025 at 10:57 AM Michael Petlan <mpetlan@redhat.com> wrote:
>>
>> We will need it for upcoming commits. Also adjust the function
>> calls in perf-inject, in order not to break the logic.
>>
>> Suggested-by: Jiri Olsa <jolsa@kernel.org>
>>
>> Signed-off-by: Michael Petlan <mpetlan@redhat.com>
>> ---
>> tools/perf/builtin-inject.c | 4 ++--
>> tools/perf/util/map.c | 5 +++--
>> tools/perf/util/map.h | 6 +++++-
>> 3 files changed, 10 insertions(+), 5 deletions(-)
>>
>> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
>> index a114b3fa1bea..cc090da5c445 100644
>> --- a/tools/perf/builtin-inject.c
>> +++ b/tools/perf/builtin-inject.c
>> @@ -773,7 +773,7 @@ static int tool__inject_build_id(const struct perf_tool *tool,
>>
>> if (is_anon_memory(filename) || flags & MAP_HUGETLB)
>> return 0;
>> - if (is_no_dso_memory(filename))
>> + if (is_no_dso_memory(filename) || is_shared_memory(filename))
>> return 0;
>>
>> if (inject->known_build_ids != NULL &&
>> @@ -813,7 +813,7 @@ static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
>> /* Return to repipe anonymous maps. */
>> if (is_anon_memory(filename) || flags & MAP_HUGETLB)
>> return 1;
>> - if (is_no_dso_memory(filename))
>> + if (is_no_dso_memory(filename) || is_shared_memory(filename))
>> return 1;
>>
>> if (dso__read_build_id(dso)) {
>> diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
>> index b46c68c24d1c..6d13b4602970 100644
>> --- a/tools/perf/util/map.c
>> +++ b/tools/perf/util/map.c
>> @@ -133,12 +133,13 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
>> if (ADD_RC_CHK(result, map)) {
>> char newfilename[PATH_MAX];
>> struct dso *dso;
>> - int anon, no_dso, vdso, android;
>> + int anon, no_dso, vdso, android, shared;
>>
>> android = is_android_lib(filename);
>> anon = is_anon_memory(filename) || flags & MAP_HUGETLB;
>> vdso = is_vdso_map(filename);
>> no_dso = is_no_dso_memory(filename);
>> + shared = is_shared_memory(filename);
>> nsi = nsinfo__get(thread__nsinfo(thread));
>>
>> if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
>> @@ -174,7 +175,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
>> assert(!dso__kernel(dso));
>> map__init(result, start, start + len, pgoff, dso, prot, flags);
>>
>> - if (anon || no_dso) {
>> + if (anon || no_dso || shared) {
>> map->mapping_type = MAPPING_TYPE__IDENTITY;
>>
>> /*
>> diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
>> index 9cadf533a561..616f32504e46 100644
>> --- a/tools/perf/util/map.h
>> +++ b/tools/perf/util/map.h
>> @@ -258,10 +258,14 @@ static inline int is_anon_memory(const char *filename)
>> static inline int is_no_dso_memory(const char *filename)
>> {
>> return !strncmp(filename, "[stack", 6) ||
>> - !strncmp(filename, "/SYSV", 5) ||
>> !strcmp(filename, "[heap]");
>> }
>>
>> +static inline int is_shared_memory(const char *filename)
>> +{
>> + return !strncmp(filename, "/SYSV", 5);
>> +}
>
> Shared memory could happen because of two mmaps having the same file
> and offset. Is the mapping shared within or across processes? Perhaps
> there is a clearer and more capable API hiding here.
Hi Ian:
We see it happening both within a process and across processes, where the code does something like this:
shmid = shmget((key_t)0, 8192, IPC_CREAT|0777);
shmat_addr1 = shmat(shmid, (char *)0, 0);
shmat_addr2 = shmat(shmid, (char *)0, 0);
The separate shmat() calls can either be in the same process or different processes. As long as they use the same shmid and a NULL address, they end up with different virtual addresses pointing to the same location in shared memory.
Then if cacheline contention occurs from accesses to shmat_addr1 and shmat_addr2, perf c2c cannot detect it.
I hope this helps.
Thanks,
Joe
>
> Thanks,
> Ian
>
>> +
>> static inline void map__set_start(struct map *map, u64 start)
>> {
>> RC_CHK_ACCESS(map)->start = start;
>> --
>> 2.47.3
>>
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/4] perf tools: Remove /SYSV from no_dso maps
2025-10-06 18:28 ` Joe Mario
@ 2025-10-06 19:21 ` Ian Rogers
0 siblings, 0 replies; 10+ messages in thread
From: Ian Rogers @ 2025-10-06 19:21 UTC (permalink / raw)
To: Joe Mario; +Cc: Michael Petlan, linux-perf-users, acme, namhyung, jolsa
On Mon, Oct 6, 2025 at 11:28 AM Joe Mario <jmario@redhat.com> wrote:
>
>
>
> On 10/6/25 2:01 PM, Ian Rogers wrote:
> > On Mon, Oct 6, 2025 at 10:57 AM Michael Petlan <mpetlan@redhat.com> wrote:
> >>
> >> We will need it for upcoming commits. Also adjust the function
> >> calls in perf-inject, in order not to break the logic.
> >>
> >> Suggested-by: Jiri Olsa <jolsa@kernel.org>
> >>
> >> Signed-off-by: Michael Petlan <mpetlan@redhat.com>
> >> ---
> >> tools/perf/builtin-inject.c | 4 ++--
> >> tools/perf/util/map.c | 5 +++--
> >> tools/perf/util/map.h | 6 +++++-
> >> 3 files changed, 10 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> >> index a114b3fa1bea..cc090da5c445 100644
> >> --- a/tools/perf/builtin-inject.c
> >> +++ b/tools/perf/builtin-inject.c
> >> @@ -773,7 +773,7 @@ static int tool__inject_build_id(const struct perf_tool *tool,
> >>
> >> if (is_anon_memory(filename) || flags & MAP_HUGETLB)
> >> return 0;
> >> - if (is_no_dso_memory(filename))
> >> + if (is_no_dso_memory(filename) || is_shared_memory(filename))
> >> return 0;
> >>
> >> if (inject->known_build_ids != NULL &&
> >> @@ -813,7 +813,7 @@ static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
> >> /* Return to repipe anonymous maps. */
> >> if (is_anon_memory(filename) || flags & MAP_HUGETLB)
> >> return 1;
> >> - if (is_no_dso_memory(filename))
> >> + if (is_no_dso_memory(filename) || is_shared_memory(filename))
> >> return 1;
> >>
> >> if (dso__read_build_id(dso)) {
> >> diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
> >> index b46c68c24d1c..6d13b4602970 100644
> >> --- a/tools/perf/util/map.c
> >> +++ b/tools/perf/util/map.c
> >> @@ -133,12 +133,13 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
> >> if (ADD_RC_CHK(result, map)) {
> >> char newfilename[PATH_MAX];
> >> struct dso *dso;
> >> - int anon, no_dso, vdso, android;
> >> + int anon, no_dso, vdso, android, shared;
> >>
> >> android = is_android_lib(filename);
> >> anon = is_anon_memory(filename) || flags & MAP_HUGETLB;
> >> vdso = is_vdso_map(filename);
> >> no_dso = is_no_dso_memory(filename);
> >> + shared = is_shared_memory(filename);
> >> nsi = nsinfo__get(thread__nsinfo(thread));
> >>
> >> if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
> >> @@ -174,7 +175,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
> >> assert(!dso__kernel(dso));
> >> map__init(result, start, start + len, pgoff, dso, prot, flags);
> >>
> >> - if (anon || no_dso) {
> >> + if (anon || no_dso || shared) {
> >> map->mapping_type = MAPPING_TYPE__IDENTITY;
> >>
> >> /*
> >> diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
> >> index 9cadf533a561..616f32504e46 100644
> >> --- a/tools/perf/util/map.h
> >> +++ b/tools/perf/util/map.h
> >> @@ -258,10 +258,14 @@ static inline int is_anon_memory(const char *filename)
> >> static inline int is_no_dso_memory(const char *filename)
> >> {
> >> return !strncmp(filename, "[stack", 6) ||
> >> - !strncmp(filename, "/SYSV", 5) ||
> >> !strcmp(filename, "[heap]");
> >> }
> >>
> >> +static inline int is_shared_memory(const char *filename)
> >> +{
> >> + return !strncmp(filename, "/SYSV", 5);
> >> +}
> >
> > Shared memory could happen because of two mmaps having the same file
> > and offset. Is the mapping shared within or across processes? Perhaps
> > there is a clearer and more capable API hiding here.
>
> Hi Ian:
> We see it happening both within a process and across processes, where the code does something like this:
>
> shmid = shmget((key_t)0, 8192, IPC_CREAT|0777);
> shmat_addr1 = shmat(shmid, (char *)0, 0);
> shmat_addr2 = shmat(shmid, (char *)0, 0);
>
> The separate shmat() calls can either be in the same process or different processes. As long as they use the same shmid and a NULL address, they end up with different virtual addresses pointing to the same location in shared memory.
>
> Then if cacheline contention occurs from accesses to shmat_addr1 and shmat_addr2, perf c2c cannot detect it.
>
> I hope this helps.
> Thanks,
> Joe
Thanks Joe, it helps :-)
There is a maps API for a collection of mmaps within a process, and
the map API is pretty much for the individual maps within it - the special
case kernel maps even have a pointer back to their owning maps instance.
Looking at the use of is_no_dso_memory(..) I don't think we want to
change the cases when we're looking to inject build IDs to try to load
build IDs from files called "/SYSV", which is a
consequence of the change just above.
Looking at the use of is_no_dso_memory in map__new I'm getting confused :-)
https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/map.c?h=perf-tools-next#n171
```
no_dso = is_no_dso_memory(filename);
...
if (vdso) {
...
dso = machine__findnew_vdso(machine, thread);
} else
dso = machine__findnew_dso_id(machine, filename, id);
if (dso == NULL)
goto out_delete;
...
if (anon || no_dso) {
```
Given that `dso` cannot be NULL, I don't see how the `no_dso` test
can be useful, and therefore the whole no_dso usage here seems
unnecessary.
Anyway, that's is_no_dso_memory. I wonder whether, if we want to know if a
map is shared, we should add a helper, something like:
```
// Tri-state answer to "is this mapping shared?". UNKNOWN is 0, so storage
// that starts out zeroed reads as "not resolved yet".
enum shared_mem {
SHARED_MEM__UNKNOWN = 0,
SHARED_MEM__NO = 1, // Non-zero: do not use the enum value directly as a boolean.
SHARED_MEM__YES = 2,
};
DECLARE_RC_STRUCT(map) {
...
enum shared_mem shared_mem : 2;
};
// Returns true when maps__shared_map() finds another mapping in maps that
// matches map. The answer is cached on the map via map__set_shared_mem(), so
// maps is only searched the first time a given map is queried.
bool map__is_shared(map, maps)
{
	enum shared_mem state = map__shared_mem(map);

	if (state == SHARED_MEM__UNKNOWN) {
		// First query for this map: search once and remember the result.
		state = maps__shared_map(maps, map) ? SHARED_MEM__YES : SHARED_MEM__NO;
		map__set_shared_mem(map, state);
	}

	// SHARED_MEM__NO is 1, so returning the enum itself from a bool function
	// would report every resolved map as shared. Compare explicitly.
	return state == SHARED_MEM__YES;
}
// Search state handed to maps__shared_map_cb(): map is the mapping being
// asked about, shared is set to the first other mapping that matches it.
struct maps__shared_map_cb_args {
struct map *map, *shared;
};
// Per-map callback for maps__for_each_map(). data points to a
// struct maps__shared_map_cb_args. Records in args->shared the first map that
// is not args->map itself but has the same dso and page offset.
// Returns 0 to keep iterating and 1 to stop once a match has been recorded.
static int maps__shared_map_cb(struct map *map, void *data)
{
struct maps__shared_map_cb_args *args = data;
if (RC_CHK_EQUAL(map, args->map))
return 0; // This is the map being queried; it must not match itself.
if (map__dso(map) != map__dso(args->map) || map__pgoff(map) !=
map__pgoff(args->map))
return 0; // Not a match, keep searching.
args->shared = map;
return 1; // End search now a shared mapping is found.
}
// Returns the first map in maps, other than map itself, that has the same dso
// and page offset as map. Returns NULL when there is no such map: args.shared
// starts out zeroed by the designated initializer and is only set on a match.
struct map *maps__shared_map(maps, map)
{
struct maps__shared_map_cb_args args = { .map = map, };
maps__for_each_map(maps, maps__shared_map_cb, &args);
return args.shared;
}
```
We can also update the map__new case to set the flag on the map based
on the filename being "/SYSV". This will mean that the uses of
is_shared_memory in the current patches are changed to calls to
map__is_shared instead.
Thanks,
Ian
> >
> > Thanks,
> > Ian
> >
> >> +
> >> static inline void map__set_start(struct map *map, u64 start)
> >> {
> >> RC_CHK_ACCESS(map)->start = start;
> >> --
> >> 2.47.3
> >>
> >
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 2/4] perf c2c: Add shared mem flag
2025-10-06 17:57 [PATCH 0/4] perf c2c: Detect shared memory cachelines Michael Petlan
2025-10-06 17:57 ` [PATCH 1/4] perf tools: Remove /SYSV from no_dso maps Michael Petlan
@ 2025-10-06 17:57 ` Michael Petlan
2025-10-06 17:57 ` [PATCH 3/4] perf c2c: Add map name for cacheline Michael Petlan
` (2 subsequent siblings)
4 siblings, 0 replies; 10+ messages in thread
From: Michael Petlan @ 2025-10-06 17:57 UTC (permalink / raw)
To: linux-perf-users, acme, irogers, namhyung; +Cc: jmario, jolsa
In perf-c2c report, it was impossible to detect shared memory. That
may hide important facts about the cache-line usage.
Add an "S" flag to the Shared Data Cache Line Table marking entries
using shared memory.
Suggested-by: Joe Mario <jmario@redhat.com>
Suggested-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Michael Petlan <mpetlan@redhat.com>
---
tools/perf/builtin-c2c.c | 49 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 49 insertions(+)
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 9e9ff471ddd1..ab77ea7b188c 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -19,6 +19,7 @@
#include <linux/zalloc.h>
#include <asm/bug.h>
#include <sys/param.h>
+#include <sys/mman.h>
#include "debug.h"
#include "builtin.h"
#include <perf/cpumap.h>
@@ -45,6 +46,14 @@
#include "pmus.h"
#include "string2.h"
#include "util/util.h"
+#include "dso.h"
+#include "map.h"
+
+enum shared_mem {
+ SHARED_MEM__UNKNOWN = -1,
+ SHARED_MEM__NO = 0,
+ SHARED_MEM__YES = 1,
+};
struct c2c_hists {
struct hists hists;
@@ -74,6 +83,7 @@ struct c2c_hist_entry {
unsigned long paddr_cnt;
bool paddr_zero;
char *nodestr;
+ enum shared_mem shared_mem;
/*
* must be at the end,
@@ -275,6 +285,12 @@ static void compute_stats(struct c2c_hist_entry *c2c_he,
update_stats(&cstats->load, weight);
}
+static bool map_is_shared_memory(struct map *map)
+{
+ return map && map->flags & MAP_SHARED && map->dso &&
+ !strncmp(map->dso->name, "/SYSV", sizeof("/SYSV") - 1);
+}
+
static int process_sample_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -1330,6 +1346,28 @@ cl_idx_empty_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
return scnprintf(hpp->buf, hpp->size, "%*s", width, "");
}
+static void
+c2c_he__resolve_shared_mem(struct c2c_hist_entry *c2c_he)
+{
+ if (c2c_he->shared_mem != SHARED_MEM__UNKNOWN)
+ c2c_he->shared_mem = map_is_shared_memory(c2c_he->he.mem_info->daddr.ms.map);
+}
+
+static int
+cl_shared_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[10];
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ c2c_he__resolve_shared_mem(c2c_he);
+
+ scnprintf(buf, 10, c2c_he->shared_mem == SHARED_MEM__YES ? "S" : " ");
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, buf);
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -1810,6 +1848,14 @@ static struct c2c_dimension dim_dcacheline_num_empty = {
.width = 5,
};
+static struct c2c_dimension dim_dcacheline_shared = {
+ .header = HEADER_LOW(""),
+ .name = "cl_shared",
+ .cmp = empty_cmp,
+ .entry = cl_shared_entry,
+ .width = 1,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_dcacheline_node,
@@ -1866,6 +1912,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_dcacheline_idx,
&dim_dcacheline_num,
&dim_dcacheline_num_empty,
+ &dim_dcacheline_shared,
NULL,
};
@@ -3150,6 +3197,7 @@ static int perf_c2c__report(int argc, const char **argv)
if (c2c.display != DISPLAY_SNP_PEER)
output_str = "cl_idx,"
+ "cl_shared,"
"dcacheline,"
"dcacheline_node,"
"dcacheline_count,"
@@ -3165,6 +3213,7 @@ static int perf_c2c__report(int argc, const char **argv)
"dram_lcl,dram_rmt";
else
output_str = "cl_idx,"
+ "cl_shared,"
"dcacheline,"
"dcacheline_node,"
"dcacheline_count,"
--
2.47.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 3/4] perf c2c: Add map name for cacheline
2025-10-06 17:57 [PATCH 0/4] perf c2c: Detect shared memory cachelines Michael Petlan
2025-10-06 17:57 ` [PATCH 1/4] perf tools: Remove /SYSV from no_dso maps Michael Petlan
2025-10-06 17:57 ` [PATCH 2/4] perf c2c: Add shared mem flag Michael Petlan
@ 2025-10-06 17:57 ` Michael Petlan
2025-10-06 17:57 ` [PATCH 4/4] perf c2c report: Add --detect-shm option Michael Petlan
2025-10-07 8:16 ` [PATCH 0/4] perf c2c: Detect shared memory cachelines Namhyung Kim
4 siblings, 0 replies; 10+ messages in thread
From: Michael Petlan @ 2025-10-06 17:57 UTC (permalink / raw)
To: linux-perf-users, acme, irogers, namhyung; +Cc: jmario, jolsa
Add column showing address range of the entries in Shared Data Cache
Line Table.
Suggested-by: Joe Mario <jmario@redhat.com>
Suggested-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Michael Petlan <mpetlan@redhat.com>
---
tools/perf/builtin-c2c.c | 35 +++++++++++++++++++++++++++++++++--
1 file changed, 33 insertions(+), 2 deletions(-)
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index ab77ea7b188c..78bcc18b7891 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1368,6 +1368,28 @@ cl_shared_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
return scnprintf(hpp->buf, hpp->size, "%*s", width, buf);
}
+static int
+cl_map_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[100];
+ struct map *map = he->mem_info->daddr.ms.map;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ c2c_he__resolve_shared_mem(c2c_he);
+
+ if (map) {
+ scnprintf(buf, 100, "%lx-%lx %s", map->start, map->end,
+ map->dso ? map->dso->name : "N/A");
+ } else {
+ scnprintf(buf, 100, "N/A");
+ }
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, buf);
+}
+
#define HEADER_LOW(__h) \
{ \
.line[1] = { \
@@ -1856,6 +1878,14 @@ static struct c2c_dimension dim_dcacheline_shared = {
.width = 1,
};
+static struct c2c_dimension dim_dcacheline_map = {
+ .header = HEADER_LOW("Map"),
+ .name = "cl_map",
+ .cmp = empty_cmp,
+ .entry = cl_map_entry,
+ .width = 3,
+};
+
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
&dim_dcacheline_node,
@@ -1913,6 +1943,7 @@ static struct c2c_dimension *dimensions[] = {
&dim_dcacheline_num,
&dim_dcacheline_num_empty,
&dim_dcacheline_shared,
+ &dim_dcacheline_map,
NULL,
};
@@ -3210,7 +3241,7 @@ static int perf_c2c__report(int argc, const char **argv)
"ld_fbhit,ld_l1hit,ld_l2hit,"
"ld_lclhit,lcl_hitm,"
"ld_rmthit,rmt_hitm,"
- "dram_lcl,dram_rmt";
+ "dram_lcl,dram_rmt,cl_map";
else
output_str = "cl_idx,"
"cl_shared,"
@@ -3226,7 +3257,7 @@ static int perf_c2c__report(int argc, const char **argv)
"ld_fbhit,ld_l1hit,ld_l2hit,"
"ld_lclhit,lcl_hitm,"
"ld_rmthit,rmt_hitm,"
- "dram_lcl,dram_rmt";
+ "dram_lcl,dram_rmt,cl_map";
if (c2c.display == DISPLAY_TOT_HITM)
sort_str = "tot_hitm";
--
2.47.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 4/4] perf c2c report: Add --detect-shm option
2025-10-06 17:57 [PATCH 0/4] perf c2c: Detect shared memory cachelines Michael Petlan
` (2 preceding siblings ...)
2025-10-06 17:57 ` [PATCH 3/4] perf c2c: Add map name for cacheline Michael Petlan
@ 2025-10-06 17:57 ` Michael Petlan
2025-10-07 8:16 ` [PATCH 0/4] perf c2c: Detect shared memory cachelines Namhyung Kim
4 siblings, 0 replies; 10+ messages in thread
From: Michael Petlan @ 2025-10-06 17:57 UTC (permalink / raw)
To: linux-perf-users, acme, irogers, namhyung; +Cc: jmario, jolsa
Add an option that allows merging shared cachelines. In order to
better understand the problem, another column with physical memory
address is added to the Cacheline dimension.
Since the "PA Cnt" column contains obviously incorrect data, remove
it from Shared Data Cache Line Table to save space.
Suggested-by: Jiri Olsa <jolsa@kernel.org>
Suggested-by: Joe Mario <jmario@redhat.com>
Signed-off-by: Michael Petlan <mpetlan@redhat.com>
---
tools/perf/builtin-c2c.c | 102 +++++++++++++++++++++++++--------------
1 file changed, 66 insertions(+), 36 deletions(-)
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 78bcc18b7891..6f9a65528654 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -111,6 +111,7 @@ struct perf_c2c {
bool stats_only;
bool symbol_full;
bool stitch_lbr;
+ bool phys;
/* Shared cache line stats */
struct c2c_stats shared_clines_stats;
@@ -329,6 +330,13 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
goto out;
}
+ /* Keep only accesses to shared memory for --phys mode. */
+ if (c2c.phys && !map_is_shared_memory(mi->daddr.ms.map)) {
+ mem_info__put(mi);
+ ret = 0;
+ goto out;
+ }
+
/*
* The mi object is released in hists__add_entry_ops,
* if it gets sorted out into existing data, so we need
@@ -515,11 +523,20 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
__s; \
})
+static int64_t
+sort__dcacheline_phys_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return left->mem_info->daddr.phys_addr - right->mem_info->daddr.phys_addr;
+}
+
static int64_t
dcacheline_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *left, struct hist_entry *right)
{
- return sort__dcacheline_cmp(left, right);
+ if (c2c.phys)
+ return sort__dcacheline_phys_cmp(left, right);
+ else
+ return sort__dcacheline_cmp(left, right);
}
static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
@@ -535,6 +552,20 @@ static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
}
+static int
+dcacheline_phys_addr_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ uint64_t addr = 0;
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[20];
+
+ if (he->mem_info)
+ addr = cl_address(mem_info__daddr(he->mem_info)->phys_addr, chk_double_cl);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
+}
+
static int
dcacheline_node_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he)
@@ -1433,6 +1464,14 @@ static struct c2c_dimension dim_dcacheline = {
.width = 18,
};
+static struct c2c_dimension dim_dcacheline_phys_addr = {
+ .header = HEADER_LOW("Phys Address"),
+ .name = "dcacheline_phys_addr",
+ .cmp = empty_cmp,
+ .entry = dcacheline_phys_addr_entry,
+ .width = 18,
+};
+
static struct c2c_dimension dim_dcacheline_node = {
.header = HEADER_LOW("Node"),
.name = "dcacheline_node",
@@ -1888,6 +1927,7 @@ static struct c2c_dimension dim_dcacheline_map = {
static struct c2c_dimension *dimensions[] = {
&dim_dcacheline,
+ &dim_dcacheline_phys_addr,
&dim_dcacheline_node,
&dim_dcacheline_count,
&dim_offset,
@@ -2869,7 +2909,7 @@ static int ui_quirks(void)
buf = fill_line(chk_double_cl ? "Double-Cacheline" : "Cacheline",
dim_dcacheline.width +
dim_dcacheline_node.width +
- dim_dcacheline_count.width + 4);
+ (c2c.phys ? dim_dcacheline_phys_addr.width : dim_dcacheline_count.width) + 4);
if (!buf)
return -ENOMEM;
@@ -2878,6 +2918,7 @@ static int ui_quirks(void)
/* Fix the zero line for offset column. */
buf = fill_line(nodestr, dim_offset.width +
dim_offset_node.width +
+ (c2c.phys ? dim_dcacheline_phys_addr.width + 2 : 0) +
dim_dcacheline_count.width + 4);
if (!buf)
return -ENOMEM;
@@ -3004,7 +3045,7 @@ static int build_cl_output(char *cl_sort, bool no_source)
}
if (asprintf(&c2c.cl_output,
- "%s%s%s%s%s%s%s%s%s%s%s%s",
+ "%s%s%s%s%s%s%s%s%s%s%s%s%s",
c2c.use_stdio ? "cl_num_empty," : "",
c2c.display == DISPLAY_SNP_PEER ? "percent_rmt_peer,"
"percent_lcl_peer," :
@@ -3014,6 +3055,7 @@ static int build_cl_output(char *cl_sort, bool no_source)
"percent_stores_l1miss,"
"percent_stores_na,"
"offset,offset_node,dcacheline_count,",
+ c2c.phys ? "dcacheline_phys_addr," : "",
add_pid ? "pid," : "",
add_tid ? "tid," : "",
add_iaddr ? "iaddr," : "",
@@ -3100,6 +3142,7 @@ static int perf_c2c__report(int argc, const char **argv)
"Do not display Source Line column"),
OPT_BOOLEAN(0, "show-all", &c2c.show_all,
"Show all captured HITM lines."),
+ OPT_BOOLEAN(0, "detect-shm", &c2c.phys, "Merge shared cachelines"),
OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
"print_type,threshold[,print_limit],order,sort_key[,branch],value",
callchain_help, &parse_callchain_opt,
@@ -3115,7 +3158,8 @@ static int perf_c2c__report(int argc, const char **argv)
OPT_END()
};
int err = 0;
- const char *output_str, *sort_str = NULL;
+ char *output_str;
+ const char *sort_str = NULL;
struct perf_env *env;
argc = parse_options(argc, argv, options, report_c2c_usage,
@@ -3226,38 +3270,22 @@ static int perf_c2c__report(int argc, const char **argv)
goto out_mem2node;
}
- if (c2c.display != DISPLAY_SNP_PEER)
- output_str = "cl_idx,"
- "cl_shared,"
- "dcacheline,"
- "dcacheline_node,"
- "dcacheline_count,"
- "percent_costly_snoop,"
- "tot_hitm,lcl_hitm,rmt_hitm,"
- "tot_recs,"
- "tot_loads,"
- "tot_stores,"
- "stores_l1hit,stores_l1miss,stores_na,"
- "ld_fbhit,ld_l1hit,ld_l2hit,"
- "ld_lclhit,lcl_hitm,"
- "ld_rmthit,rmt_hitm,"
- "dram_lcl,dram_rmt,cl_map";
- else
- output_str = "cl_idx,"
- "cl_shared,"
- "dcacheline,"
- "dcacheline_node,"
- "dcacheline_count,"
- "percent_costly_snoop,"
- "tot_peer,lcl_peer,rmt_peer,"
- "tot_recs,"
- "tot_loads,"
- "tot_stores,"
- "stores_l1hit,stores_l1miss,stores_na,"
- "ld_fbhit,ld_l1hit,ld_l2hit,"
- "ld_lclhit,lcl_hitm,"
- "ld_rmthit,rmt_hitm,"
- "dram_lcl,dram_rmt,cl_map";
+ if (asprintf(&output_str, "%s%s%s%s%s",
+ "cl_idx,"
+ "cl_shared,"
+ "dcacheline,",
+ c2c.phys ? "dcacheline_phys_addr,dcacheline_node," : "dcacheline_node,dcacheline_count,",
+ "percent_costly_snoop,",
+ (c2c.display == DISPLAY_SNP_PEER) ? "tot_peer,lcl_peer,rmt_peer," : "tot_hitm,lcl_hitm,rmt_hitm,",
+ "tot_recs,"
+ "tot_loads,"
+ "tot_stores,"
+ "stores_l1hit,stores_l1miss,stores_na,"
+ "ld_fbhit,ld_l1hit,ld_l2hit,"
+ "ld_lclhit,lcl_hitm,"
+ "ld_rmthit,rmt_hitm,"
+ "dram_lcl,dram_rmt,cl_map") < 0)
+ goto out_mem2node;
if (c2c.display == DISPLAY_TOT_HITM)
sort_str = "tot_hitm";
@@ -3270,6 +3298,8 @@ static int perf_c2c__report(int argc, const char **argv)
c2c_hists__reinit(&c2c.hists, output_str, sort_str, perf_session__env(session));
+ free(output_str);
+
ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");
hists__collapse_resort(&c2c.hists.hists, NULL);
--
2.47.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 0/4] perf c2c: Detect shared memory cachelines
2025-10-06 17:57 [PATCH 0/4] perf c2c: Detect shared memory cachelines Michael Petlan
` (3 preceding siblings ...)
2025-10-06 17:57 ` [PATCH 4/4] perf c2c report: Add --detect-shm option Michael Petlan
@ 2025-10-07 8:16 ` Namhyung Kim
2025-10-07 14:38 ` Arnaldo Carvalho de Melo
4 siblings, 1 reply; 10+ messages in thread
From: Namhyung Kim @ 2025-10-07 8:16 UTC (permalink / raw)
To: Michael Petlan; +Cc: linux-perf-users, acme, irogers, jmario, jolsa
Hello,
On Mon, Oct 06, 2025 at 07:57:06PM +0200, Michael Petlan wrote:
> Hello.
>
> Currently, perf-c2c tool has no means to discover a scenario when
> there is a single cacheline in shared memory, which is mapped to
> multiple different virtual addresses.
>
> This patchset adds a mechanism to detect this. First, an "S" flag
> suggests that such a scenario may occur (a cacheline in shared memory
> is captured). Additionally, another option is added (--detect-shm),
> which zooms in on the SHM cacheline and shows the physical addresses too.
Any example output? With a test program would be ideal.
Thanks,
Namhyung
>
> Jiri Olsa has drafted this feature some time ago. I have rebased it
> to current kernel repository shape and reworked several things in it
> after some discussions with Joe Mario.
>
> Thanks for thoughts and comments.
>
> Michael
>
>
> Michael Petlan (4):
> perf tools: Remove /SYSV from no_dso maps
> perf c2c: Add shared mem flag
> perf c2c: Add map name for cacheline
> perf c2c report: Add --detect-shm option
>
> tools/perf/builtin-c2c.c | 178 +++++++++++++++++++++++++++++-------
> tools/perf/builtin-inject.c | 4 +-
> tools/perf/util/map.c | 5 +-
> tools/perf/util/map.h | 6 +-
> 4 files changed, 154 insertions(+), 39 deletions(-)
>
> --
> 2.47.3
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 0/4] perf c2c: Detect shared memory cachelines
2025-10-07 8:16 ` [PATCH 0/4] perf c2c: Detect shared memory cachelines Namhyung Kim
@ 2025-10-07 14:38 ` Arnaldo Carvalho de Melo
0 siblings, 0 replies; 10+ messages in thread
From: Arnaldo Carvalho de Melo @ 2025-10-07 14:38 UTC (permalink / raw)
To: Namhyung Kim, Michael Petlan
Cc: linux-perf-users, acme, irogers, jmario, jolsa
On October 7, 2025 5:16:50 AM GMT-03:00, Namhyung Kim <namhyung@kernel.org> wrote:
>On Mon, Oct 06, 2025 at 07:57:06PM +0200, Michael Petlan wrote:
>> Currently, perf-c2c tool has no means to discover a scenario when
>> there is a single cacheline in shared memory, which is mapped to
>> multiple different virtual addresses.
>> This patchset adds a mechanism to detect this. First, an "S" flag
>> suggests that such a scenario may occur (a cacheline in shared memory
>> is captured). Additionally, another option is added (--detect-shm),
>> which zooms in on the SHM cacheline and shows the physical addresses too.
>Any example output? With a test program would be ideal.
Yeah, output paired with a test case is kinda required when adding new features.
Joe Mario provided the gist of one such program, which would be great to have as a simple 'perf test' workload (-w).
That then could be used to add a 'perf test' script to make sure the feature doesn't regress.
- Arnaldo
- Arnaldo
^ permalink raw reply [flat|nested] 10+ messages in thread