linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] perf evsel: Improve the evsel__open_strerror for EBUSY
@ 2024-10-31 22:39 Chun-Tse Shao
  2024-10-31 22:39 ` [PATCH 2/3] perf: Reveal PMU type in fdinfo Chun-Tse Shao
  2024-10-31 22:39 ` [PATCH 3/3] perf evsel: Find process with busy PMUs for EBUSY Chun-Tse Shao
  0 siblings, 2 replies; 9+ messages in thread
From: Chun-Tse Shao @ 2024-10-31 22:39 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, Liang, Kan, Ze Gao, Chun-Tse Shao,
	Yang Jihong, Weilin Wang, linux-perf-users

From: Ian Rogers <irogers@google.com>

The existing EBUSY strerror message is:
```
The sys_perf_event_open() syscall returned with 16 (Device or resource busy) for event (intel_bts//).
"dmesg | grep -i perf" may provide additional information.
```
The dmesg won't be useful. What is more useful is knowing what
processes are potentially using the PMU, which some procfs scanning can
reveal. When parallel testing tests/shell/stat_all_pmu.sh this yields:
```
Testing intel_bts//
Error:
The PMU intel_bts counters are busy and in use by another process.
Possible processes:
2585882 perf list
2585902 perf list -j -o /tmp/__perf_test.list_output.json.KF9MY
2585904 perf list
2585911 perf record -e task-clock --filter period > 1 -o /dev/null --quiet true
2585912 perf list
2585915 perf list
2586042 /tmp/perf/perf record -asdg -e cpu-clock -o /tmp/perftool-testsuite_report.dIF/perf_report/perf.data -- sleep 2
2589078 perf record -g -e task-clock:u -o - perf test -w noploop
2589148 /tmp/perf/perf record --control=fifo:control,ack -e cpu-clock -m 1 sleep 10
2589379 perf --buildid-dir /tmp/perf.debug.Umx record --buildid-all -o /tmp/perf.data.YBm /tmp/perf.ex.MD5.ZQW
2589568 perf record -o /tmp/__perf_test.program.mtcZH/perf.data --branch-filter any,save_type,u -- perf test -w brstack
2589649 perf record --per-thread -o /tmp/__perf_test.perf.data.5d3dc perf test -w thloop
2589898 perf record -o /tmp/perf-test-script.BX2b27Dcnj/pp-perf.data --sample-cpu uname
```
Which gets a little closer to finding the issue.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/evsel.c | 79 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index dbf9c8cee3c56..9a5b6a6f8d2e5 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3286,6 +3286,78 @@ static bool find_process(const char *name)
 	return ret ? false : true;
 }

+static int dump_perf_event_processes(char *msg, size_t size)
+{
+	DIR *proc_dir;
+	struct dirent *proc_entry;
+	int printed = 0;
+
+	proc_dir = opendir(procfs__mountpoint());
+	if (!proc_dir)
+		return 0;
+
+	/* Walk through the /proc directory. */
+	while ((proc_entry = readdir(proc_dir)) != NULL) {
+		char path[PATH_MAX];
+		DIR *fd_dir;
+		struct dirent *fd_entry;
+		int fd_dir_fd;
+
+		if ((proc_entry->d_type != DT_DIR) ||
+		     !strcmp(".", proc_entry->d_name) ||
+		     !strcmp("..", proc_entry->d_name))
+			continue;
+
+		scnprintf(path, sizeof(path), "%s/fd", proc_entry->d_name);
+		fd_dir_fd = openat(dirfd(proc_dir), path, O_DIRECTORY);
+		if (fd_dir_fd == -1)
+			continue;
+		fd_dir = fdopendir(fd_dir_fd);
+		if (!fd_dir) {
+			close(fd_dir_fd);
+			continue;
+		}
+		while ((fd_entry = readdir(fd_dir)) != NULL) {
+			ssize_t link_size;
+
+			if (fd_entry->d_type != DT_LNK)
+				continue;
+			link_size = readlinkat(fd_dir_fd, fd_entry->d_name, path, sizeof(path));
+			if (link_size < 0)
+				continue;
+			/* Take care as readlink doesn't null terminate the string. */
+			if (!strncmp(path, "anon_inode:[perf_event]", link_size)) {
+				int cmdline_fd;
+				ssize_t cmdline_size;
+
+				scnprintf(path, sizeof(path), "%s/cmdline", proc_entry->d_name);
+				cmdline_fd = openat(dirfd(proc_dir), path, O_RDONLY);
+				if (cmdline_fd == -1)
+					continue;
+				cmdline_size = read(cmdline_fd, path, sizeof(path) - 1);
+				close(cmdline_fd);
+				if (cmdline_size < 0)
+					continue;
+				path[cmdline_size] = '\0';
+				for (ssize_t i = 0; i < cmdline_size; i++) {
+					if (path[i] == '\0')
+						path[i] = ' ';
+				}
+
+				if (printed == 0)
+					printed += scnprintf(msg, size, "Possible processes:\n");
+
+				printed += scnprintf(msg + printed, size - printed,
+						"%s %s\n", proc_entry->d_name, path);
+				break;
+			}
+		}
+		closedir(fd_dir);
+	}
+	closedir(proc_dir);
+	return printed;
+}
+
 int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused,
 				     char *msg __maybe_unused,
 				     size_t size __maybe_unused)
@@ -3319,7 +3391,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
 			printed += scnprintf(msg, size,
 				"No permission to enable %s event.\n\n", evsel__name(evsel));

-		return scnprintf(msg + printed, size - printed,
+		return printed + scnprintf(msg + printed, size - printed,
 		 "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
 		 "access to performance monitoring and observability operations for processes\n"
 		 "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
@@ -3382,6 +3454,11 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
 			return scnprintf(msg, size,
 	"The PMU counters are busy/taken by another profiler.\n"
 	"We found oprofile daemon running, please stop it and try again.");
+		printed += scnprintf(
+			msg, size,
+			"The PMU %s counters are busy and in use by another process.\n",
+			evsel->pmu ? evsel->pmu->name : "");
+		return printed + dump_perf_event_processes(msg + printed, size - printed);
 		break;
 	case EINVAL:
 		if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
--
2.47.0.163.g1226f6d8fa-goog


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/3] perf: Reveal PMU type in fdinfo
  2024-10-31 22:39 [PATCH 1/3] perf evsel: Improve the evsel__open_strerror for EBUSY Chun-Tse Shao
@ 2024-10-31 22:39 ` Chun-Tse Shao
  2024-11-01 16:02   ` Ian Rogers
  2024-11-05 12:24   ` Peter Zijlstra
  2024-10-31 22:39 ` [PATCH 3/3] perf evsel: Find process with busy PMUs for EBUSY Chun-Tse Shao
  1 sibling, 2 replies; 9+ messages in thread
From: Chun-Tse Shao @ 2024-10-31 22:39 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, Liang, Kan, Ze Gao, Chun-Tse Shao,
	Yang Jihong, Weilin Wang, linux-perf-users

It gives useful info on knowing which PMUs are reserved by this process.
Also add extra attributes which would be useful.

```
Testing cycles
$ ./perf stat -e cycles &
$ cat /proc/`pidof perf`/fdinfo/3
pos:    0
flags:  02000002
mnt_id: 16
ino:    3081
perf_event-orig_type:   0
perf_event-attr.config1:        0
perf_event-attr.config2:        0
perf_event-attr.config3:        0

Testing L1-dcache-load-misses//
$ ./perf stat -e L1-dcache-load-misses &
$ cat /proc/`pidof perf`/fdinfo/3
pos:    0
flags:  02000002
mnt_id: 16
ino:    1072
perf_event-attr.type:   3
perf_event-attr.config: 65536
perf_event-attr.config1:        0
perf_event-attr.config2:        0
perf_event-attr.config3:        0
```

Signed-off-by: Chun-Tse Shao <ctshao@google.com>
---
 kernel/events/core.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index cdd09769e6c56..e0891c376fd9d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8,6 +8,7 @@
  *  Copyright  ©  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
  */

+#include "linux/seq_file.h"
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/cpu.h>
@@ -6820,6 +6821,17 @@ static int perf_fasync(int fd, struct file *filp, int on)
 	return 0;
 }

+static void perf_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	struct perf_event *event = f->private_data;
+
+	seq_printf(m, "perf_event-attr.type:\t%u\n", event->orig_type);
+	seq_printf(m, "perf_event-attr.config:\t%llu\n", event->attr.config);
+	seq_printf(m, "perf_event-attr.config1:\t%llu\n", event->attr.config1);
+	seq_printf(m, "perf_event-attr.config2:\t%llu\n", event->attr.config2);
+	seq_printf(m, "perf_event-attr.config3:\t%llu\n", event->attr.config3);
+}
+
 static const struct file_operations perf_fops = {
 	.release		= perf_release,
 	.read			= perf_read,
@@ -6828,6 +6840,7 @@ static const struct file_operations perf_fops = {
 	.compat_ioctl		= perf_compat_ioctl,
 	.mmap			= perf_mmap,
 	.fasync			= perf_fasync,
+	.show_fdinfo		= perf_show_fdinfo,
 };

 /*
--
2.47.0.163.g1226f6d8fa-goog


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/3] perf evsel: Find process with busy PMUs for EBUSY
  2024-10-31 22:39 [PATCH 1/3] perf evsel: Improve the evsel__open_strerror for EBUSY Chun-Tse Shao
  2024-10-31 22:39 ` [PATCH 2/3] perf: Reveal PMU type in fdinfo Chun-Tse Shao
@ 2024-10-31 22:39 ` Chun-Tse Shao
  2024-11-01 16:14   ` Ian Rogers
  1 sibling, 1 reply; 9+ messages in thread
From: Chun-Tse Shao @ 2024-10-31 22:39 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, Liang, Kan, Ze Gao, Chun-Tse Shao,
	Yang Jihong, Weilin Wang, linux-perf-users

It parses fdinfo with PMU type, comparing with the event which failed to
open, and report the processes causing EBUSY error.

```
Testing cycles and intel_pt//
$ ./perf stat -e cycles &
[1] 55569
$ ./perf stat -e intel_pt// &
[2] 55683
$ ./perf stat -e intel_pt//
Error:
The PMU intel_pt counters are busy and in use by another process.
Possible processes:
55683 ./perf stat -e intel_pt//
```
Only perf with intel_pt was reported.

Signed-off-by: Chun-Tse Shao <ctshao@google.com>
---
 tools/perf/util/evsel.c | 79 +++++++++++++++++++++++++++++------------
 1 file changed, 57 insertions(+), 22 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 9a5b6a6f8d2e5..d2f7c19e023ec 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3286,7 +3286,8 @@ static bool find_process(const char *name)
 	return ret ? false : true;
 }

-static int dump_perf_event_processes(char *msg, size_t size)
+static int dump_perf_event_processes(const struct perf_event_attr *failed_attr,
+				     char *msg, size_t size)
 {
 	DIR *proc_dir;
 	struct dirent *proc_entry;
@@ -3327,29 +3328,61 @@ static int dump_perf_event_processes(char *msg, size_t size)
 				continue;
 			/* Take care as readlink doesn't null terminate the string. */
 			if (!strncmp(path, "anon_inode:[perf_event]", link_size)) {
-				int cmdline_fd;
-				ssize_t cmdline_size;
-
-				scnprintf(path, sizeof(path), "%s/cmdline", proc_entry->d_name);
-				cmdline_fd = openat(dirfd(proc_dir), path, O_RDONLY);
-				if (cmdline_fd == -1)
-					continue;
-				cmdline_size = read(cmdline_fd, path, sizeof(path) - 1);
-				close(cmdline_fd);
-				if (cmdline_size < 0)
+				int fdinfo_fd;
+				ssize_t fdinfo_size;
+				char *line;
+				u32 perf_event_type = PERF_TYPE_MAX;
+
+				/* Let's check the PMU type reserved by this process */
+				scnprintf(path, sizeof(path), "%s/fdinfo/%s",
+					  proc_entry->d_name, fd_entry->d_name);
+				fdinfo_fd = openat(dirfd(proc_dir), path, O_RDONLY);
+				fdinfo_size = read(fdinfo_fd, path, sizeof(path) - 1);
+				if (fdinfo_size < 0)
 					continue;
-				path[cmdline_size] = '\0';
-				for (ssize_t i = 0; i < cmdline_size; i++) {
-					if (path[i] == '\0')
-						path[i] = ' ';
+				path[fdinfo_size] = '\0';
+
+				line = strtok(path, "\n");
+				while (line != NULL) {
+					if (sscanf(line,
+						   "perf_event-attr.type:\t%u",
+						   &perf_event_type) == 1)
+						break;
+					line = strtok(NULL, "\n");
 				}

-				if (printed == 0)
-					printed += scnprintf(msg, size, "Possible processes:\n");
-
-				printed += scnprintf(msg + printed, size - printed,
-						"%s %s\n", proc_entry->d_name, path);
-				break;
+				/* Report the process which reserves the conflicted PMU. */
+				/* If fdinfo does not contain PMU type, report it too. */
+				if (perf_event_type == failed_attr->type ||
+				    perf_event_type == PERF_TYPE_MAX) {
+					int cmdline_fd;
+					ssize_t cmdline_size;
+
+					scnprintf(path, sizeof(path),
+						  "%s/cmdline",
+						  proc_entry->d_name);
+					cmdline_fd = openat(dirfd(proc_dir), path, O_RDONLY);
+					if (cmdline_fd == -1)
+						continue;
+					cmdline_size = read(cmdline_fd, path, sizeof(path) - 1);
+					close(cmdline_fd);
+					if (cmdline_size < 0)
+						continue;
+					path[cmdline_size] = '\0';
+					for (ssize_t i = 0; i < cmdline_size; i++) {
+						if (path[i] == '\0')
+							path[i] = ' ';
+					}
+
+					if (printed == 0)
+						printed += scnprintf(
+							msg, size,
+							"Possible processes:\n");
+
+					printed += scnprintf(msg + printed, size - printed,
+							"%s %s\n", proc_entry->d_name, path);
+					break;
+				}
 			}
 		}
 		closedir(fd_dir);
@@ -3458,7 +3491,9 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
 			msg, size,
 			"The PMU %s counters are busy and in use by another process.\n",
 			evsel->pmu ? evsel->pmu->name : "");
-		return printed + dump_perf_event_processes(msg + printed, size - printed);
+		return printed + dump_perf_event_processes(&evsel->core.attr,
+							   msg + printed,
+							   size - printed);
 		break;
 	case EINVAL:
 		if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
--
2.47.0.163.g1226f6d8fa-goog


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] perf: Reveal PMU type in fdinfo
  2024-10-31 22:39 ` [PATCH 2/3] perf: Reveal PMU type in fdinfo Chun-Tse Shao
@ 2024-11-01 16:02   ` Ian Rogers
  2024-11-01 21:24     ` Chun-Tse Shao
  2024-11-05 12:24   ` Peter Zijlstra
  1 sibling, 1 reply; 9+ messages in thread
From: Ian Rogers @ 2024-11-01 16:02 UTC (permalink / raw)
  To: Chun-Tse Shao
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Adrian Hunter, Liang, Kan, Ze Gao,
	Yang Jihong, Weilin Wang, linux-perf-users

On Thu, Oct 31, 2024 at 3:39 PM Chun-Tse Shao <ctshao@google.com> wrote:
>
> It gives useful info on knowing which PMUs are reserved by this process.
> Also add extra attributes which would be useful.
>
> ```
> Testing cycles
> $ ./perf stat -e cycles &
> $ cat /proc/`pidof perf`/fdinfo/3
> pos:    0
> flags:  02000002
> mnt_id: 16
> ino:    3081
> perf_event-orig_type:   0

nit: I think this should be:
perf_event-type:   0
this output was from an earlier version.

> perf_event-attr.config1:        0
> perf_event-attr.config2:        0
> perf_event-attr.config3:        0
>
> Testing L1-dcache-load-misses//
> $ ./perf stat -e L1-dcache-load-misses &
> $ cat /proc/`pidof perf`/fdinfo/3
> pos:    0
> flags:  02000002
> mnt_id: 16
> ino:    1072
> perf_event-attr.type:   3
> perf_event-attr.config: 65536
> perf_event-attr.config1:        0
> perf_event-attr.config2:        0
> perf_event-attr.config3:        0
> ```
>
> Signed-off-by: Chun-Tse Shao <ctshao@google.com>
> ---
>  kernel/events/core.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
>
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index cdd09769e6c56..e0891c376fd9d 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -8,6 +8,7 @@
>   *  Copyright  ©  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
>   */
>
> +#include "linux/seq_file.h"

nit: I think you should use angle < > rather than quotes on the
include for consistency.

Thanks,
Ian

>  #include <linux/fs.h>
>  #include <linux/mm.h>
>  #include <linux/cpu.h>
> @@ -6820,6 +6821,17 @@ static int perf_fasync(int fd, struct file *filp, int on)
>         return 0;
>  }
>
> +static void perf_show_fdinfo(struct seq_file *m, struct file *f)
> +{
> +       struct perf_event *event = f->private_data;
> +
> +       seq_printf(m, "perf_event-attr.type:\t%u\n", event->orig_type);
> +       seq_printf(m, "perf_event-attr.config:\t%llu\n", event->attr.config);
> +       seq_printf(m, "perf_event-attr.config1:\t%llu\n", event->attr.config1);
> +       seq_printf(m, "perf_event-attr.config2:\t%llu\n", event->attr.config2);
> +       seq_printf(m, "perf_event-attr.config3:\t%llu\n", event->attr.config3);
> +}
> +
>  static const struct file_operations perf_fops = {
>         .release                = perf_release,
>         .read                   = perf_read,
> @@ -6828,6 +6840,7 @@ static const struct file_operations perf_fops = {
>         .compat_ioctl           = perf_compat_ioctl,
>         .mmap                   = perf_mmap,
>         .fasync                 = perf_fasync,
> +       .show_fdinfo            = perf_show_fdinfo,
>  };
>
>  /*
> --
> 2.47.0.163.g1226f6d8fa-goog
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] perf evsel: Find process with busy PMUs for EBUSY
  2024-10-31 22:39 ` [PATCH 3/3] perf evsel: Find process with busy PMUs for EBUSY Chun-Tse Shao
@ 2024-11-01 16:14   ` Ian Rogers
  2024-11-01 21:32     ` Chun-Tse Shao
  0 siblings, 1 reply; 9+ messages in thread
From: Ian Rogers @ 2024-11-01 16:14 UTC (permalink / raw)
  To: Chun-Tse Shao
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Adrian Hunter, Kan, Ze Gao,
	Yang Jihong, Weilin Wang, linux-perf-users

On Thu, Oct 31, 2024 at 3:39 PM Chun-Tse Shao <ctshao@google.com> wrote:
>
> It parses fdinfo with PMU type, comparing with the event which failed to
> open, and report the processes causing EBUSY error.
>
> ```
> Testing cycles and intel_pt//
> $ ./perf stat -e cycles &
> [1] 55569
> $ ./perf stat -e intel_pt// &
> [2] 55683
> $ ./perf stat -e intel_pt//
> Error:
> The PMU intel_pt counters are busy and in use by another process.
> Possible processes:
> 55683 ./perf stat -e intel_pt//
> ```
> Only perf with intel_pt was reported.

I think this is a very nice addition. It is a shame there is a race
with the existing process exiting, between the perf_event_open and the
/proc scanning. A PMU may return EBUSY just because say perf list is
probing features on the PMU so we probably have some extra retries for
EBUSY too.

> Signed-off-by: Chun-Tse Shao <ctshao@google.com>
> ---
>  tools/perf/util/evsel.c | 79 +++++++++++++++++++++++++++++------------
>  1 file changed, 57 insertions(+), 22 deletions(-)
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 9a5b6a6f8d2e5..d2f7c19e023ec 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -3286,7 +3286,8 @@ static bool find_process(const char *name)
>         return ret ? false : true;
>  }
>
> -static int dump_perf_event_processes(char *msg, size_t size)
> +static int dump_perf_event_processes(const struct perf_event_attr *failed_attr,
> +                                    char *msg, size_t size)
>  {
>         DIR *proc_dir;
>         struct dirent *proc_entry;
> @@ -3327,29 +3328,61 @@ static int dump_perf_event_processes(char *msg, size_t size)
>                                 continue;
>                         /* Take care as readlink doesn't null terminate the string. */
>                         if (!strncmp(path, "anon_inode:[perf_event]", link_size)) {
> -                               int cmdline_fd;
> -                               ssize_t cmdline_size;
> -
> -                               scnprintf(path, sizeof(path), "%s/cmdline", proc_entry->d_name);
> -                               cmdline_fd = openat(dirfd(proc_dir), path, O_RDONLY);
> -                               if (cmdline_fd == -1)
> -                                       continue;
> -                               cmdline_size = read(cmdline_fd, path, sizeof(path) - 1);
> -                               close(cmdline_fd);
> -                               if (cmdline_size < 0)
> +                               int fdinfo_fd;
> +                               ssize_t fdinfo_size;
> +                               char *line;
> +                               u32 perf_event_type = PERF_TYPE_MAX;

PERF_TYPE_MAX is beyond the pre-defined perf PMU types at 6 but PMU
drivers loaded by the kernel may use this number - I think task-clock
may use this PMU type number but it shouldn't return EBUSY. Anyway, I
think -1 would be a better marker to use here and in the corresponding
check below.

Thanks,
Ian

> +
> +                               /* Let's check the PMU type reserved by this process */
> +                               scnprintf(path, sizeof(path), "%s/fdinfo/%s",
> +                                         proc_entry->d_name, fd_entry->d_name);
> +                               fdinfo_fd = openat(dirfd(proc_dir), path, O_RDONLY);
> +                               fdinfo_size = read(fdinfo_fd, path, sizeof(path) - 1);
> +                               if (fdinfo_size < 0)
>                                         continue;
> -                               path[cmdline_size] = '\0';
> -                               for (ssize_t i = 0; i < cmdline_size; i++) {
> -                                       if (path[i] == '\0')
> -                                               path[i] = ' ';
> +                               path[fdinfo_size] = '\0';
> +
> +                               line = strtok(path, "\n");
> +                               while (line != NULL) {
> +                                       if (sscanf(line,
> +                                                  "perf_event-attr.type:\t%u",
> +                                                  &perf_event_type) == 1)
> +                                               break;
> +                                       line = strtok(NULL, "\n");
>                                 }
>
> -                               if (printed == 0)
> -                                       printed += scnprintf(msg, size, "Possible processes:\n");
> -
> -                               printed += scnprintf(msg + printed, size - printed,
> -                                               "%s %s\n", proc_entry->d_name, path);
> -                               break;
> +                               /* Report the process which reserves the conflicted PMU. */
> +                               /* If fdinfo does not contain PMU type, report it too. */
> +                               if (perf_event_type == failed_attr->type ||
> +                                   perf_event_type == PERF_TYPE_MAX) {
> +                                       int cmdline_fd;
> +                                       ssize_t cmdline_size;
> +
> +                                       scnprintf(path, sizeof(path),
> +                                                 "%s/cmdline",
> +                                                 proc_entry->d_name);
> +                                       cmdline_fd = openat(dirfd(proc_dir), path, O_RDONLY);
> +                                       if (cmdline_fd == -1)
> +                                               continue;
> +                                       cmdline_size = read(cmdline_fd, path, sizeof(path) - 1);
> +                                       close(cmdline_fd);
> +                                       if (cmdline_size < 0)
> +                                               continue;
> +                                       path[cmdline_size] = '\0';
> +                                       for (ssize_t i = 0; i < cmdline_size; i++) {
> +                                               if (path[i] == '\0')
> +                                                       path[i] = ' ';
> +                                       }
> +
> +                                       if (printed == 0)
> +                                               printed += scnprintf(
> +                                                       msg, size,
> +                                                       "Possible processes:\n");
> +
> +                                       printed += scnprintf(msg + printed, size - printed,
> +                                                       "%s %s\n", proc_entry->d_name, path);
> +                                       break;
> +                               }
>                         }
>                 }
>                 closedir(fd_dir);
> @@ -3458,7 +3491,9 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
>                         msg, size,
>                         "The PMU %s counters are busy and in use by another process.\n",
>                         evsel->pmu ? evsel->pmu->name : "");
> -               return printed + dump_perf_event_processes(msg + printed, size - printed);
> +               return printed + dump_perf_event_processes(&evsel->core.attr,
> +                                                          msg + printed,
> +                                                          size - printed);
>                 break;
>         case EINVAL:
>                 if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
> --
> 2.47.0.163.g1226f6d8fa-goog
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] perf: Reveal PMU type in fdinfo
  2024-11-01 16:02   ` Ian Rogers
@ 2024-11-01 21:24     ` Chun-Tse Shao
  0 siblings, 0 replies; 9+ messages in thread
From: Chun-Tse Shao @ 2024-11-01 21:24 UTC (permalink / raw)
  To: Ian Rogers
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Adrian Hunter, Ze Gao, Weilin Wang,
	linux-perf-users, kan.liang

Thank you for your review! Here is the link to the v2 patch with
fixes: https://lore.kernel.org/all/20241101211757.824743-2-ctshao@google.com/

On Fri, Nov 1, 2024 at 9:02 AM Ian Rogers <irogers@google.com> wrote:
>
> On Thu, Oct 31, 2024 at 3:39 PM Chun-Tse Shao <ctshao@google.com> wrote:
> >
> > It gives useful info on knowing which PMUs are reserved by this process.
> > Also add extra attributes which would be useful.
> >
> > ```
> > Testing cycles
> > $ ./perf stat -e cycles &
> > $ cat /proc/`pidof perf`/fdinfo/3
> > pos:    0
> > flags:  02000002
> > mnt_id: 16
> > ino:    3081
> > perf_event-orig_type:   0
>
> nit: I think this should be:
> perf_event-type:   0
> this output was from an earlier version.

Thank you for the correction!

>
> > perf_event-attr.config1:        0
> > perf_event-attr.config2:        0
> > perf_event-attr.config3:        0
> >
> > Testing L1-dcache-load-misses//
> > $ ./perf stat -e L1-dcache-load-misses &
> > $ cat /proc/`pidof perf`/fdinfo/3
> > pos:    0
> > flags:  02000002
> > mnt_id: 16
> > ino:    1072
> > perf_event-attr.type:   3
> > perf_event-attr.config: 65536
> > perf_event-attr.config1:        0
> > perf_event-attr.config2:        0
> > perf_event-attr.config3:        0
> > ```
> >
> > Signed-off-by: Chun-Tse Shao <ctshao@google.com>
> > ---
> >  kernel/events/core.c | 13 +++++++++++++
> >  1 file changed, 13 insertions(+)
> >
> > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > index cdd09769e6c56..e0891c376fd9d 100644
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -8,6 +8,7 @@
> >   *  Copyright  ©  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
> >   */
> >
> > +#include "linux/seq_file.h"
>
> nit: I think you should use angle < > rather than quotes on the
> include for consistency.

Fixed.

>
> Thanks,
> Ian
>
> >  #include <linux/fs.h>
> >  #include <linux/mm.h>
> >  #include <linux/cpu.h>
> > @@ -6820,6 +6821,17 @@ static int perf_fasync(int fd, struct file *filp, int on)
> >         return 0;
> >  }
> >
> > +static void perf_show_fdinfo(struct seq_file *m, struct file *f)
> > +{
> > +       struct perf_event *event = f->private_data;
> > +
> > +       seq_printf(m, "perf_event-attr.type:\t%u\n", event->orig_type);
> > +       seq_printf(m, "perf_event-attr.config:\t%llu\n", event->attr.config);
> > +       seq_printf(m, "perf_event-attr.config1:\t%llu\n", event->attr.config1);
> > +       seq_printf(m, "perf_event-attr.config2:\t%llu\n", event->attr.config2);
> > +       seq_printf(m, "perf_event-attr.config3:\t%llu\n", event->attr.config3);
> > +}
> > +
> >  static const struct file_operations perf_fops = {
> >         .release                = perf_release,
> >         .read                   = perf_read,
> > @@ -6828,6 +6840,7 @@ static const struct file_operations perf_fops = {
> >         .compat_ioctl           = perf_compat_ioctl,
> >         .mmap                   = perf_mmap,
> >         .fasync                 = perf_fasync,
> > +       .show_fdinfo            = perf_show_fdinfo,
> >  };
> >
> >  /*
> > --
> > 2.47.0.163.g1226f6d8fa-goog
> >

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] perf evsel: Find process with busy PMUs for EBUSY
  2024-11-01 16:14   ` Ian Rogers
@ 2024-11-01 21:32     ` Chun-Tse Shao
  0 siblings, 0 replies; 9+ messages in thread
From: Chun-Tse Shao @ 2024-11-01 21:32 UTC (permalink / raw)
  To: Ian Rogers
  Cc: linux-kernel, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Adrian Hunter, Kan, Ze Gao,
	Weilin Wang, linux-perf-users

Thank you for your review! Here is the link to the v2 patch with
fixes: https://lore.kernel.org/all/20241101211757.824743-3-ctshao@google.com/

On Fri, Nov 1, 2024 at 9:14 AM Ian Rogers <irogers@google.com> wrote:
>
> On Thu, Oct 31, 2024 at 3:39 PM Chun-Tse Shao <ctshao@google.com> wrote:
> >
> > It parses fdinfo with PMU type, comparing with the event which failed to
> > open, and report the processes causing EBUSY error.
> >
> > ```
> > Testing cycles and intel_pt//
> > $ ./perf stat -e cycles &
> > [1] 55569
> > $ ./perf stat -e intel_pt// &
> > [2] 55683
> > $ ./perf stat -e intel_pt//
> > Error:
> > The PMU intel_pt counters are busy and in use by another process.
> > Possible processes:
> > 55683 ./perf stat -e intel_pt//
> > ```
> > Only perf with intel_pt was reported.
>
> I think this is a very nice addition. It is a shame there is a race
> with the existing process exiting, between the perf_event_open and the
> /proc scanning. A PMU may return EBUSY just because say perf list is
> probing features on the PMU so we probably have some extra retries for
> EBUSY too.
>
> > Signed-off-by: Chun-Tse Shao <ctshao@google.com>
> > ---
> >  tools/perf/util/evsel.c | 79 +++++++++++++++++++++++++++++------------
> >  1 file changed, 57 insertions(+), 22 deletions(-)
> >
> > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > index 9a5b6a6f8d2e5..d2f7c19e023ec 100644
> > --- a/tools/perf/util/evsel.c
> > +++ b/tools/perf/util/evsel.c
> > @@ -3286,7 +3286,8 @@ static bool find_process(const char *name)
> >         return ret ? false : true;
> >  }
> >
> > -static int dump_perf_event_processes(char *msg, size_t size)
> > +static int dump_perf_event_processes(const struct perf_event_attr *failed_attr,
> > +                                    char *msg, size_t size)
> >  {
> >         DIR *proc_dir;
> >         struct dirent *proc_entry;
> > @@ -3327,29 +3328,61 @@ static int dump_perf_event_processes(char *msg, size_t size)
> >                                 continue;
> >                         /* Take care as readlink doesn't null terminate the string. */
> >                         if (!strncmp(path, "anon_inode:[perf_event]", link_size)) {
> > -                               int cmdline_fd;
> > -                               ssize_t cmdline_size;
> > -
> > -                               scnprintf(path, sizeof(path), "%s/cmdline", proc_entry->d_name);
> > -                               cmdline_fd = openat(dirfd(proc_dir), path, O_RDONLY);
> > -                               if (cmdline_fd == -1)
> > -                                       continue;
> > -                               cmdline_size = read(cmdline_fd, path, sizeof(path) - 1);
> > -                               close(cmdline_fd);
> > -                               if (cmdline_size < 0)
> > +                               int fdinfo_fd;
> > +                               ssize_t fdinfo_size;
> > +                               char *line;
> > +                               u32 perf_event_type = PERF_TYPE_MAX;
>
> PERF_TYPE_MAX is beyond the pre-defined perf PMU types at 6 but PMU
> drivers loaded by the kernel may use this number - I think task-clock
> may use this PMU type number but it shouldn't return EBUSY. Anyway, I
> think -1 would be a better marker to use here and in the corresponding
> check below.
>
> Thanks,
> Ian

In this case I am using UINT32_MAX instead to avoid the overflow. If
kernel would still use this value, I can make it u64 or add a variable
`found` instead.

>
> > +
> > +                               /* Let's check the PMU type reserved by this process */
> > +                               scnprintf(path, sizeof(path), "%s/fdinfo/%s",
> > +                                         proc_entry->d_name, fd_entry->d_name);
> > +                               fdinfo_fd = openat(dirfd(proc_dir), path, O_RDONLY);
> > +                               fdinfo_size = read(fdinfo_fd, path, sizeof(path) - 1);
> > +                               if (fdinfo_size < 0)
> >                                         continue;
> > -                               path[cmdline_size] = '\0';
> > -                               for (ssize_t i = 0; i < cmdline_size; i++) {
> > -                                       if (path[i] == '\0')
> > -                                               path[i] = ' ';
> > +                               path[fdinfo_size] = '\0';
> > +
> > +                               line = strtok(path, "\n");
> > +                               while (line != NULL) {
> > +                                       if (sscanf(line,
> > +                                                  "perf_event-attr.type:\t%u",
> > +                                                  &perf_event_type) == 1)
> > +                                               break;
> > +                                       line = strtok(NULL, "\n");
> >                                 }
> >
> > -                               if (printed == 0)
> > -                                       printed += scnprintf(msg, size, "Possible processes:\n");
> > -
> > -                               printed += scnprintf(msg + printed, size - printed,
> > -                                               "%s %s\n", proc_entry->d_name, path);
> > -                               break;
> > +                               /* Report the process which reserves the conflicted PMU. */
> > +                               /* If fdinfo does not contain PMU type, report it too. */
> > +                               if (perf_event_type == failed_attr->type ||
> > +                                   perf_event_type == PERF_TYPE_MAX) {
> > +                                       int cmdline_fd;
> > +                                       ssize_t cmdline_size;
> > +
> > +                                       scnprintf(path, sizeof(path),
> > +                                                 "%s/cmdline",
> > +                                                 proc_entry->d_name);
> > +                                       cmdline_fd = openat(dirfd(proc_dir), path, O_RDONLY);
> > +                                       if (cmdline_fd == -1)
> > +                                               continue;
> > +                                       cmdline_size = read(cmdline_fd, path, sizeof(path) - 1);
> > +                                       close(cmdline_fd);
> > +                                       if (cmdline_size < 0)
> > +                                               continue;
> > +                                       path[cmdline_size] = '\0';
> > +                                       for (ssize_t i = 0; i < cmdline_size; i++) {
> > +                                               if (path[i] == '\0')
> > +                                                       path[i] = ' ';
> > +                                       }
> > +
> > +                                       if (printed == 0)
> > +                                               printed += scnprintf(
> > +                                                       msg, size,
> > +                                                       "Possible processes:\n");
> > +
> > +                                       printed += scnprintf(msg + printed, size - printed,
> > +                                                       "%s %s\n", proc_entry->d_name, path);
> > +                                       break;
> > +                               }
> >                         }
> >                 }
> >                 closedir(fd_dir);
> > @@ -3458,7 +3491,9 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
> >                         msg, size,
> >                         "The PMU %s counters are busy and in use by another process.\n",
> >                         evsel->pmu ? evsel->pmu->name : "");
> > -               return printed + dump_perf_event_processes(msg + printed, size - printed);
> > +               return printed + dump_perf_event_processes(&evsel->core.attr,
> > +                                                          msg + printed,
> > +                                                          size - printed);
> >                 break;
> >         case EINVAL:
> >                 if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
> > --
> > 2.47.0.163.g1226f6d8fa-goog
> >

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] perf: Reveal PMU type in fdinfo
  2024-10-31 22:39 ` [PATCH 2/3] perf: Reveal PMU type in fdinfo Chun-Tse Shao
  2024-11-01 16:02   ` Ian Rogers
@ 2024-11-05 12:24   ` Peter Zijlstra
  2024-11-05 15:15     ` Ian Rogers
  1 sibling, 1 reply; 9+ messages in thread
From: Peter Zijlstra @ 2024-11-05 12:24 UTC (permalink / raw)
  To: Chun-Tse Shao
  Cc: linux-kernel, Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Ian Rogers,
	Adrian Hunter, Liang, Kan, Ze Gao, Yang Jihong, Weilin Wang,
	linux-perf-users

On Thu, Oct 31, 2024 at 10:39:44PM +0000, Chun-Tse Shao wrote:
> It gives useful info on knowing which PMUs are reserved by this process.
> Also add extra attributes which would be useful.
> 
> ```
> Testing cycles
> $ ./perf stat -e cycles &
> $ cat /proc/`pidof perf`/fdinfo/3
> pos:    0
> flags:  02000002
> mnt_id: 16
> ino:    3081
> perf_event-orig_type:   0
> perf_event-attr.config1:        0
> perf_event-attr.config2:        0
> perf_event-attr.config3:        0
> 
> Testing L1-dcache-load-misses//
> $ ./perf stat -e L1-dcache-load-misses &
> $ cat /proc/`pidof perf`/fdinfo/3
> pos:    0
> flags:  02000002
> mnt_id: 16
> ino:    1072
> perf_event-attr.type:   3
> perf_event-attr.config: 65536
> perf_event-attr.config1:        0
> perf_event-attr.config2:        0
> perf_event-attr.config3:        0
> ```

First time I hear about fdinfo.. How much of an ABI is this, and why
this random selection of the perf_event_attr structure? What if someone
else wants something and then we change it. Will this then break ABI?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] perf: Reveal PMU type in fdinfo
  2024-11-05 12:24   ` Peter Zijlstra
@ 2024-11-05 15:15     ` Ian Rogers
  0 siblings, 0 replies; 9+ messages in thread
From: Ian Rogers @ 2024-11-05 15:15 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Chun-Tse Shao, linux-kernel, Ingo Molnar,
	Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Adrian Hunter, Liang, Kan, Ze Gao,
	Yang Jihong, Weilin Wang, linux-perf-users

On Tue, Nov 5, 2024 at 4:24 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Oct 31, 2024 at 10:39:44PM +0000, Chun-Tse Shao wrote:
> > It gives useful info on knowing which PMUs are reserved by this process.
> > Also add extra attributes which would be useful.
> >
> > ```
> > Testing cycles
> > $ ./perf stat -e cycles &
> > $ cat /proc/`pidof perf`/fdinfo/3
> > pos:    0
> > flags:  02000002
> > mnt_id: 16
> > ino:    3081
> > perf_event-orig_type:   0
> > perf_event-attr.config1:        0
> > perf_event-attr.config2:        0
> > perf_event-attr.config3:        0
> >
> > Testing L1-dcache-load-misses//
> > $ ./perf stat -e L1-dcache-load-misses &
> > $ cat /proc/`pidof perf`/fdinfo/3
> > pos:    0
> > flags:  02000002
> > mnt_id: 16
> > ino:    1072
> > perf_event-attr.type:   3
> > perf_event-attr.config: 65536
> > perf_event-attr.config1:        0
> > perf_event-attr.config2:        0
> > perf_event-attr.config3:        0
> > ```
>
> First time I hear about fdinfo.. How much of an ABI is this, and why
> this random selection of the perf_event_attr structure? What if someone
> else wants something and then we change it. Will this then break ABI?

Hi Peter,

There is a man page describing normal use of fdinfo:
https://man7.org/linux/man-pages/man5/proc_pid_fdinfo.5.html
I came across it wanting to implement a DRM PMU similar to the
(unmerged) hwmon PMU:
https://lore.kernel.org/all/20241022180623.463131-1-irogers@google.com/
The DRM extension to fdinfo isn't described in the man page so I
propose it in these patches:
https://lore.kernel.org/all/20241101211830.1298073-4-irogers@google.com/
Where DRM describes its fdinfo ABI here:
https://docs.kernel.org/gpu/drm-usage-stats.html
An example implementation is:
https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/tree/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c

> How much of an ABI is it?
I suspect that it is similar in this regard to anything in /proc, such
as /proc/pid/maps. /proc/pid/maps has been changed by things like
disabling the showing of "[stack:tid]" information, but generally it
stays the same. Adding information is obviously intended.

> Why this selection of perf_event_attr?
For the EBUSY warning perf_event-attr.type would suffice. I'd been
nagging for additional information for the sake of debugging. It gets
a little harder to print some values as they are in unions. These
values at least make a start.

> What if someone else wants something and then we change it. Will this then break ABI?
I don't think so. I can imagine lots of other information to be gained
through the API, like the hw_perf_event state.

Thanks,
Ian

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-11-05 15:16 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-10-31 22:39 [PATCH 1/3] perf evsel: Improve the evsel__open_strerror for EBUSY Chun-Tse Shao
2024-10-31 22:39 ` [PATCH 2/3] perf: Reveal PMU type in fdinfo Chun-Tse Shao
2024-11-01 16:02   ` Ian Rogers
2024-11-01 21:24     ` Chun-Tse Shao
2024-11-05 12:24   ` Peter Zijlstra
2024-11-05 15:15     ` Ian Rogers
2024-10-31 22:39 ` [PATCH 3/3] perf evsel: Find process with busy PMUs for EBUSY Chun-Tse Shao
2024-11-01 16:14   ` Ian Rogers
2024-11-01 21:32     ` Chun-Tse Shao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).