linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 1/3] perf evsel: Improve the evsel__open_strerror for EBUSY
@ 2024-11-06  0:30 Chun-Tse Shao
  2024-11-06  0:30 ` [PATCH v3 2/3] perf: Reveal PMU type in fdinfo Chun-Tse Shao
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Chun-Tse Shao @ 2024-11-06  0:30 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, Kan Liang, Ze Gao, Chun-Tse Shao,
	Weilin Wang, linux-perf-users

From: Ian Rogers <irogers@google.com>

The existing EBUSY strerror message is:

  The sys_perf_event_open() syscall returned with 16 (Device or resource busy) for event (intel_bts//).
  "dmesg | grep -i perf" may provide additional information.

The dmesg output won't be useful here. What is more useful is knowing which
processes are potentially using the PMU, which some procfs scanning can
reveal. When running tests/shell/stat_all_pmu.sh in parallel with other
tests, this yields:

  Testing intel_bts//
  Error:
  The PMU intel_bts counters are busy and in use by another process.
  Possible processes:
  2585882 perf list
  2585902 perf list -j -o /tmp/__perf_test.list_output.json.KF9MY
  2585904 perf list
  2585911 perf record -e task-clock --filter period > 1 -o /dev/null --quiet true
  2585912 perf list
  2585915 perf list
  2586042 /tmp/perf/perf record -asdg -e cpu-clock -o /tmp/perftool-testsuite_report.dIF/perf_report/perf.data -- sleep 2
  2589078 perf record -g -e task-clock:u -o - perf test -w noploop
  2589148 /tmp/perf/perf record --control=fifo:control,ack -e cpu-clock -m 1 sleep 10
  2589379 perf --buildid-dir /tmp/perf.debug.Umx record --buildid-all -o /tmp/perf.data.YBm /tmp/perf.ex.MD5.ZQW
  2589568 perf record -o /tmp/__perf_test.program.mtcZH/perf.data --branch-filter any,save_type,u -- perf test -w brstack
  2589649 perf record --per-thread -o /tmp/__perf_test.perf.data.5d3dc perf test -w thloop
  2589898 perf record -o /tmp/perf-test-script.BX2b27Dcnj/pp-perf.data --sample-cpu uname

This gets a little closer to finding the issue.

Signed-off-by: Ian Rogers <irogers@google.com>
Change-Id: Ie1ed8688286c44e8f44a35e98fed8be3e2a344df
---
 tools/perf/util/evsel.c | 79 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index dbf9c8cee3c56..d001ecfa26bf7 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3286,6 +3286,78 @@ static bool find_process(const char *name)
 	return ret ? false : true;
 }
 
+static int dump_perf_event_processes(char *msg, size_t size)
+{	/* Lists "<pid> <cmdline>" in msg for each process holding a perf_event fd; returns the accumulated scnprintf() count. */
+	DIR *proc_dir;
+	struct dirent *proc_entry;
+	int printed = 0;	/* running offset into msg, also the return value */
+
+	proc_dir = opendir(procfs__mountpoint());
+	if (!proc_dir)
+		return 0;	/* procfs could not be opened: report nothing */
+
+	/* Walk the procfs mount point; only digit-leading directories are examined. */
+	while ((proc_entry = readdir(proc_dir)) != NULL) {
+		char buf[256];	/* reused for the fd path, the link target and the cmdline */
+		DIR *fd_dir;
+		struct dirent *fd_entry;
+		int fd_dir_fd;
+
+		if (proc_entry->d_type != DT_DIR ||
+		    !isdigit(proc_entry->d_name[0]) ||
+		    strlen(proc_entry->d_name) > sizeof(buf) - 4)	/* 4 = strlen("/fd") + terminator */
+			continue;
+
+		scnprintf(buf, sizeof(buf), "%s/fd", proc_entry->d_name);
+		fd_dir_fd = openat(dirfd(proc_dir), buf, O_DIRECTORY);	/* path is relative to proc_dir */
+		if (fd_dir_fd == -1)
+			continue;
+		fd_dir = fdopendir(fd_dir_fd);
+		if (!fd_dir) {
+			close(fd_dir_fd);	/* no DIR stream was created, so close the raw fd here */
+			continue;
+		}
+		while ((fd_entry = readdir(fd_dir)) != NULL) {
+			ssize_t link_size;
+
+			if (fd_entry->d_type != DT_LNK)	/* only symbolic links are inspected */
+				continue;
+			link_size = readlinkat(fd_dir_fd, fd_entry->d_name, buf, sizeof(buf));
+			if (link_size < 0)
+				continue;
+			/* readlinkat() doesn't NUL-terminate buf, so compare at most link_size bytes. NOTE(review): a target that is a shorter prefix of the literal also matches. */
+			if (!strncmp(buf, "anon_inode:[perf_event]", link_size)) {
+				int cmdline_fd;
+				ssize_t cmdline_size;
+
+				scnprintf(buf, sizeof(buf), "%s/cmdline", proc_entry->d_name);	/* overwrites the link target */
+				cmdline_fd = openat(dirfd(proc_dir), buf, O_RDONLY);
+				if (cmdline_fd == -1)
+					continue;	/* moves on to the next fd of the same process */
+				cmdline_size = read(cmdline_fd, buf, sizeof(buf) - 1);	/* keep one byte for the terminator */
+				close(cmdline_fd);
+				if (cmdline_size < 0)
+					continue;	/* moves on to the next fd of the same process */
+				buf[cmdline_size] = '\0';
+				for (ssize_t i = 0; i < cmdline_size; i++) {	/* turn embedded NULs into spaces so "%s" prints all of buf */
+					if (buf[i] == '\0')
+						buf[i] = ' ';
+				}
+
+				if (printed == 0)	/* emit the header once, before the first process line */
+					printed += scnprintf(msg, size, "Possible processes:\n");
+
+				printed += scnprintf(msg + printed, size - printed,
+						"%s %s\n", proc_entry->d_name, buf);
+				break;	/* one line per process: stop scanning its remaining fds */
+			}
+		}
+		closedir(fd_dir);
+	}
+	closedir(proc_dir);
+	return printed;
+}
+
 int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused,
 				     char *msg __maybe_unused,
 				     size_t size __maybe_unused)
@@ -3319,7 +3391,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
 			printed += scnprintf(msg, size,
 				"No permission to enable %s event.\n\n", evsel__name(evsel));
 
-		return scnprintf(msg + printed, size - printed,
+		return printed + scnprintf(msg + printed, size - printed,
 		 "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
 		 "access to performance monitoring and observability operations for processes\n"
 		 "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
@@ -3382,6 +3454,11 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
 			return scnprintf(msg, size,
 	"The PMU counters are busy/taken by another profiler.\n"
 	"We found oprofile daemon running, please stop it and try again.");
+		printed += scnprintf(
+			msg, size,
+			"The PMU %s counters are busy and in use by another process.\n",
+			evsel->pmu ? evsel->pmu->name : "");
+		return printed + dump_perf_event_processes(msg + printed, size - printed);
 		break;
 	case EINVAL:
 		if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
-- 
2.47.0.199.ga7371fff76-goog


^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2025-04-09 15:52 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-11-06  0:30 [PATCH v3 1/3] perf evsel: Improve the evsel__open_strerror for EBUSY Chun-Tse Shao
2024-11-06  0:30 ` [PATCH v3 2/3] perf: Reveal PMU type in fdinfo Chun-Tse Shao
2024-11-14 15:49   ` Ian Rogers
2024-11-14 18:30     ` Chun-Tse Shao
2024-11-21 21:18       ` Chun-Tse Shao
2024-12-19  5:37         ` Chun-Tse Shao
2025-01-07 19:44   ` Arnaldo Carvalho de Melo
2025-03-17 15:42     ` Ian Rogers
2025-04-09 15:51       ` Ian Rogers
2024-11-06  0:30 ` [PATCH v3 3/3] perf evsel: Find process with busy PMUs for EBUSY Chun-Tse Shao
2024-11-14 15:51   ` Ian Rogers
2025-01-06 21:59 ` [PATCH v3 1/3] perf evsel: Improve the evsel__open_strerror " Ian Rogers
2025-01-07 19:42   ` Arnaldo Carvalho de Melo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).