public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Dapeng Mi <dapeng1.mi@linux.intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Ian Rogers <irogers@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Jiri Olsa <jolsa@kernel.org>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Andi Kleen <ak@linux.intel.com>,
	Eranian Stephane <eranian@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>,
	broonie@kernel.org, Ravi Bangoria <ravi.bangoria@amd.com>,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Zide Chen <zide.chen@intel.com>,
	Falcon Thomas <thomas.falcon@intel.com>,
	Dapeng Mi <dapeng1.mi@intel.com>,
	Xudong Hao <xudong.hao@intel.com>,
	Kan Liang <kan.liang@linux.intel.com>,
	Dapeng Mi <dapeng1.mi@linux.intel.com>
Subject: [Patch v5 19/19] perf regs: Enable dumping of SIMD registers
Date: Wed,  3 Dec 2025 14:55:00 +0800	[thread overview]
Message-ID: <20251203065500.2597594-20-dapeng1.mi@linux.intel.com> (raw)
In-Reply-To: <20251203065500.2597594-1-dapeng1.mi@linux.intel.com>

From: Kan Liang <kan.liang@linux.intel.com>

This patch adds support for dumping SIMD registers using the new
PERF_SAMPLE_REGS_ABI_SIMD ABI.

Currently, the XMM, YMM, ZMM, OPMASK, eGPRs, and SSP registers on x86
platforms are supported with the PERF_SAMPLE_REGS_ABI_SIMD ABI.

An example of the output is displayed below.

Example:

 $perf record -e cycles:p -IXMM,YMM,OPMASK,SSP ./test
 $perf report -D
 ... ...
 237538985992962 0x454d0 [0x480]: PERF_RECORD_SAMPLE(IP, 0x1):
 179370/179370: 0xffffffff969627fc period: 124999 addr: 0
 ... intr regs: mask 0x20000000000 ABI 64-bit
 .... SSP   0x0000000000000000
 ... SIMD ABI nr_vectors 32 vector_qwords 4 nr_pred 8 pred_qwords 1
 .... YMM  [0] 0x0000000000004000
 .... YMM  [0] 0x000055e828695270
 .... YMM  [0] 0x0000000000000000
 .... YMM  [0] 0x0000000000000000
 .... YMM  [1] 0x000055e8286990e0
 .... YMM  [1] 0x000055e828698dd0
 .... YMM  [1] 0x0000000000000000
 .... YMM  [1] 0x0000000000000000
 ... ...
 .... YMM  [31] 0x0000000000000000
 .... YMM  [31] 0x0000000000000000
 .... YMM  [31] 0x0000000000000000
 .... YMM  [31] 0x0000000000000000
 .... OPMASK[0] 0x0000000000100221
 .... OPMASK[1] 0x0000000000000020
 .... OPMASK[2] 0x000000007fffffff
 .... OPMASK[3] 0x0000000000000000
 .... OPMASK[4] 0x0000000000000000
 .... OPMASK[5] 0x0000000000000000
 .... OPMASK[6] 0x0000000000000000
 .... OPMASK[7] 0x0000000000000000
 ... ...

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Co-developed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 tools/perf/util/evsel.c                       | 20 +++++
 .../perf/util/perf-regs-arch/perf_regs_x86.c  | 43 ++++++++++
 tools/perf/util/sample.h                      | 10 +++
 tools/perf/util/session.c                     | 78 +++++++++++++++++--
 4 files changed, 143 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 5d1d90cf9488..8f3fafe3a43f 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3347,6 +3347,16 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
 			regs->mask = mask;
 			regs->regs = (u64 *)array;
 			array = (void *)array + sz;
+
+			if (regs->abi & PERF_SAMPLE_REGS_ABI_SIMD) {
+				regs->config = *(u64 *)array;
+				array = (void *)array + sizeof(u64);
+				regs->data = (u64 *)array;
+				sz = (regs->nr_vectors * regs->vector_qwords +
+				      regs->nr_pred * regs->pred_qwords) * sizeof(u64);
+				OVERFLOW_CHECK(array, sz, max_size);
+				array = (void *)array + sz;
+			}
 		}
 	}
 
@@ -3404,6 +3414,16 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
 			regs->mask = mask;
 			regs->regs = (u64 *)array;
 			array = (void *)array + sz;
+
+			if (regs->abi & PERF_SAMPLE_REGS_ABI_SIMD) {
+				regs->config = *(u64 *)array;
+				array = (void *)array + sizeof(u64);
+				regs->data = (u64 *)array;
+				sz = (regs->nr_vectors * regs->vector_qwords +
+				      regs->nr_pred * regs->pred_qwords) * sizeof(u64);
+				OVERFLOW_CHECK(array, sz, max_size);
+				array = (void *)array + sz;
+			}
 		}
 	}
 
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_x86.c b/tools/perf/util/perf-regs-arch/perf_regs_x86.c
index 708954a9d35d..32dac438b12d 100644
--- a/tools/perf/util/perf-regs-arch/perf_regs_x86.c
+++ b/tools/perf/util/perf-regs-arch/perf_regs_x86.c
@@ -5,6 +5,49 @@
 
 const char *__perf_reg_name_x86(int id)
 {
+	if (id > PERF_REG_X86_R15 && arch__intr_simd_reg_mask()) {
+		switch (id) {
+		case PERF_REG_X86_R16:
+			return "R16";
+		case PERF_REG_X86_R17:
+			return "R17";
+		case PERF_REG_X86_R18:
+			return "R18";
+		case PERF_REG_X86_R19:
+			return "R19";
+		case PERF_REG_X86_R20:
+			return "R20";
+		case PERF_REG_X86_R21:
+			return "R21";
+		case PERF_REG_X86_R22:
+			return "R22";
+		case PERF_REG_X86_R23:
+			return "R23";
+		case PERF_REG_X86_R24:
+			return "R24";
+		case PERF_REG_X86_R25:
+			return "R25";
+		case PERF_REG_X86_R26:
+			return "R26";
+		case PERF_REG_X86_R27:
+			return "R27";
+		case PERF_REG_X86_R28:
+			return "R28";
+		case PERF_REG_X86_R29:
+			return "R29";
+		case PERF_REG_X86_R30:
+			return "R30";
+		case PERF_REG_X86_R31:
+			return "R31";
+		case PERF_REG_X86_SSP:
+			return "SSP";
+		default:
+			return NULL;
+		}
+
+		return NULL;
+	}
+
 	switch (id) {
 	case PERF_REG_X86_AX:
 		return "AX";
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index fae834144ef4..3b247e0e8242 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -12,6 +12,16 @@ struct regs_dump {
 	u64 abi;
 	u64 mask;
 	u64 *regs;
+	union {
+		u64 config;
+		struct {
+			u16 nr_vectors;
+			u16 vector_qwords;
+			u16 nr_pred;
+			u16 pred_qwords;
+		};
+	};
+	u64 *data;
 
 	/* Cached values/mask filled by first register access. */
 	u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE];
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 09af486c83e4..c692be265c21 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -927,18 +927,78 @@ static void regs_dump__printf(u64 mask, u64 *regs, const char *arch)
 	}
 }
 
-static const char *regs_abi[] = {
-	[PERF_SAMPLE_REGS_ABI_NONE] = "none",
-	[PERF_SAMPLE_REGS_ABI_32] = "32-bit",
-	[PERF_SAMPLE_REGS_ABI_64] = "64-bit",
-};
+static void simd_regs_dump__printf(struct regs_dump *regs, bool intr)
+{
+	const char *name = "unknown";
+	const struct sample_reg *r;
+	int i, idx = 0;
+	u16 qwords;
+	int reg_idx;
+
+	if (!(regs->abi & PERF_SAMPLE_REGS_ABI_SIMD))
+		return;
+
+	printf("... SIMD ABI nr_vectors %d vector_qwords %d nr_pred %d pred_qwords %d\n",
+	       regs->nr_vectors, regs->vector_qwords,
+	       regs->nr_pred, regs->pred_qwords);
+
+	for (r = arch__sample_simd_reg_masks(); r->name; r++) {
+		if (!fls64(r->mask))
+			continue;
+		reg_idx = fls64(r->mask) - 1;
+		if (intr)
+			arch__intr_simd_reg_bitmap_qwords(reg_idx, &qwords);
+		else
+			arch__user_simd_reg_bitmap_qwords(reg_idx, &qwords);
+		if (regs->vector_qwords == qwords) {
+			name = r->name;
+			break;
+		}
+	}
+
+	for (i = 0; i < regs->nr_vectors; i++) {
+		printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->data[idx++]);
+		printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->data[idx++]);
+		if (regs->vector_qwords > 2) {
+			printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->data[idx++]);
+			printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->data[idx++]);
+		}
+		if (regs->vector_qwords > 4) {
+			printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->data[idx++]);
+			printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->data[idx++]);
+			printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->data[idx++]);
+			printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->data[idx++]);
+		}
+	}
+
+	name = "unknown";
+	for (r = arch__sample_pred_reg_masks(); r->name; r++) {
+		if (!fls64(r->mask))
+			continue;
+		reg_idx = fls64(r->mask) - 1;
+		if (intr)
+			arch__intr_pred_reg_bitmap_qwords(reg_idx, &qwords);
+		else
+			arch__user_pred_reg_bitmap_qwords(reg_idx, &qwords);
+		if (regs->pred_qwords == qwords) {
+			name = r->name;
+			break;
+		}
+	}
+	for (i = 0; i < regs->nr_pred; i++)
+		printf(".... %-5s[%d] 0x%016" PRIx64 "\n", name, i, regs->data[idx++]);
+}
 
 static inline const char *regs_dump_abi(struct regs_dump *d)
 {
-	if (d->abi > PERF_SAMPLE_REGS_ABI_64)
-		return "unknown";
+	if (!d->abi)
+		return "none";
+	if (d->abi & PERF_SAMPLE_REGS_ABI_32)
+		return "32-bit";
+	else if (d->abi & PERF_SAMPLE_REGS_ABI_64)
+		return "64-bit";
 
-	return regs_abi[d->abi];
+	return "unknown";
 }
 
 static void regs__printf(const char *type, struct regs_dump *regs, const char *arch)
@@ -964,6 +1024,7 @@ static void regs_user__printf(struct perf_sample *sample, const char *arch)
 
 	if (user_regs->regs)
 		regs__printf("user", user_regs, arch);
+	simd_regs_dump__printf(user_regs, false);
 }
 
 static void regs_intr__printf(struct perf_sample *sample, const char *arch)
@@ -977,6 +1038,7 @@ static void regs_intr__printf(struct perf_sample *sample, const char *arch)
 
 	if (intr_regs->regs)
 		regs__printf("intr", intr_regs, arch);
+	simd_regs_dump__printf(intr_regs, true);
 }
 
 static void stack_user__printf(struct stack_dump *dump)
-- 
2.34.1


  parent reply	other threads:[~2025-12-03  6:59 UTC|newest]

Thread overview: 86+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-03  6:54 [Patch v5 00/19] Support SIMD/eGPRs/SSP registers sampling for perf Dapeng Mi
2025-12-03  6:54 ` [Patch v5 01/19] perf: Eliminate duplicate arch-specific functions definations Dapeng Mi
2025-12-03  6:54 ` [Patch v5 02/19] perf/x86: Use x86_perf_regs in the x86 nmi handler Dapeng Mi
2025-12-03  6:54 ` [Patch v5 03/19] perf/x86: Introduce x86-specific x86_pmu_setup_regs_data() Dapeng Mi
2025-12-03  6:54 ` [Patch v5 04/19] x86/fpu/xstate: Add xsaves_nmi() helper Dapeng Mi
2025-12-03  6:54 ` [Patch v5 05/19] perf: Move and rename has_extended_regs() for ARCH-specific use Dapeng Mi
2025-12-03  6:54 ` [Patch v5 06/19] perf/x86: Add support for XMM registers in non-PEBS and REGS_USER Dapeng Mi
2025-12-04 15:17   ` Peter Zijlstra
2025-12-04 15:47     ` Peter Zijlstra
2025-12-05  6:37       ` Mi, Dapeng
2025-12-04 18:59     ` Dave Hansen
2025-12-05  8:42       ` Peter Zijlstra
2025-12-03  6:54 ` [Patch v5 07/19] perf: Add sampling support for SIMD registers Dapeng Mi
2025-12-05 11:07   ` Peter Zijlstra
2025-12-08  5:24     ` Mi, Dapeng
2025-12-05 11:40   ` Peter Zijlstra
2025-12-08  6:00     ` Mi, Dapeng
2025-12-03  6:54 ` [Patch v5 08/19] perf/x86: Enable XMM sampling using sample_simd_vec_reg_* fields Dapeng Mi
2025-12-05 11:25   ` Peter Zijlstra
2025-12-08  6:10     ` Mi, Dapeng
2025-12-03  6:54 ` [Patch v5 09/19] perf/x86: Enable YMM " Dapeng Mi
2025-12-03  6:54 ` [Patch v5 10/19] perf/x86: Enable ZMM " Dapeng Mi
2025-12-03  6:54 ` [Patch v5 11/19] perf/x86: Enable OPMASK sampling using sample_simd_pred_reg_* fields Dapeng Mi
2025-12-03  6:54 ` [Patch v5 12/19] perf/x86: Enable eGPRs sampling using sample_regs_* fields Dapeng Mi
2025-12-05 12:16   ` Peter Zijlstra
2025-12-08  6:11     ` Mi, Dapeng
2025-12-03  6:54 ` [Patch v5 13/19] perf/x86: Enable SSP " Dapeng Mi
2025-12-05 12:20   ` Peter Zijlstra
2025-12-08  6:21     ` Mi, Dapeng
2025-12-24  5:45   ` Ravi Bangoria
2025-12-24  6:26     ` Mi, Dapeng
2026-01-06  6:55       ` Mi, Dapeng
2025-12-03  6:54 ` [Patch v5 14/19] perf/x86/intel: Enable PERF_PMU_CAP_SIMD_REGS capability Dapeng Mi
2025-12-03  6:54 ` [Patch v5 15/19] perf/x86/intel: Enable arch-PEBS based SIMD/eGPRs/SSP sampling Dapeng Mi
2025-12-03  6:54 ` [Patch v5 16/19] perf/x86: Activate back-to-back NMI detection for arch-PEBS induced NMIs Dapeng Mi
2025-12-05 12:39   ` Peter Zijlstra
2025-12-07 20:44     ` Andi Kleen
2025-12-08  6:46     ` Mi, Dapeng
2025-12-08  8:50       ` Peter Zijlstra
2025-12-08  8:53         ` Mi, Dapeng
2025-12-03  6:54 ` [Patch v5 17/19] perf headers: Sync with the kernel headers Dapeng Mi
2025-12-03 23:43   ` Ian Rogers
2025-12-04  1:37     ` Mi, Dapeng
2025-12-04  7:28       ` Ian Rogers
2026-01-20  7:01   ` Ian Rogers
2026-01-20  7:25     ` Mi, Dapeng
2026-01-20  7:16   ` Ian Rogers
2026-01-20  7:43     ` Mi, Dapeng
2026-01-20  8:00       ` Ian Rogers
2026-01-20  9:22         ` Mi, Dapeng
2026-01-20 18:11           ` Ian Rogers
2026-01-21  2:03             ` Mi, Dapeng
2025-12-03  6:54 ` [Patch v5 18/19] perf parse-regs: Support new SIMD sampling format Dapeng Mi
2025-12-04  0:17   ` Ian Rogers
2025-12-04  2:58     ` Mi, Dapeng
2025-12-04  7:49       ` Ian Rogers
2025-12-04  9:20         ` Mi, Dapeng
2025-12-04 16:16           ` Ian Rogers
2025-12-05  4:00             ` Mi, Dapeng
2025-12-05  6:38               ` Ian Rogers
2025-12-05  8:10                 ` Mi, Dapeng
2025-12-05 16:35                   ` Ian Rogers
2025-12-08  4:20                     ` Mi, Dapeng
2026-01-06  7:27                       ` Mi, Dapeng
2026-01-17  5:50                         ` Ian Rogers
2026-01-19  6:55                           ` Mi, Dapeng
2026-01-19 20:25                             ` Ian Rogers
2026-01-20  3:04                               ` Mi, Dapeng
2026-01-20  5:16                                 ` Ian Rogers
2026-01-20  6:46                                   ` Mi, Dapeng
2026-01-20  6:56                                     ` Ian Rogers
2026-01-20  7:39   ` Ian Rogers
2026-01-20  9:04     ` Mi, Dapeng
2026-01-20 18:20       ` Ian Rogers
2026-01-21  5:17         ` Mi, Dapeng
2026-01-21  7:09           ` Ian Rogers
2026-01-21  7:52             ` Mi, Dapeng
2026-01-21 14:48               ` Ian Rogers
2026-01-22  1:49                 ` Mi, Dapeng
2026-01-22  7:27                   ` Ian Rogers
2026-01-22  8:29                     ` Mi, Dapeng
2025-12-03  6:55 ` Dapeng Mi [this message]
2025-12-04  0:24 ` [Patch v5 00/19] Support SIMD/eGPRs/SSP registers sampling for perf Ian Rogers
2025-12-04  3:28   ` Mi, Dapeng
2025-12-16  4:42 ` Ravi Bangoria
2025-12-16  6:59   ` Mi, Dapeng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251203065500.2597594-20-dapeng1.mi@linux.intel.com \
    --to=dapeng1.mi@linux.intel.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=broonie@kernel.org \
    --cc=dapeng1.mi@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=eranian@google.com \
    --cc=irogers@google.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=ravi.bangoria@amd.com \
    --cc=tglx@linutronix.de \
    --cc=thomas.falcon@intel.com \
    --cc=xudong.hao@intel.com \
    --cc=zide.chen@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox