* [PATCH] perf riscv: Add SDT argument parsing for RISC-V
@ 2026-04-14 12:46 cp0613
2026-04-14 13:17 ` sashiko-bot
0 siblings, 1 reply; 3+ messages in thread
From: cp0613 @ 2026-04-14 12:46 UTC (permalink / raw)
To: peterz, mingo, acme, namhyung, irogers, dapeng1.mi, pjw, alex,
guoren
Cc: linux-perf-users, linux-riscv, linux-kernel, Chen Pei
From: Chen Pei <cp0613@linux.alibaba.com>
Implement __perf_sdt_arg_parse_op_riscv() to convert RISC-V GCC-generated
SDT probe operands into uprobe-compatible format, and register it in the
perf_sdt_arg_parse_op() dispatcher for EM_RISCV.
RISC-V GCC uses the 'nor' constraint for SDT arguments, producing operands
in the following formats:
  Format       Example      Uprobe format
  -----------  -----------  -------------
  register     a0           %a0
  memory (+)   8(a0)        +8(%a0)
  memory (-)   -20(s0)      -20(%s0)
  constant     99           (skip, not supported by uprobe)
Key differences from other architectures:
- Register names use ABI aliases (a0-a7, t0-t6, s0-s11, sp, ra, etc.)
without any '%' prefix, unlike x86 (%rax) or arm64 (x0).
- Memory operands use OFFSET(REG) syntax where OFFSET may be negative,
unlike arm64's [sp, NUM] or powerpc's NUM(%rREG).
Two regexes are used:
- SDT_OP_REGEX1: matches all RISC-V ABI register names
- SDT_OP_REGEX2: matches [-]NUM(REG) memory operands
Signed-off-by: Chen Pei <cp0613@linux.alibaba.com>
---
Tested on RISC-V QEMU(rv64gc) with GCC-generated SDT probes.
Requires systemtap-sdt-dev (provides <sys/sdt.h>) on the target system.
# cat sdt_test.c
#include <sys/sdt.h>
#include <stdio.h>
void my_func(int a, long b) {
DTRACE_PROBE2(myapp, my_probe, a, b);
printf("a=%d b=%ld\n", a, b);
}
int main() { my_func(42, -100); return 0; }
# gcc -O0 -Wl,--build-id -o sdt_test_O0 sdt_test.c # stack args: -20(s0)
# gcc -O2 -Wl,--build-id -o sdt_test_O2 sdt_test.c # reg args: a0
# perf buildid-cache --add sdt_test_O0
# perf buildid-cache --add sdt_test_O2
# find ~/.debug -name "probes" | xargs cat
p:sdt_myapp/my_probe .../sdt_test_O0:0x... arg1=-20(%s0):s32 arg2=-32(%s0):s64
p:sdt_myapp/my_probe .../sdt_test_O2:0x... arg1=%a0:s32 arg2=%a1:s64
.../util/perf-regs-arch/perf_regs_riscv.c | 126 ++++++++++++++++++
tools/perf/util/perf_regs.c | 3 +
tools/perf/util/perf_regs.h | 1 +
3 files changed, 130 insertions(+)
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_riscv.c b/tools/perf/util/perf-regs-arch/perf_regs_riscv.c
index 5b5f21fcba8c..51b43e68e0e3 100644
--- a/tools/perf/util/perf-regs-arch/perf_regs_riscv.c
+++ b/tools/perf/util/perf-regs-arch/perf_regs_riscv.c
@@ -1,8 +1,134 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <regex.h>
+#include <string.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+
+#include "../debug.h"
#include "../perf_regs.h"
#include "../../arch/riscv/include/perf_regs.h"
+/*
+ * RISC-V SDT argument formats (GCC 'nor' constraint):
+ *
+ * Register: REG e.g. a0, t1, s0, sp
+ * Memory: NUM(REG) e.g. 8(a0), -20(s0)
+ * Constant: NUM e.g. 99 (not supported by uprobe, skip)
+ *
+ * Uprobe target format:
+ * Register: %REG e.g. %a0
+ * Memory: +NUM(%REG) or -NUM(%REG)
+ */
+
+/*
+ * RISC-V register ABI names accepted in SDT operands:
+ * ra, sp, gp, tp, t0-t6, s0-s11, a0-a7.
+ *
+ * 'zero' (x0) is deliberately not matched: it is hardwired, is not saved
+ * in pt_regs and has no entry in the kernel's RISC-V register-name table,
+ * so a '%zero' fetch-arg would be rejected when the probe is written to
+ * tracefs.  Operands naming it match neither regex and are skipped.
+ */
+#define SDT_OP_REGEX1 "^(ra|sp|gp|tp|t[0-6]|s[0-9]|s1[01]|a[0-7])$"
+
+/*
+ * RISC-V memory operand: [-]NUM(REG), REG being one of the names above.
+ * Capture groups: 1 = optional '-', 2 = decimal offset, 3 = register name.
+ */
+#define SDT_OP_REGEX2 "^(\\-)?([0-9]+)\\((ra|sp|gp|tp|t[0-6]|s[0-9]|s1[01]|a[0-7])\\)$"
+
+/* Compiled forms of SDT_OP_REGEX1 and SDT_OP_REGEX2. */
+static regex_t sdt_op_regex1, sdt_op_regex2;
+
+/*
+ * Compile both operand regexes on first use; later calls are no-ops.
+ *
+ * Returns 0 on success (or if already initialized) and -EINVAL if either
+ * pattern fails to compile.  regcomp() signals failure with a positive
+ * REG_* code, so it is mapped to a negative errno here; otherwise a caller
+ * testing "ret < 0" would miss the failure and go on to regexec() a regex
+ * that was never compiled or has already been freed.
+ */
+static int sdt_init_op_regex(void)
+{
+	static int initialized;
+	int ret;
+
+	if (initialized)
+		return 0;
+
+	ret = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED);
+	if (ret)
+		goto error;
+
+	ret = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED);
+	if (ret)
+		goto free_regex1;
+
+	initialized = 1;
+	return 0;
+
+free_regex1:
+	/* Only the first regex was compiled; release it before bailing out. */
+	regfree(&sdt_op_regex1);
+error:
+	pr_debug4("Regex compilation error.\n");
+	return -EINVAL;
+}
+
+/*
+ * Parse OP and convert it into uprobe format.
+ * Possible variants of OP (RISC-V, GCC 'nor' constraint):
+ *
+ *   Format      Example     Uprobe
+ *   ----------------------------------------------------
+ *   REG         a0          %a0
+ *   NUM(REG)    8(a0)       +8(%a0)
+ *   -NUM(REG)   -20(s0)     -20(%s0)
+ *   NUM         99          (skip, constant not supported)
+ *
+ * @old_op: NUL-terminated operand text to convert.
+ * @new_op: receives a zalloc()'ed string with the converted operand when
+ *          SDT_ARG_VALID is returned.
+ *
+ * Returns SDT_ARG_VALID if the operand was converted, SDT_ARG_SKIP if it
+ * is a constant or matches neither operand regex, -ENOMEM if the output
+ * string cannot be allocated, and a negative errno if the regexes cannot
+ * be compiled.
+ */
+int __perf_sdt_arg_parse_op_riscv(char *old_op, char **new_op)
+{
+	int ret, new_len;
+	regmatch_t rm[4];
+	char prefix;
+
+	/*
+	 * Constant argument: pure integer with no '(' (e.g. "99", "-1").
+	 * uprobe does not support immediate values, so skip them.
+	 * Memory operands like "8(a0)" or "-20(s0)" contain '(' so are NOT
+	 * treated as constants here; they will be matched by REGEX2 below.
+	 */
+	if (strchr(old_op, '(') == NULL &&
+	    ((*old_op >= '0' && *old_op <= '9') ||
+	     (*old_op == '-' && old_op[1] >= '0' && old_op[1] <= '9'))) {
+		pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+		return SDT_ARG_SKIP;
+	}
+
+	/*
+	 * Treat any non-zero result as failure: regcomp() error codes are
+	 * positive, so a "ret < 0" test would let a failed initialization
+	 * through to regexec() below.  Always hand back a negative errno so
+	 * the error cannot be mistaken for an SDT_ARG_* result.
+	 */
+	ret = sdt_init_op_regex();
+	if (ret)
+		return ret < 0 ? ret : -EINVAL;
+
+	if (!regexec(&sdt_op_regex1, old_op, 2, rm, 0)) {
+		/* REG --> %REG */
+		new_len = 2;	/* % NULL */
+		new_len += (int)(rm[1].rm_eo - rm[1].rm_so);
+
+		*new_op = zalloc(new_len);
+		if (!*new_op)
+			return -ENOMEM;
+
+		scnprintf(*new_op, new_len, "%%%.*s",
+			  (int)(rm[1].rm_eo - rm[1].rm_so), old_op + rm[1].rm_so);
+	} else if (!regexec(&sdt_op_regex2, old_op, 4, rm, 0)) {
+		/*
+		 * NUM(REG) or -NUM(REG) --> +NUM(%REG) or -NUM(%REG)
+		 * rm[1]: optional '-'
+		 * rm[2]: decimal offset
+		 * rm[3]: register name
+		 */
+		/* An unmatched optional group is reported with rm_so == -1. */
+		prefix = (rm[1].rm_so == -1) ? '+' : '-';
+
+		new_len = 5;	/* sign ( % ) NULL */
+		new_len += (int)(rm[2].rm_eo - rm[2].rm_so);
+		new_len += (int)(rm[3].rm_eo - rm[3].rm_so);
+
+		*new_op = zalloc(new_len);
+		if (!*new_op)
+			return -ENOMEM;
+
+		scnprintf(*new_op, new_len, "%c%.*s(%%%.*s)", prefix,
+			  (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so,
+			  (int)(rm[3].rm_eo - rm[3].rm_so), old_op + rm[3].rm_so);
+	} else {
+		pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+		return SDT_ARG_SKIP;
+	}
+
+	return SDT_ARG_VALID;
+}
+
uint64_t __perf_reg_mask_riscv(bool intr __maybe_unused)
{
return PERF_REGS_MASK;
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 5b8f34beb24e..57a1d227d1b2 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -19,6 +19,9 @@ int perf_sdt_arg_parse_op(uint16_t e_machine, char *old_op, char **new_op)
case EM_PPC64:
ret = __perf_sdt_arg_parse_op_powerpc(old_op, new_op);
break;
+ case EM_RISCV:
+ ret = __perf_sdt_arg_parse_op_riscv(old_op, new_op);
+ break;
case EM_386:
case EM_X86_64:
ret = __perf_sdt_arg_parse_op_x86(old_op, new_op);
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index 7c04700bf837..e0b51b514ee0 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -53,6 +53,7 @@ const char *__perf_reg_name_powerpc(int id);
uint64_t __perf_reg_ip_powerpc(void);
uint64_t __perf_reg_sp_powerpc(void);
+int __perf_sdt_arg_parse_op_riscv(char *old_op, char **new_op);
uint64_t __perf_reg_mask_riscv(bool intr);
const char *__perf_reg_name_riscv(int id);
uint64_t __perf_reg_ip_riscv(void);
--
2.50.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] perf riscv: Add SDT argument parsing for RISC-V
2026-04-14 12:46 [PATCH] perf riscv: Add SDT argument parsing for RISC-V cp0613
@ 2026-04-14 13:17 ` sashiko-bot
2026-04-16 2:49 ` Chen Pei
0 siblings, 1 reply; 3+ messages in thread
From: sashiko-bot @ 2026-04-14 13:17 UTC (permalink / raw)
To: Unknown; +Cc: linux-perf-users
Sashiko AI review found 2 potential issue(s):
- [Medium] The 'zero' register is translated to '%zero' for uprobes, causing the kernel to reject the probe with -EINVAL.
- [Medium] Incorrect error checking of `sdt_init_op_regex()` leads to a Use-After-Free or a crash due to uninitialized `regex_t` structures if `regcomp()` fails.
--
commit 8e1a4d45321e1b5de5891b9276cd561935f4b98f
Author: Chen Pei <cp0613@linux.alibaba.com>
perf riscv: Add SDT argument parsing for RISC-V
This commit implements __perf_sdt_arg_parse_op_riscv() to parse RISC-V SDT
probe operands and convert them into uprobe-compatible formats. It uses
regular expressions to match register and memory operand formats.
> diff --git a/tools/perf/util/perf-regs-arch/perf_regs_riscv.c b/tools/perf/util/perf-regs-arch/perf_regs_riscv.c
> index 5b5f21fcba8c6..51b43e68e0e3d 100644
> --- a/tools/perf/util/perf-regs-arch/perf_regs_riscv.c
> +++ b/tools/perf/util/perf-regs-arch/perf_regs_riscv.c
[ ... ]
> +/* RISC-V register ABI names: zero, ra, sp, gp, tp, t0-t6, s0-s11, a0-a7 */
> +#define SDT_OP_REGEX1 "^(zero|ra|sp|gp|tp|t[0-6]|s[0-9]|s1[01]|a[0-7])$"
> +
> +/* RISC-V memory operand: [-]NUM(REG) */
> +#define SDT_OP_REGEX2 "^(\\-)?([0-9]+)\\((zero|ra|sp|gp|tp|t[0-6]|s[0-9]|s1[01]|a[0-7])\\)$"
Since the 'zero' register is hardwired and not saved in pt_regs, will the
kernel reject probes with a BAD_REG_NAME error when perf translates it
to '%zero' for tracefs?
Should 'zero' be excluded from these regexes and instead treated as an
unsupported constant-like argument to be skipped with SDT_ARG_SKIP?
[ ... ]
> +static int sdt_init_op_regex(void)
> +{
> + static int initialized;
> + int ret = 0;
> +
> + if (initialized)
> + return 0;
> +
> + ret = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED);
> + if (ret)
> + goto error;
> +
> + ret = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED);
> + if (ret)
> + goto free_regex1;
> +
> + initialized = 1;
> + return 0;
> +
> +free_regex1:
> + regfree(&sdt_op_regex1);
> +error:
> + pr_debug4("Regex compilation error.\n");
> + return ret;
> +}
[ ... ]
> +int __perf_sdt_arg_parse_op_riscv(char *old_op, char **new_op)
> +{
> + int ret, new_len;
> + regmatch_t rm[4];
> + char prefix;
[ ... ]
> + ret = sdt_init_op_regex();
> + if (ret < 0)
> + return ret;
Does this properly catch regex compilation failures?
POSIX regcomp() returns a strictly positive integer (like REG_ESPACE)
on error, so this condition will evaluate to false if regcomp() fails.
If the first regcomp() in sdt_init_op_regex() fails, could this lead to a
crash when calling regexec() on the zero-initialized sdt_op_regex1 below?
Alternatively, if the second regcomp() fails, sdt_init_op_regex()
calls regfree(&sdt_op_regex1) before returning the positive error code.
Could this cause a use-after-free when regexec() operates on the freed
regex object?
> +
> + if (!regexec(&sdt_op_regex1, old_op, 2, rm, 0)) {
> + /* REG --> %REG */
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260414124655.129463-1-cp0613@linux.alibaba.com?part=1
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] perf riscv: Add SDT argument parsing for RISC-V
2026-04-14 13:17 ` sashiko-bot
@ 2026-04-16 2:49 ` Chen Pei
0 siblings, 0 replies; 3+ messages in thread
From: Chen Pei @ 2026-04-16 2:49 UTC (permalink / raw)
To: sashiko-bot; +Cc: linux-perf-users, sashiko
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=y, Size: 3224 bytes --]
On Tue, 14 Apr 2026 13:17:52 +0000, sashiko-bot@kernel.org wrote:
> > diff --git a/tools/perf/util/perf-regs-arch/perf_regs_riscv.c b/tools/perf/util/perf-regs-arch/perf_regs_riscv.c
> > index 5b5f21fcba8c6..51b43e68e0e3d 100644
> > --- a/tools/perf/util/perf-regs-arch/perf_regs_riscv.c
> > +++ b/tools/perf/util/perf-regs-arch/perf_regs_riscv.c
>
> [ ... ]
>
> > +/* RISC-V register ABI names: zero, ra, sp, gp, tp, t0-t6, s0-s11, a0-a7 */
> > +#define SDT_OP_REGEX1 "^(zero|ra|sp|gp|tp|t[0-6]|s[0-9]|s1[01]|a[0-7])$"
> > +
> > +/* RISC-V memory operand: [-]NUM(REG) */
> > +#define SDT_OP_REGEX2 "^(\\-)?([0-9]+)\\((zero|ra|sp|gp|tp|t[0-6]|s[0-9]|s1[01]|a[0-7])\\)$"
>
> Since the 'zero' register is hardwired and not saved in pt_regs, will the
> kernel reject probes with a BAD_REG_NAME error when perf translates it
> to '%zero' for tracefs?
>
> Should 'zero' be excluded from these regexes and instead treated as an
> unsupported constant-like argument to be skipped with SDT_ARG_SKIP?
Good point. The 'zero' register (x0) is not saved in pt_regs and is absent
from regoffset_table in arch/riscv/kernel/ptrace.c, so passing '%zero' to
tracefs would trigger BAD_REG_NAME. Fixed by removing 'zero' from both regexes
and letting it fall through to SDT_ARG_SKIP.
> [ ... ]
>
> > +static int sdt_init_op_regex(void)
> > +{
> > + static int initialized;
> > + int ret = 0;
> > +
> > + if (initialized)
> > + return 0;
> > +
> > + ret = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED);
> > + if (ret)
> > + goto error;
> > +
> > + ret = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED);
> > + if (ret)
> > + goto free_regex1;
> > +
> > + initialized = 1;
> > + return 0;
> > +
> > +free_regex1:
> > + regfree(&sdt_op_regex1);
> > +error:
> > + pr_debug4("Regex compilation error.\n");
> > + return ret;
> > +}
>
> [ ... ]
>
> > +int __perf_sdt_arg_parse_op_riscv(char *old_op, char **new_op)
> > +{
> > + int ret, new_len;
> > + regmatch_t rm[4];
> > + char prefix;
>
> [ ... ]
>
> > + ret = sdt_init_op_regex();
> > + if (ret < 0)
> > + return ret;
>
> Does this properly catch regex compilation failures?
>
> POSIX regcomp() returns a strictly positive integer (like REG_ESPACE)
> on error, so this condition will evaluate to false if regcomp() fails.
>
> If the first regcomp() in sdt_init_op_regex() fails, could this lead to a
> crash when calling regexec() on the zero-initialized sdt_op_regex1 below?
>
> Alternatively, if the second regcomp() fails, sdt_init_op_regex()
> calls regfree(&sdt_op_regex1) before returning the positive error code.
>
> Could this cause a use-after-free when regexec() operates on the freed
> regex object?
Good catch. regcomp() returns a strictly positive integer on error, so
ret < 0 was indeed always false, causing regexec() to be called on an
uninitialized or freed regex object.
Fixed by returning -ret at the error path, so the caller's if (ret < 0)
check properly catches the failure and bails out before reaching regexec().
> > +
> > + if (!regexec(&sdt_op_regex1, old_op, 2, rm, 0)) {
> > + /* REG --> %REG */
>
> --
> Sashiko AI review · https://sashiko.dev/#/patchset/20260414124655.129463-1-cp0613@linux.alibaba.com?part=1
Thanks,
Pei
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-04-16 2:49 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-14 12:46 [PATCH] perf riscv: Add SDT argument parsing for RISC-V cp0613
2026-04-14 13:17 ` sashiko-bot
2026-04-16 2:49 ` Chen Pei
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox