public inbox for linux-riscv@lists.infradead.org
 help / color / mirror / Atom feed
From: cp0613@linux.alibaba.com
To: peterz@infradead.org, mingo@redhat.com, acme@kernel.org,
	namhyung@kernel.org, irogers@google.com,
	dapeng1.mi@linux.intel.com, pjw@kernel.org, alex@ghiti.fr,
	guoren@kernel.org
Cc: linux-perf-users@vger.kernel.org,
	linux-riscv@lists.infradead.org, linux-kernel@vger.kernel.org,
	Chen Pei <cp0613@linux.alibaba.com>
Subject: [PATCH] perf riscv: Add SDT argument parsing for RISC-V
Date: Tue, 14 Apr 2026 20:46:55 +0800	[thread overview]
Message-ID: <20260414124655.129463-1-cp0613@linux.alibaba.com> (raw)

From: Chen Pei <cp0613@linux.alibaba.com>

Implement __perf_sdt_arg_parse_op_riscv() to convert RISC-V GCC-generated
SDT probe operands into uprobe-compatible format, and register it in the
perf_sdt_arg_parse_op() dispatcher for EM_RISCV.

RISC-V GCC uses the 'nor' constraint for SDT arguments, producing operands
in the following formats:

  Format       Example      Uprobe format
  -----------  -----------  -------------
  register     a0           %a0
  memory (+)   8(a0)        +8(%a0)
  memory (-)   -20(s0)      -20(%s0)
  constant     99           (skip, not supported by uprobe)

Key differences from other architectures:
 - Register names use ABI aliases (a0-a7, t0-t6, s0-s11, sp, ra, etc.)
   without any '%' prefix (unlike x86's %rax) rather than architectural
   names (unlike arm64's x0).
 - Memory operands use OFFSET(REG) syntax where OFFSET may be negative,
   unlike arm64's [sp, NUM] or powerpc's NUM(%rREG).

Two regexes are used:
 - SDT_OP_REGEX1: matches all RISC-V ABI register names
 - SDT_OP_REGEX2: matches [-]NUM(REG) memory operands

Signed-off-by: Chen Pei <cp0613@linux.alibaba.com>
---

Tested on RISC-V QEMU (rv64gc) with GCC-generated SDT probes.
Requires systemtap-sdt-dev (provides <sys/sdt.h>) on the target system.

  # cat sdt_test.c
  #include <sys/sdt.h>
  #include <stdio.h>
  void my_func(int a, long b) {
      DTRACE_PROBE2(myapp, my_probe, a, b);
      printf("a=%d b=%ld\n", a, b);
  }
  int main() { my_func(42, -100); return 0; }

  # gcc -O0 -Wl,--build-id -o sdt_test_O0 sdt_test.c  # stack args: -20(s0)
  # gcc -O2 -Wl,--build-id -o sdt_test_O2 sdt_test.c  # reg args:   a0
  # perf buildid-cache --add sdt_test_O0
  # perf buildid-cache --add sdt_test_O2
  # find ~/.debug -name "probes" | xargs cat
  p:sdt_myapp/my_probe .../sdt_test_O0:0x... arg1=-20(%s0):s32 arg2=-32(%s0):s64
  p:sdt_myapp/my_probe .../sdt_test_O2:0x... arg1=%a0:s32 arg2=%a1:s64

 .../util/perf-regs-arch/perf_regs_riscv.c     | 126 ++++++++++++++++++
 tools/perf/util/perf_regs.c                   |   3 +
 tools/perf/util/perf_regs.h                   |   1 +
 3 files changed, 130 insertions(+)

diff --git a/tools/perf/util/perf-regs-arch/perf_regs_riscv.c b/tools/perf/util/perf-regs-arch/perf_regs_riscv.c
index 5b5f21fcba8c..51b43e68e0e3 100644
--- a/tools/perf/util/perf-regs-arch/perf_regs_riscv.c
+++ b/tools/perf/util/perf-regs-arch/perf_regs_riscv.c
@@ -1,8 +1,134 @@
 // SPDX-License-Identifier: GPL-2.0
 
+#include <errno.h>
+#include <regex.h>
+#include <string.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+
+#include "../debug.h"
 #include "../perf_regs.h"
 #include "../../arch/riscv/include/perf_regs.h"
 
+/*
+ * RISC-V SDT argument formats (GCC 'nor' constraint):
+ *
+ * Register:  REG        e.g. a0, t1, s0, sp
+ * Memory:    NUM(REG)   e.g. 8(a0), -20(s0)
+ * Constant:  NUM        e.g. 99  (not supported by uprobe, skip)
+ *
+ * Uprobe target format:
+ *   Register: %REG       e.g. %a0
+ *   Memory:   +NUM(%REG) or -NUM(%REG)
+ */
+
+/* Integer ABI register names: zero, ra, sp, gp, tp, t0-t6, s0-s11, a0-a7 */
+#define SDT_OP_REGEX1  "^(zero|ra|sp|gp|tp|t[0-6]|s[0-9]|s1[01]|a[0-7])$"
+
+/* Memory operand [-]NUM(REG); groups: 1 = optional '-', 2 = NUM, 3 = REG */
+#define SDT_OP_REGEX2  "^(\\-)?([0-9]+)\\((zero|ra|sp|gp|tp|t[0-6]|s[0-9]|s1[01]|a[0-7])\\)$"
+
+static regex_t sdt_op_regex1, sdt_op_regex2;	/* compiled by sdt_init_op_regex() */
+
+/*
+ * Compile both operand regexes on first use. Returns 0 on success or
+ * -EINVAL; regcomp()'s positive codes are not propagated, so that a
+ * failure is always reported as a negative value.
+ */
+static int sdt_init_op_regex(void)
+{
+	static int initialized;
+
+	if (initialized)
+		return 0;
+
+	if (regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED))
+		goto error;
+	if (regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED))
+		goto free_regex1;
+
+	initialized = 1;
+	return 0;
+
+free_regex1:
+	regfree(&sdt_op_regex1);
+error:
+	pr_debug4("Regex compilation error.\n");
+	return -EINVAL;
+}
+
+/*
+ * Convert a RISC-V SDT operand (GCC 'nor' constraint) to uprobe format.
+ *
+ *   Format         Example       Uprobe
+ *   ----------------------------------------
+ *   REG            a0            %a0
+ *   NUM(REG)       8(a0)         +8(%a0)
+ *   -NUM(REG)      -20(s0)       -20(%s0)
+ *   NUM            99            (skip, constant not supported)
+ *
+ * Returns SDT_ARG_VALID with *new_op pointing to a zalloc()ed string,
+ * SDT_ARG_SKIP (*new_op untouched) for constants and unrecognized
+ * operands, or a negative errno on failure.
+ */
+int __perf_sdt_arg_parse_op_riscv(char *old_op, char **new_op)
+{
+	int ret, new_len;
+	regmatch_t rm[4];
+	char prefix;
+
+	/*
+	 * Integer constants ("99", "-1") cannot be passed to uprobe, so skip
+	 * them. A '(' marks a memory operand such as "-20(s0)" instead.
+	 */
+	if (strchr(old_op, '(') == NULL &&
+	    ((*old_op >= '0' && *old_op <= '9') ||
+	     (*old_op == '-' && old_op[1] >= '0' && old_op[1] <= '9'))) {
+		pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+		return SDT_ARG_SKIP;
+	}
+
+	ret = sdt_init_op_regex();
+	if (ret)	/* never return a positive regcomp() code */
+		return ret < 0 ? ret : -EINVAL;
+
+	if (!regexec(&sdt_op_regex1, old_op, 2, rm, 0)) {
+		/* REG --> %REG */
+		new_len = 2;	/* '%' and terminating NUL */
+		new_len += (int)(rm[1].rm_eo - rm[1].rm_so);
+
+		*new_op = zalloc(new_len);
+		if (!*new_op)
+			return -ENOMEM;
+
+		scnprintf(*new_op, new_len, "%%%.*s",
+			(int)(rm[1].rm_eo - rm[1].rm_so), old_op + rm[1].rm_so);
+	} else if (!regexec(&sdt_op_regex2, old_op, 4, rm, 0)) {
+		/*
+		 * [-]NUM(REG) --> +NUM(%REG) or -NUM(%REG)
+		 * rm[1]: optional '-' (rm_so is -1 when absent)
+		 * rm[2]: decimal offset, rm[3]: register name
+		 */
+		prefix = (rm[1].rm_so == -1) ? '+' : '-';
+
+		new_len = 5;	/* sign, '(', '%', ')' and terminating NUL */
+		new_len += (int)(rm[2].rm_eo - rm[2].rm_so);
+		new_len += (int)(rm[3].rm_eo - rm[3].rm_so);
+
+		*new_op = zalloc(new_len);
+		if (!*new_op)
+			return -ENOMEM;
+
+		scnprintf(*new_op, new_len, "%c%.*s(%%%.*s)", prefix,
+			(int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so,
+			(int)(rm[3].rm_eo - rm[3].rm_so), old_op + rm[3].rm_so);
+	} else {
+		pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+		return SDT_ARG_SKIP;
+	}
+
+	return SDT_ARG_VALID;
+}
+
 uint64_t __perf_reg_mask_riscv(bool intr __maybe_unused)
 {
 	return PERF_REGS_MASK;
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 5b8f34beb24e..57a1d227d1b2 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -19,6 +19,9 @@ int perf_sdt_arg_parse_op(uint16_t e_machine, char *old_op, char **new_op)
 	case EM_PPC64:
 		ret = __perf_sdt_arg_parse_op_powerpc(old_op, new_op);
 		break;
+	case EM_RISCV:
+		ret = __perf_sdt_arg_parse_op_riscv(old_op, new_op);
+		break;
 	case EM_386:
 	case EM_X86_64:
 		ret = __perf_sdt_arg_parse_op_x86(old_op, new_op);
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index 7c04700bf837..e0b51b514ee0 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -53,6 +53,7 @@ const char *__perf_reg_name_powerpc(int id);
 uint64_t __perf_reg_ip_powerpc(void);
 uint64_t __perf_reg_sp_powerpc(void);
 
+int __perf_sdt_arg_parse_op_riscv(char *old_op, char **new_op);
 uint64_t __perf_reg_mask_riscv(bool intr);
 const char *__perf_reg_name_riscv(int id);
 uint64_t __perf_reg_ip_riscv(void);
-- 
2.50.1


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

                 reply	other threads:[~2026-04-14 12:47 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260414124655.129463-1-cp0613@linux.alibaba.com \
    --to=cp0613@linux.alibaba.com \
    --cc=acme@kernel.org \
    --cc=alex@ghiti.fr \
    --cc=dapeng1.mi@linux.intel.com \
    --cc=guoren@kernel.org \
    --cc=irogers@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=pjw@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox