From: Steven Rostedt <rostedt@kernel.org>
To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>,
Mark Rutland <mark.rutland@arm.com>,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Andrew Morton <akpm@linux-foundation.org>,
Peter Zijlstra <peterz@infradead.org>,
Namhyung Kim <namhyung@kernel.org>,
Takaya Saeki <takayas@google.com>,
Tom Zanussi <zanussi@kernel.org>,
Thomas Gleixner <tglx@linutronix.de>,
Ian Rogers <irogers@google.com>,
Douglas Raillard <douglas.raillard@arm.com>
Subject: [PATCH v2 4/8] tracing: Have system call events record user array data
Date: Tue, 23 Sep 2025 09:05:01 -0400 [thread overview]
Message-ID: <20250923130714.101202613@kernel.org> (raw)
In-Reply-To: 20250923130457.901085554@kernel.org
From: Steven Rostedt <rostedt@goodmis.org>
For system call events that have a length field, add a "user_arg_size"
parameter to the system call meta data. It holds the index into the args
array of the argument that contains the size of the user space data
pointed to by the argument whose bit is set in the user_mask field.
The "user_mask" has a bit set that denotes the arg that points to an array
in the user space address space. If a system call event has both the
user_mask field and the user_arg_size field set, it will record the
content at that address into the trace event, up to the size given by the
size argument, capped at SYSCALL_FAULT_BUF_SZ - 1.
This allows the output to look like:
sys_write(fd: 0xa, buf: 0x5646978d13c0 (01:00:05:00:00:00:00:00:01:87:55:89:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00), count: 0x20)
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
include/trace/syscall.h | 4 +-
kernel/trace/trace_syscalls.c | 111 +++++++++++++++++++++++++---------
2 files changed, 86 insertions(+), 29 deletions(-)
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 85f21ca15a41..9413c139da66 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -16,6 +16,7 @@
* @name: name of the syscall
* @syscall_nr: number of the syscall
* @nb_args: number of parameters it takes
+ * @user_arg_size: index in @args of the arg holding the size of user space data to read (negative if none)
* @user_mask: mask of @args that will read user space
* @types: list of types as strings
* @args: list of args as strings (args[i] matches types[i])
@@ -26,7 +27,8 @@
struct syscall_metadata {
const char *name;
int syscall_nr;
- short nb_args;
+ u8 nb_args;
+ s8 user_arg_size;
short user_mask;
const char **types;
const char **args;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 7ea763c07bb7..7658b592c55f 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -124,7 +124,7 @@ const char *get_syscall_name(int syscall)
return entry->name;
}
-/* Added to user strings when max limit is reached */
+/* Added to user strings or arrays when max limit is reached */
#define EXTRA "..."
static enum print_line_t
@@ -136,9 +136,8 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
struct trace_entry *ent = iter->ent;
struct syscall_trace_enter *trace;
struct syscall_metadata *entry;
- int i, syscall, val;
+ int i, syscall, val, len;
unsigned char *ptr;
- int len;
trace = (typeof(trace))ent;
syscall = trace->nr;
@@ -185,7 +184,23 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
ptr = (void *)ent + (val & 0xffff);
len = val >> 16;
- trace_seq_printf(s, " \"%.*s\"", len, ptr);
+ if (entry->user_arg_size < 0) {
+ trace_seq_printf(s, " \"%.*s\"", len, ptr);
+ continue;
+ }
+
+ val = trace->args[entry->user_arg_size];
+
+ trace_seq_puts(s, " (");
+ for (int x = 0; x < len; x++, ptr++) {
+ if (x)
+ trace_seq_putc(s, ':');
+ trace_seq_printf(s, "%02x", *ptr);
+ }
+ if (len < val)
+ trace_seq_printf(s, ", %s", EXTRA);
+
+ trace_seq_putc(s, ')');
}
trace_seq_putc(s, ')');
@@ -250,8 +265,11 @@ __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
if (!(BIT(i) & entry->user_mask))
continue;
- /* Add the format for the user space string */
- pos += snprintf(buf + pos, LEN_OR_ZERO, " \\\"%%s\\\"");
+ /* Add the format for the user space string or array */
+ if (entry->user_arg_size < 0)
+ pos += snprintf(buf + pos, LEN_OR_ZERO, " \\\"%%s\\\"");
+ else
+ pos += snprintf(buf + pos, LEN_OR_ZERO, " (%%s)");
}
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
@@ -260,9 +278,14 @@ __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
", ((unsigned long)(REC->%s))", entry->args[i]);
if (!(BIT(i) & entry->user_mask))
continue;
- /* The user space string for arg has name __<arg>_val */
- pos += snprintf(buf + pos, LEN_OR_ZERO, ", __get_str(__%s_val)",
- entry->args[i]);
+ /* The user space data for arg has name __<arg>_val */
+ if (entry->user_arg_size < 0) {
+ pos += snprintf(buf + pos, LEN_OR_ZERO, ", __get_str(__%s_val)",
+ entry->args[i]);
+ } else {
+ pos += snprintf(buf + pos, LEN_OR_ZERO, ", __print_dynamic_array(__%s_val, 1)",
+ entry->args[i]);
+ }
}
#undef LEN_OR_ZERO
@@ -333,9 +356,9 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call)
idx = ffs(mask) - 1;
/*
- * User space strings are faulted into a temporary buffer and then
- * added as a dynamic string to the end of the event.
- * The user space string name for the arg pointer is "__<arg>_val".
+ * User space data is faulted into a temporary buffer and then
+ * added as a dynamic string or array to the end of the event.
+ * The user space data name for the arg pointer is "__<arg>_val".
*/
len = strlen(meta->args[idx]) + sizeof("___val");
arg = kmalloc(len, GFP_KERNEL);
@@ -458,6 +481,7 @@ static char *sys_fault_user(struct syscall_metadata *sys_data, struct syscall_bu
unsigned long mask = sys_data->user_mask;
unsigned int cnt;
int idx = ffs(mask) - 1;
+ bool array = false;
char *ptr;
int trys = 0;
int ret;
@@ -500,6 +524,18 @@ static char *sys_fault_user(struct syscall_metadata *sys_data, struct syscall_bu
/* Read the current CPU context switch counter */
cnt = nr_context_switches_cpu(cpu);
+ /*
+ * If this system call event has a size argument, use
+ * it to define how much of user space memory to read,
+ * and read it as an array and not a string.
+ */
+ if (sys_data->user_arg_size >= 0) {
+ array = true;
+ size = args[sys_data->user_arg_size];
+ if (size > SYSCALL_FAULT_BUF_SZ - 1)
+ size = SYSCALL_FAULT_BUF_SZ - 1;
+ }
+
/*
* Preemption is going to be enabled, but this task must
* remain on this CPU.
@@ -512,7 +548,12 @@ static char *sys_fault_user(struct syscall_metadata *sys_data, struct syscall_bu
*/
preempt_enable_notrace();
- ret = strncpy_from_user(buf, ptr, size);
+ if (array) {
+ ret = __copy_from_user(buf, ptr, size);
+ ret = ret ? -1 : size;
+ } else {
+ ret = strncpy_from_user(buf, ptr, size);
+ }
preempt_disable_notrace();
migrate_enable();
@@ -530,22 +571,24 @@ static char *sys_fault_user(struct syscall_metadata *sys_data, struct syscall_bu
if (nr_context_switches_cpu(cpu) != cnt)
goto again;
- /* Replace any non-printable characters with '.' */
- for (int i = 0; i < ret; i++) {
- if (!isprint(buf[i]))
- buf[i] = '.';
- }
+ /* For strings, replace any non-printable characters with '.' */
+ if (!array) {
+ for (int i = 0; i < ret; i++) {
+ if (!isprint(buf[i]))
+ buf[i] = '.';
+ }
- /*
- * If the text was truncated due to our max limit, add "..." to
- * the string.
- */
- if (ret > SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA)) {
- strscpy(buf + SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA),
- EXTRA, sizeof(EXTRA));
- ret = SYSCALL_FAULT_BUF_SZ;
- } else {
- buf[ret++] = '\0';
+ /*
+ * If the text was truncated due to our max limit, add "..." to
+ * the string.
+ */
+ if (ret > SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA)) {
+ strscpy(buf + SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA),
+ EXTRA, sizeof(EXTRA));
+ ret = SYSCALL_FAULT_BUF_SZ;
+ } else {
+ buf[ret++] = '\0';
+ }
}
*data_size = ret;
@@ -642,6 +685,9 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
/* Store the offset and the size into the meta data */
*(int *)ptr = val | (user_size << 16);
+ if (WARN_ON_ONCE((ptr - (void *)entry + user_size) > size))
+ user_size = 0;
+
/* Nothing to do if the user space was empty or faulted */
if (user_size) {
/* Now store the user space data into the event */
@@ -795,7 +841,16 @@ static void check_faultable_syscall(struct trace_event_call *call, int nr)
if (sys_data->enter_event != call)
return;
+ sys_data->user_arg_size = -1;
+
switch (nr) {
+ /* user arg 1 with size arg at 2 */
+ case __NR_write:
+ case __NR_mq_timedsend:
+ case __NR_pwrite64:
+ sys_data->user_mask = BIT(1);
+ sys_data->user_arg_size = 2;
+ break;
/* user arg at position 0 */
case __NR_access:
case __NR_acct:
--
2.50.1
next prev parent reply other threads:[~2025-09-23 13:05 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-23 13:04 [PATCH v2 0/8] tracing: Show contents of syscall trace event user space fields Steven Rostedt
2025-09-23 13:04 ` [PATCH v2 1/8] tracing: Replace syscall RCU pointer assignment with READ/WRITE_ONCE() Steven Rostedt
2025-09-23 13:04 ` [PATCH v2 2/8] tracing: Have syscall trace events show "0x" for values greater than 10 Steven Rostedt
2025-09-23 13:05 ` [PATCH v2 3/8] tracing: Have syscall trace events read user space string Steven Rostedt
2025-09-25 7:26 ` Peter Zijlstra
2025-09-25 11:15 ` Steven Rostedt
2025-09-27 14:43 ` Steven Rostedt
2025-09-23 13:05 ` Steven Rostedt [this message]
2025-09-23 13:05 ` [PATCH v2 5/8] tracing: Display some syscall arrays as strings Steven Rostedt
2025-09-23 13:05 ` [PATCH v2 6/8] tracing: Allow syscall trace events to read more than one user parameter Steven Rostedt
2025-09-23 13:05 ` [PATCH v2 7/8] tracing: Add syscall_user_buf_size to limit amount written Steven Rostedt
2025-09-24 9:49 ` kernel test robot
2025-09-23 13:05 ` [PATCH v2 8/8] tracing: Show printable characters in syscall arrays Steven Rostedt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250923130714.101202613@kernel.org \
--to=rostedt@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=douglas.raillard@arm.com \
--cc=irogers@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mathieu.desnoyers@efficios.com \
--cc=mhiramat@kernel.org \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=takayas@google.com \
--cc=tglx@linutronix.de \
--cc=zanussi@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.