From: Steven Rostedt <rostedt@kernel.org>
To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org,
linux-perf-users@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>,
Mark Rutland <mark.rutland@arm.com>,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Andrew Morton <akpm@linux-foundation.org>,
Peter Zijlstra <peterz@infradead.org>,
Namhyung Kim <namhyung@kernel.org>,
Takaya Saeki <takayas@google.com>,
Tom Zanussi <zanussi@kernel.org>,
Thomas Gleixner <tglx@linutronix.de>,
Ian Rogers <irogers@google.com>,
Douglas Raillard <douglas.raillard@arm.com>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
Jiri Olsa <jolsa@kernel.org>,
Adrian Hunter <adrian.hunter@intel.com>,
Ingo Molnar <mingo@redhat.com>
Subject: [PATCH v3 05/13] tracing: Have system call events record user array data
Date: Wed, 15 Oct 2025 13:32:19 -0400 [thread overview]
Message-ID: <20251015173548.877256395@kernel.org> (raw)
In-Reply-To: 20251015173214.760495866@kernel.org
From: Steven Rostedt <rostedt@goodmis.org>
For system call events that have a length field, add a "user_arg_size"
parameter to the system call meta data. It holds the index into the args
array of the argument that contains the size of the user space data
pointed to by the argument whose bit is set in the user_mask field.
The "user_mask" has a bit set that denotes the arg that points to an array
in the user space address space. If a system call event has the user_mask
field set and a non-negative user_arg_size, it will record the contents
of that array into the trace event, up to the size defined by
SYSCALL_FAULT_BUF_SZ - 1.
This allows the output to look like:
sys_write(fd: 0xa, buf: 0x5646978d13c0 (01:00:05:00:00:00:00:00:01:87:55:89:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00), count: 0x20)
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
include/trace/syscall.h | 4 +-
kernel/trace/trace_syscalls.c | 121 ++++++++++++++++++++++++----------
2 files changed, 90 insertions(+), 35 deletions(-)
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 85f21ca15a41..9413c139da66 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -16,6 +16,7 @@
* @name: name of the syscall
* @syscall_nr: number of the syscall
* @nb_args: number of parameters it takes
+ * @user_arg_size: index of the arg holding the user space read size (or -1)
* @user_mask: mask of @args that will read user space
* @types: list of types as strings
* @args: list of args as strings (args[i] matches types[i])
@@ -26,7 +27,8 @@
struct syscall_metadata {
const char *name;
int syscall_nr;
- short nb_args;
+ u8 nb_args;
+ s8 user_arg_size;
short user_mask;
const char **types;
const char **args;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index c1dfc3208a12..8da2a1d38042 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -124,7 +124,7 @@ const char *get_syscall_name(int syscall)
return entry->name;
}
-/* Added to user strings when max limit is reached */
+/* Added to user strings or arrays when max limit is reached */
#define EXTRA "..."
static enum print_line_t
@@ -136,9 +136,8 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
struct trace_entry *ent = iter->ent;
struct syscall_trace_enter *trace;
struct syscall_metadata *entry;
- int i, syscall, val;
+ int i, syscall, val, len;
unsigned char *ptr;
- int len;
trace = (typeof(trace))ent;
syscall = trace->nr;
@@ -185,7 +184,23 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
ptr = (void *)ent + (val & 0xffff);
len = val >> 16;
- trace_seq_printf(s, " \"%.*s\"", len, ptr);
+ if (entry->user_arg_size < 0) {
+ trace_seq_printf(s, " \"%.*s\"", len, ptr);
+ continue;
+ }
+
+ val = trace->args[entry->user_arg_size];
+
+ trace_seq_puts(s, " (");
+ for (int x = 0; x < len; x++, ptr++) {
+ if (x)
+ trace_seq_putc(s, ':');
+ trace_seq_printf(s, "%02x", *ptr);
+ }
+ if (len < val)
+ trace_seq_printf(s, ", %s", EXTRA);
+
+ trace_seq_putc(s, ')');
}
trace_seq_putc(s, ')');
@@ -250,8 +265,11 @@ __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
if (!(BIT(i) & entry->user_mask))
continue;
- /* Add the format for the user space string */
- pos += snprintf(buf + pos, LEN_OR_ZERO, " \\\"%%s\\\"");
+ /* Add the format for the user space string or array */
+ if (entry->user_arg_size < 0)
+ pos += snprintf(buf + pos, LEN_OR_ZERO, " \\\"%%s\\\"");
+ else
+ pos += snprintf(buf + pos, LEN_OR_ZERO, " (%%s)");
}
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
@@ -260,9 +278,14 @@ __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
", ((unsigned long)(REC->%s))", entry->args[i]);
if (!(BIT(i) & entry->user_mask))
continue;
- /* The user space string for arg has name __<arg>_val */
- pos += snprintf(buf + pos, LEN_OR_ZERO, ", __get_str(__%s_val)",
- entry->args[i]);
+ /* The user space data for arg has name __<arg>_val */
+ if (entry->user_arg_size < 0) {
+ pos += snprintf(buf + pos, LEN_OR_ZERO, ", __get_str(__%s_val)",
+ entry->args[i]);
+ } else {
+ pos += snprintf(buf + pos, LEN_OR_ZERO, ", __print_dynamic_array(__%s_val, 1)",
+ entry->args[i]);
+ }
}
#undef LEN_OR_ZERO
@@ -333,9 +356,9 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call)
idx = ffs(mask) - 1;
/*
- * User space strings are faulted into a temporary buffer and then
- * added as a dynamic string to the end of the event.
- * The user space string name for the arg pointer is "__<arg>_val".
+ * User space data is faulted into a temporary buffer and then
+ * added as a dynamic string or array to the end of the event.
+ * The user space data name for the arg pointer is "__<arg>_val".
*/
len = strlen(meta->args[idx]) + sizeof("___val");
arg = kmalloc(len, GFP_KERNEL);
@@ -431,9 +454,11 @@ static char *sys_fault_user(struct syscall_metadata *sys_data,
struct syscall_user_buffer *sbuf,
unsigned long *args, unsigned int *data_size)
{
+ trace_user_buf_copy syscall_copy = syscall_copy_user;
unsigned long size = SYSCALL_FAULT_BUF_SZ - 1;
unsigned long mask = sys_data->user_mask;
int idx = ffs(mask) - 1;
+ bool array = false;
char *ptr;
char *buf;
@@ -441,27 +466,43 @@ static char *sys_fault_user(struct syscall_metadata *sys_data,
ptr = (char *)args[idx];
*data_size = 0;
+ /*
+ * If this system call event has a size argument, use
+ * it to define how much of user space memory to read,
+ * and read it as an array and not a string.
+ */
+ if (sys_data->user_arg_size >= 0) {
+ array = true;
+ size = args[sys_data->user_arg_size];
+ if (size > SYSCALL_FAULT_BUF_SZ - 1)
+ size = SYSCALL_FAULT_BUF_SZ - 1;
+ /* use normal copy_from_user() */
+ syscall_copy = NULL;
+ }
+
buf = trace_user_fault_read(&sbuf->buf, ptr, size,
- syscall_copy_user, &size);
+ syscall_copy, &size);
if (!buf)
return NULL;
- /* Replace any non-printable characters with '.' */
- for (int i = 0; i < size; i++) {
- if (!isprint(buf[i]))
- buf[i] = '.';
- }
+ /* For strings, replace any non-printable characters with '.' */
+ if (!array) {
+ for (int i = 0; i < size; i++) {
+ if (!isprint(buf[i]))
+ buf[i] = '.';
+ }
- /*
- * If the text was truncated due to our max limit, add "..." to
- * the string.
- */
- if (size > SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA)) {
- strscpy(buf + SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA),
- EXTRA, sizeof(EXTRA));
- size = SYSCALL_FAULT_BUF_SZ;
- } else {
- buf[size++] = '\0';
+ /*
+ * If the text was truncated due to our max limit, add "..." to
+ * the string.
+ */
+ if (size > SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA)) {
+ strscpy(buf + SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA),
+ EXTRA, sizeof(EXTRA));
+ size = SYSCALL_FAULT_BUF_SZ;
+ } else {
+ buf[size++] = '\0';
+ }
}
*data_size = size;
@@ -492,7 +533,7 @@ syscall_get_data(struct syscall_metadata *sys_data, unsigned long *args,
static void syscall_put_data(struct syscall_metadata *sys_data,
struct syscall_trace_enter *entry,
- char *buffer, int size)
+ char *buffer, int size, int user_size)
{
void *ptr;
int val;
@@ -510,13 +551,16 @@ static void syscall_put_data(struct syscall_metadata *sys_data,
val = (ptr - (void *)entry) + 4;
/* Store the offset and the size into the meta data */
- *(int *)ptr = val | (size << 16);
+ *(int *)ptr = val | (user_size << 16);
+
+ if (WARN_ON_ONCE((ptr - (void *)entry + user_size) > size))
+ user_size = 0;
/* Nothing to do if the user space was empty or faulted */
- if (size) {
+ if (user_size) {
/* Now store the user space data into the event */
ptr += 4;
- memcpy(ptr, buffer, size);
+ memcpy(ptr, buffer, user_size);
}
}
@@ -580,7 +624,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
if (mayfault)
- syscall_put_data(sys_data, entry, user_ptr, user_size);
+ syscall_put_data(sys_data, entry, user_ptr, size, user_size);
trace_event_buffer_commit(&fbuffer);
}
@@ -727,7 +771,16 @@ static void check_faultable_syscall(struct trace_event_call *call, int nr)
if (sys_data->enter_event != call)
return;
+ sys_data->user_arg_size = -1;
+
switch (nr) {
+ /* user arg 1 with size arg at 2 */
+ case __NR_write:
+ case __NR_mq_timedsend:
+ case __NR_pwrite64:
+ sys_data->user_mask = BIT(1);
+ sys_data->user_arg_size = 2;
+ break;
/* user arg at position 0 */
case __NR_access:
case __NR_acct:
@@ -1025,7 +1078,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);
if (mayfault)
- syscall_put_data(sys_data, rec, user_ptr, user_size);
+ syscall_put_data(sys_data, rec, user_ptr, size, user_size);
if ((valid_prog_array &&
!perf_call_bpf_enter(sys_data->enter_event, fake_regs, sys_data, rec)) ||
--
2.51.0
next prev parent reply other threads:[~2025-10-15 17:35 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-15 17:32 [PATCH v3 00/13] tracing: Show contents of syscall trace event user space fields Steven Rostedt
2025-10-15 17:32 ` [PATCH v3 01/13] tracing: Make trace_user_fault_read() exposed to rest of tracing Steven Rostedt
2025-10-15 17:32 ` [PATCH v3 02/13] tracing: Have syscall trace events read user space string Steven Rostedt
2025-10-15 17:32 ` [PATCH v3 03/13] perf: tracing: Simplify perf_sysenter_enable/disable() with guards Steven Rostedt
2025-10-16 11:05 ` kernel test robot
2025-10-15 17:32 ` [PATCH v3 04/13] perf: tracing: Have perf system calls read user space Steven Rostedt
2025-10-15 17:32 ` Steven Rostedt [this message]
2025-10-15 17:32 ` [PATCH v3 06/13] tracing: Display some syscall arrays as strings Steven Rostedt
2025-10-15 17:32 ` [PATCH v3 07/13] tracing: Allow syscall trace events to read more than one user parameter Steven Rostedt
2025-10-15 17:32 ` [PATCH v3 08/13] tracing: Add a config and syscall_user_buf_size file to limit amount written Steven Rostedt
2025-10-15 17:32 ` [PATCH v3 09/13] tracing: Show printable characters in syscall arrays Steven Rostedt
2025-10-15 17:32 ` [PATCH v3 10/13] tracing: Add trace_seq_pop() and seq_buf_pop() Steven Rostedt
2025-10-15 17:32 ` [PATCH v3 11/13] tracing: Add parsing of flags to the sys_enter_openat trace event Steven Rostedt
2025-10-15 17:32 ` [PATCH v3 12/13] tracing: Check for printable characters when printing field dyn strings Steven Rostedt
2025-10-20 12:19 ` Douglas Raillard
2025-10-20 18:47 ` Steven Rostedt
2025-10-15 17:32 ` [PATCH v3 13/13] tracing: Have persistent ring buffer print syscalls normally Steven Rostedt
2025-10-16 10:33 ` kernel test robot
2025-10-16 11:36 ` kernel test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251015173548.877256395@kernel.org \
--to=rostedt@kernel.org \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=akpm@linux-foundation.org \
--cc=douglas.raillard@arm.com \
--cc=irogers@google.com \
--cc=jolsa@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mathieu.desnoyers@efficios.com \
--cc=mhiramat@kernel.org \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=takayas@google.com \
--cc=tglx@linutronix.de \
--cc=zanussi@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.