From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org
Cc: Ingo Molnar <mingo@kernel.org>,
Andrew Morton <akpm@linux-foundation.org>,
Masami Hiramatsu <mhiramat@kernel.org>,
Beau Belgrave <beaub@linux.microsoft.com>
Subject: [for-next][PATCH 09/16] user_events: Optimize writing events by only copying data once
Date: Fri, 18 Feb 2022 19:54:39 -0500 [thread overview]
Message-ID: <20220219005513.907430496@goodmis.org> (raw)
In-Reply-To: 20220219005430.848118506@goodmis.org
From: Beau Belgrave <beaub@linux.microsoft.com>
Pass iterator through to probes to allow copying data directly to the
probe buffers instead of taking multiple copies. Enables eBPF user and
raw iterator types out to programs for no-copy scenarios.
Link: https://lkml.kernel.org/r/20220118204326.2169-6-beaub@linux.microsoft.com
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
kernel/trace/trace_events_user.c | 115 +++++++++++++++++++++++--------
1 file changed, 85 insertions(+), 30 deletions(-)
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 371f31472156..78b6b96c4cfa 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -41,6 +41,9 @@
#define MAX_FIELD_ARRAY_SIZE 1024
#define MAX_FIELD_ARG_NAME 256
+#define MAX_BPF_COPY_SIZE PAGE_SIZE
+#define MAX_STACK_BPF_DATA 512
+
static char *register_page_data;
static DEFINE_MUTEX(reg_mutex);
@@ -78,8 +81,7 @@ struct user_event_refs {
struct user_event *events[];
};
-typedef void (*user_event_func_t) (struct user_event *user,
- void *data, u32 datalen,
+typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i,
void *tpdata);
static int user_event_parse(char *name, char *args, char *flags,
@@ -90,6 +92,20 @@ static u32 user_event_key(char *name)
return jhash(name, strlen(name), 0);
}
+static __always_inline __must_check
+size_t copy_nofault(void *addr, size_t bytes, struct iov_iter *i)
+{
+ size_t ret;
+
+ pagefault_disable();
+
+ ret = copy_from_iter_nocache(addr, bytes, i);
+
+ pagefault_enable();
+
+ return ret;
+}
+
static struct list_head *user_event_get_fields(struct trace_event_call *call)
{
struct user_event *user = (struct user_event *)call->data;
@@ -525,7 +541,7 @@ static struct user_event *find_user_event(char *name, u32 *outkey)
/*
* Writes the user supplied payload out to a trace file.
*/
-static void user_event_ftrace(struct user_event *user, void *data, u32 datalen,
+static void user_event_ftrace(struct user_event *user, struct iov_iter *i,
void *tpdata)
{
struct trace_event_file *file;
@@ -541,41 +557,83 @@ static void user_event_ftrace(struct user_event *user, void *data, u32 datalen,
/* Allocates and fills trace_entry, + 1 of this is data payload */
entry = trace_event_buffer_reserve(&event_buffer, file,
- sizeof(*entry) + datalen);
+ sizeof(*entry) + i->count);
if (unlikely(!entry))
return;
- memcpy(entry + 1, data, datalen);
-
- trace_event_buffer_commit(&event_buffer);
+ if (unlikely(!copy_nofault(entry + 1, i->count, i)))
+ __trace_event_discard_commit(event_buffer.buffer,
+ event_buffer.event);
+ else
+ trace_event_buffer_commit(&event_buffer);
}
#ifdef CONFIG_PERF_EVENTS
+static void user_event_bpf(struct user_event *user, struct iov_iter *i)
+{
+ struct user_bpf_context context;
+ struct user_bpf_iter bpf_i;
+ char fast_data[MAX_STACK_BPF_DATA];
+ void *temp = NULL;
+
+ if ((user->flags & FLAG_BPF_ITER) && iter_is_iovec(i)) {
+ /* Raw iterator */
+ context.data_type = USER_BPF_DATA_ITER;
+ context.data_len = i->count;
+ context.iter = &bpf_i;
+
+ bpf_i.iov_offset = i->iov_offset;
+ bpf_i.iov = i->iov;
+ bpf_i.nr_segs = i->nr_segs;
+ } else if (i->nr_segs == 1 && iter_is_iovec(i)) {
+ /* Single buffer from user */
+ context.data_type = USER_BPF_DATA_USER;
+ context.data_len = i->count;
+ context.udata = i->iov->iov_base + i->iov_offset;
+ } else {
+ /* Multi buffer from user */
+ struct iov_iter copy = *i;
+ size_t copy_size = min_t(size_t, i->count, MAX_BPF_COPY_SIZE);
+
+ context.data_type = USER_BPF_DATA_KERNEL;
+ context.kdata = fast_data;
+
+ if (unlikely(copy_size > sizeof(fast_data))) {
+ temp = kmalloc(copy_size, GFP_NOWAIT);
+
+ if (temp)
+ context.kdata = temp;
+ else
+ copy_size = sizeof(fast_data);
+ }
+
+ context.data_len = copy_nofault(context.kdata,
+ copy_size, &copy);
+ }
+
+ trace_call_bpf(&user->call, &context);
+
+ kfree(temp);
+}
+
/*
* Writes the user supplied payload out to perf ring buffer or eBPF program.
*/
-static void user_event_perf(struct user_event *user, void *data, u32 datalen,
+static void user_event_perf(struct user_event *user, struct iov_iter *i,
void *tpdata)
{
struct hlist_head *perf_head;
- if (bpf_prog_array_valid(&user->call)) {
- struct user_bpf_context context = {0};
-
- context.data_len = datalen;
- context.data_type = USER_BPF_DATA_KERNEL;
- context.kdata = data;
-
- trace_call_bpf(&user->call, &context);
- }
+ if (bpf_prog_array_valid(&user->call))
+ user_event_bpf(user, i);
perf_head = this_cpu_ptr(user->call.perf_events);
if (perf_head && !hlist_empty(perf_head)) {
struct trace_entry *perf_entry;
struct pt_regs *regs;
- size_t size = sizeof(*perf_entry) + datalen;
+ size_t size = sizeof(*perf_entry) + i->count;
int context;
perf_entry = perf_trace_buf_alloc(ALIGN(size, 8),
@@ -586,7 +644,10 @@ static void user_event_perf(struct user_event *user, void *data, u32 datalen,
perf_fetch_caller_regs(regs);
- memcpy(perf_entry + 1, data, datalen);
+ if (unlikely(!copy_nofault(perf_entry + 1, i->count, i))) {
+ perf_swevent_put_recursion_context(context);
+ return;
+ }
perf_trace_buf_submit(perf_entry, size, context,
user->call.event.type, 1, regs,
@@ -1024,16 +1085,11 @@ static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
if (likely(atomic_read(&tp->key.enabled) > 0)) {
struct tracepoint_func *probe_func_ptr;
user_event_func_t probe_func;
+ struct iov_iter copy;
void *tpdata;
- void *kdata;
- u32 datalen;
- kdata = kmalloc(i->count, GFP_KERNEL);
-
- if (unlikely(!kdata))
- return -ENOMEM;
-
- datalen = copy_from_iter(kdata, i->count, i);
+ if (unlikely(fault_in_iov_iter_readable(i, i->count)))
+ return -EFAULT;
rcu_read_lock_sched();
@@ -1041,15 +1097,14 @@ static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
if (probe_func_ptr) {
do {
+ copy = *i;
probe_func = probe_func_ptr->func;
tpdata = probe_func_ptr->data;
- probe_func(user, kdata, datalen, tpdata);
+ probe_func(user, &copy, tpdata);
} while ((++probe_func_ptr)->func);
}
rcu_read_unlock_sched();
-
- kfree(kdata);
}
return ret;
--
2.34.1
next prev parent reply other threads:[~2022-02-19 0:55 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-19 0:54 [for-next][PATCH 00/16] tracing: Updates for 5.18 Steven Rostedt
2022-02-19 0:54 ` [for-next][PATCH 01/16] tracing: Remove size restriction on tracing_log_err cmd strings Steven Rostedt
2022-02-19 0:54 ` [for-next][PATCH 02/16] tracing: Remove size restriction on hist trigger cmd error logging Steven Rostedt
2022-02-19 0:54 ` [for-next][PATCH 03/16] tracing: Remove size restriction on synthetic event " Steven Rostedt
2022-02-19 0:54 ` [for-next][PATCH 04/16] tracing: Save both wakee and current on wakeup events Steven Rostedt
2022-02-19 0:54 ` [for-next][PATCH 05/16] user_events: Add minimal support for trace_event into ftrace Steven Rostedt
2022-02-19 0:54 ` [for-next][PATCH 06/16] user_events: Add print_fmt generation support for basic types Steven Rostedt
2022-02-19 0:54 ` [for-next][PATCH 07/16] user_events: Handle matching arguments from dyn_events Steven Rostedt
2022-02-19 0:54 ` [for-next][PATCH 08/16] user_events: Add basic perf and eBPF support Steven Rostedt
2022-02-19 0:54 ` Steven Rostedt [this message]
2022-02-19 0:54 ` [for-next][PATCH 10/16] user_events: Validate user payloads for size and null termination Steven Rostedt
2022-02-19 0:54 ` [for-next][PATCH 11/16] user_events: Add self-test for ftrace integration Steven Rostedt
2022-02-19 0:54 ` [for-next][PATCH 12/16] user_events: Add self-test for dynamic_events integration Steven Rostedt
2022-02-19 0:54 ` [for-next][PATCH 13/16] user_events: Add self-test for perf_event integration Steven Rostedt
2022-02-19 0:54 ` [for-next][PATCH 14/16] user_events: Add self-test for validator boundaries Steven Rostedt
2022-02-19 0:54 ` [for-next][PATCH 15/16] user_events: Add sample code for typical usage Steven Rostedt
2022-02-19 0:54 ` [for-next][PATCH 16/16] user_events: Add documentation file Steven Rostedt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220219005513.907430496@goodmis.org \
--to=rostedt@goodmis.org \
--cc=akpm@linux-foundation.org \
--cc=beaub@linux.microsoft.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mhiramat@kernel.org \
--cc=mingo@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.