From: Masami Hiramatsu <mhiramat@kernel.org>
To: Beau Belgrave <beaub@linux.microsoft.com>
Cc: rostedt@goodmis.org, linux-trace-devel@vger.kernel.org,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH v8 08/12] user_events: Optimize writing events by only copying data once
Date: Thu, 23 Dec 2021 00:11:42 +0900 [thread overview]
Message-ID: <20211223001142.b972b667cd7d5a593999ce6c@kernel.org> (raw)
In-Reply-To: <20211216173511.10390-9-beaub@linux.microsoft.com>
On Thu, 16 Dec 2021 09:35:07 -0800
Beau Belgrave <beaub@linux.microsoft.com> wrote:
> Pass iterator through to probes to allow copying data directly to the
> probe buffers instead of taking multiple copies. Enables eBPF user and
> raw iterator types out to programs for no-copy scenarios.
This looks good to me, except for 1 nitpick. See below.
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
>
> Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
> ---
> kernel/trace/trace_events_user.c | 113 +++++++++++++++++++++++--------
> 1 file changed, 85 insertions(+), 28 deletions(-)
>
> diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
> index cc30d1fcbb63..fa3e26281fc3 100644
> --- a/kernel/trace/trace_events_user.c
> +++ b/kernel/trace/trace_events_user.c
> @@ -41,6 +41,9 @@
> #define MAX_FIELD_ARRAY_SIZE 1024
> #define MAX_FIELD_ARG_NAME 256
>
> +#define MAX_BPF_COPY_SIZE PAGE_SIZE
> +#define MAX_STACK_BPF_DATA 512
> +
> static char *register_page_data;
>
> static DEFINE_MUTEX(reg_mutex);
> @@ -78,8 +81,7 @@ struct user_event_refs {
> struct user_event *events[];
> };
>
> -typedef void (*user_event_func_t) (struct user_event *user,
> - void *data, u32 datalen,
> +typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i,
> void *tpdata);
>
> static int user_event_parse(char *name, char *args, char *flags,
> @@ -90,6 +92,20 @@ static u32 user_event_key(char *name)
> return jhash(name, strlen(name), 0);
> }
>
> +static __always_inline __must_check
> +size_t copy_nofault(void *addr, size_t bytes, struct iov_iter *i)
> +{
> + size_t ret;
> +
> + pagefault_disable();
> +
> + ret = copy_from_iter_nocache(addr, bytes, i);
> +
> + pagefault_enable();
> +
> + return ret;
> +}
> +
> static struct list_head *user_event_get_fields(struct trace_event_call *call)
> {
> struct user_event *user = (struct user_event *)call->data;
> @@ -524,7 +540,7 @@ static struct user_event *find_user_event(char *name, u32 *outkey)
> /*
> * Writes the user supplied payload out to a trace file.
> */
> -static void user_event_ftrace(struct user_event *user, void *data, u32 datalen,
> +static void user_event_ftrace(struct user_event *user, struct iov_iter *i,
> void *tpdata)
> {
> struct trace_event_file *file;
> @@ -540,41 +556,85 @@ static void user_event_ftrace(struct user_event *user, void *data, u32 datalen,
>
> /* Allocates and fills trace_entry, + 1 of this is data payload */
> entry = trace_event_buffer_reserve(&event_buffer, file,
> - sizeof(*entry) + datalen);
> + sizeof(*entry) + i->count);
>
> if (unlikely(!entry))
> return;
>
> - memcpy(entry + 1, data, datalen);
> + if (unlikely(!copy_nofault(entry + 1, i->count, i))) {
> + __trace_event_discard_commit(event_buffer.buffer,
> + event_buffer.event);
> + return;
> + }
>
> trace_event_buffer_commit(&event_buffer);
Will this be
if (unlikely(!copy_nofault(entry + 1, i->count, i)))
__trace_event_discard_commit(event_buffer.buffer,
event_buffer.event);
else
trace_event_buffer_commit(&event_buffer);
?
Thank you,
> }
>
> #ifdef CONFIG_PERF_EVENTS
> +static void user_event_bpf(struct user_event *user, struct iov_iter *i)
> +{
> + struct user_bpf_context context;
> + struct user_bpf_iter bpf_i;
> + char fast_data[MAX_STACK_BPF_DATA];
> + void *temp = NULL;
> +
> + if ((user->flags & FLAG_BPF_ITER) && iter_is_iovec(i)) {
> + /* Raw iterator */
> + context.data_type = USER_BPF_DATA_ITER;
> + context.data_len = i->count;
> + context.iter = &bpf_i;
> +
> + bpf_i.iov_offset = i->iov_offset;
> + bpf_i.iov = i->iov;
> + bpf_i.nr_segs = i->nr_segs;
> + } else if (i->nr_segs == 1 && iter_is_iovec(i)) {
> + /* Single buffer from user */
> + context.data_type = USER_BPF_DATA_USER;
> + context.data_len = i->count;
> + context.udata = i->iov->iov_base + i->iov_offset;
> + } else {
> + /* Multi buffer from user */
> + struct iov_iter copy = *i;
> + size_t copy_size = min_t(size_t, i->count, MAX_BPF_COPY_SIZE);
> +
> + context.data_type = USER_BPF_DATA_KERNEL;
> + context.kdata = fast_data;
> +
> + if (unlikely(copy_size > sizeof(fast_data))) {
> + temp = kmalloc(copy_size, GFP_NOWAIT);
> +
> + if (temp)
> + context.kdata = temp;
> + else
> + copy_size = sizeof(fast_data);
> + }
> +
> + context.data_len = copy_nofault(context.kdata,
> + copy_size, ©);
> + }
> +
> + trace_call_bpf(&user->call, &context);
> +
> + kfree(temp);
> +}
> +
> /*
> * Writes the user supplied payload out to perf ring buffer or eBPF program.
> */
> -static void user_event_perf(struct user_event *user, void *data, u32 datalen,
> +static void user_event_perf(struct user_event *user, struct iov_iter *i,
> void *tpdata)
> {
> struct hlist_head *perf_head;
>
> - if (bpf_prog_array_valid(&user->call)) {
> - struct user_bpf_context context = {0};
> -
> - context.data_len = datalen;
> - context.data_type = USER_BPF_DATA_KERNEL;
> - context.kdata = data;
> -
> - trace_call_bpf(&user->call, &context);
> - }
> + if (bpf_prog_array_valid(&user->call))
> + user_event_bpf(user, i);
>
> perf_head = this_cpu_ptr(user->call.perf_events);
>
> if (perf_head && !hlist_empty(perf_head)) {
> struct trace_entry *perf_entry;
> struct pt_regs *regs;
> - size_t size = sizeof(*perf_entry) + datalen;
> + size_t size = sizeof(*perf_entry) + i->count;
> int context;
>
> perf_entry = perf_trace_buf_alloc(ALIGN(size, 8),
> @@ -585,7 +645,10 @@ static void user_event_perf(struct user_event *user, void *data, u32 datalen,
>
> perf_fetch_caller_regs(regs);
>
> - memcpy(perf_entry + 1, data, datalen);
> + if (unlikely(!copy_nofault(perf_entry + 1, i->count, i))) {
> + perf_swevent_put_recursion_context(context);
> + return;
> + }
>
> perf_trace_buf_submit(perf_entry, size, context,
> user->call.event.type, 1, regs,
> @@ -1018,16 +1081,11 @@ static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
> if (likely(atomic_read(&tp->key.enabled) > 0)) {
> struct tracepoint_func *probe_func_ptr;
> user_event_func_t probe_func;
> + struct iov_iter copy;
> void *tpdata;
> - void *kdata;
> - u32 datalen;
> -
> - kdata = kmalloc(i->count, GFP_KERNEL);
>
> - if (unlikely(!kdata))
> - return -ENOMEM;
> -
> - datalen = copy_from_iter(kdata, i->count, i);
> + if (unlikely(iov_iter_fault_in_readable(i, i->count)))
> + return -EFAULT;
>
> rcu_read_lock_sched();
>
> @@ -1035,15 +1093,14 @@ static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
>
> if (probe_func_ptr) {
> do {
> + copy = *i;
> probe_func = probe_func_ptr->func;
> tpdata = probe_func_ptr->data;
> - probe_func(user, kdata, datalen, tpdata);
> + probe_func(user, ©, tpdata);
> } while ((++probe_func_ptr)->func);
> }
>
> rcu_read_unlock_sched();
> -
> - kfree(kdata);
> }
>
> return ret;
> --
> 2.17.1
>
--
Masami Hiramatsu <mhiramat@kernel.org>
next prev parent reply other threads:[~2021-12-22 15:11 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-12-16 17:34 [PATCH v8 00/12] user_events: Enable user processes to create and write to trace events Beau Belgrave
2021-12-16 17:35 ` [PATCH v8 01/12] user_events: Add minimal support for trace_event into ftrace Beau Belgrave
2021-12-21 15:16 ` Masami Hiramatsu
2022-01-03 18:22 ` Beau Belgrave
2021-12-16 17:35 ` [PATCH v8 02/12] user_events: Add print_fmt generation support for basic types Beau Belgrave
2021-12-22 0:30 ` Masami Hiramatsu
2022-01-03 18:56 ` Beau Belgrave
2021-12-16 17:35 ` [PATCH v8 03/12] user_events: Handle matching arguments from dyn_events Beau Belgrave
2021-12-22 6:19 ` Masami Hiramatsu
2021-12-16 17:35 ` [PATCH v8 04/12] user_events: Add basic perf and eBPF support Beau Belgrave
2021-12-22 7:55 ` Masami Hiramatsu
2021-12-16 17:35 ` [PATCH v8 05/12] user_events: Add self-test for ftrace integration Beau Belgrave
2021-12-16 17:35 ` [PATCH v8 06/12] user_events: Add self-test for dynamic_events integration Beau Belgrave
2021-12-16 17:35 ` [PATCH v8 07/12] user_events: Add self-test for perf_event integration Beau Belgrave
2021-12-16 17:35 ` [PATCH v8 08/12] user_events: Optimize writing events by only copying data once Beau Belgrave
2021-12-22 15:11 ` Masami Hiramatsu [this message]
2022-01-03 18:58 ` Beau Belgrave
2022-01-06 22:17 ` Steven Rostedt
2022-01-06 23:05 ` Beau Belgrave
2021-12-16 17:35 ` [PATCH v8 09/12] user_events: Add documentation file Beau Belgrave
2021-12-22 14:18 ` Masami Hiramatsu
2022-01-03 23:01 ` Beau Belgrave
2022-01-06 21:14 ` Steven Rostedt
2021-12-16 17:35 ` [PATCH v8 10/12] user_events: Add sample code for typical usage Beau Belgrave
2021-12-22 23:18 ` Masami Hiramatsu
2022-01-06 22:09 ` Steven Rostedt
2022-01-06 23:06 ` Beau Belgrave
2021-12-16 17:35 ` [PATCH v8 11/12] user_events: Validate user payloads for size and null termination Beau Belgrave
2021-12-23 0:08 ` Masami Hiramatsu
2022-01-03 18:53 ` Beau Belgrave
2022-01-06 23:32 ` Masami Hiramatsu
2022-01-07 1:01 ` Beau Belgrave
2021-12-16 17:35 ` [PATCH v8 12/12] user_events: Add self-test for validator boundaries Beau Belgrave
2022-04-18 20:43 ` [PATCH v8 00/12] user_events: Enable user processes to create and write to trace events Hagen Paul Pfeifer
2022-04-19 0:25 ` Beau Belgrave
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211223001142.b972b667cd7d5a593999ce6c@kernel.org \
--to=mhiramat@kernel.org \
--cc=beaub@linux.microsoft.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-trace-devel@vger.kernel.org \
--cc=rostedt@goodmis.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).