linux-trace-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Beau Belgrave <beaub@linux.microsoft.com>
To: rostedt@goodmis.org, mhiramat@kernel.org
Cc: linux-trace-devel@vger.kernel.org, linux-kernel@vger.kernel.org,
	beaub@linux.microsoft.com
Subject: [PATCH v4 09/10] user_events: Optimize writing events by only copying data once
Date: Thu,  4 Nov 2021 10:04:32 -0700	[thread overview]
Message-ID: <20211104170433.2206-10-beaub@linux.microsoft.com> (raw)
In-Reply-To: <20211104170433.2206-1-beaub@linux.microsoft.com>

Pass iterator through to probes to allow copying data directly to the
probe buffers instead of taking multiple copies. Enables eBPF user and
raw iterator types out to programs for no-copy scenarios.

Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
---
 kernel/trace/trace_events_user.c | 97 +++++++++++++++++++++++---------
 1 file changed, 69 insertions(+), 28 deletions(-)

diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index b5fe0550b489..d50118b9630a 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -39,6 +39,10 @@
 #define MAX_EVENT_DESC 512
 #define EVENT_NAME(user_event) ((user_event)->tracepoint.name)
 
+#define MAX_BPF_COPY_SIZE PAGE_SIZE
+#define MAX_STACK_BPF_DATA 512
+#define copy_nofault copy_from_iter_nocache
+
 static char *register_page_data;
 
 static DEFINE_MUTEX(reg_mutex);
@@ -63,8 +67,7 @@ struct user_event_refs {
 	struct user_event *events[];
 };
 
-typedef void (*user_event_func_t) (struct user_event *user,
-				   void *data, u32 datalen,
+typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i,
 				   void *tpdata);
 
 static int user_event_parse(char *name, char *args, char *flags,
@@ -491,7 +494,7 @@ static struct user_event *find_user_event(char *name, u32 *outkey)
 /*
  * Writes the user supplied payload out to a trace file.
  */
-static void user_event_ftrace(struct user_event *user, void *data, u32 datalen,
+static void user_event_ftrace(struct user_event *user, struct iov_iter *i,
 			      void *tpdata)
 {
 	struct trace_event_file *file;
@@ -506,41 +509,82 @@ static void user_event_ftrace(struct user_event *user, void *data, u32 datalen,
 		return;
 
 	entry = trace_event_buffer_reserve(&event_buffer, file,
-					   sizeof(*entry) + datalen);
+					   sizeof(*entry) + i->count);
 
 	if (unlikely(!entry))
 		return;
 
-	memcpy(entry + 1, data, datalen);
+	if (unlikely(!copy_nofault(entry + 1, i->count, i)))
+		return;
 
 	trace_event_buffer_commit(&event_buffer);
 }
 
 #ifdef CONFIG_PERF_EVENTS
+static void user_event_bpf(struct user_event *user, struct iov_iter *i)
+{
+	struct user_bpf_context context;
+	struct user_bpf_iter bpf_i;
+	char fast_data[MAX_STACK_BPF_DATA];
+	void *temp = NULL;
+
+	if ((user->flags & FLAG_BPF_ITER) && iter_is_iovec(i)) {
+		/* Raw iterator */
+		context.data_type = USER_BPF_DATA_ITER;
+		context.data_len = i->count;
+		context.iter = &bpf_i;
+
+		bpf_i.iov_offset = i->iov_offset;
+		bpf_i.iov = i->iov;
+		bpf_i.nr_segs = i->nr_segs;
+	} else if (i->nr_segs == 1 && iter_is_iovec(i)) {
+		/* Single buffer from user */
+		context.data_type = USER_BPF_DATA_USER;
+		context.data_len = i->count;
+		context.udata = i->iov->iov_base + i->iov_offset;
+	} else {
+		/* Multi buffer from user */
+		struct iov_iter copy = *i;
+		size_t copy_size = min(i->count, MAX_BPF_COPY_SIZE);
+
+		context.data_type = USER_BPF_DATA_KERNEL;
+		context.kdata = fast_data;
+
+		if (unlikely(copy_size > sizeof(fast_data))) {
+			temp = kmalloc(copy_size, GFP_NOWAIT);
+
+			if (temp)
+				context.kdata = temp;
+			else
+				copy_size = sizeof(fast_data);
+		}
+
+		context.data_len = copy_nofault(context.kdata,
+						copy_size, &copy);
+	}
+
+	trace_call_bpf(&user->call, &context);
+
+	kfree(temp);
+}
+
 /*
  * Writes the user supplied payload out to perf ring buffer or eBPF program.
  */
-static void user_event_perf(struct user_event *user, void *data, u32 datalen,
+static void user_event_perf(struct user_event *user, struct iov_iter *i,
 			    void *tpdata)
 {
 	struct hlist_head *perf_head;
 
-	if (bpf_prog_array_valid(&user->call)) {
-		struct user_bpf_context context = {0};
-
-		context.data_len = datalen;
-		context.data_type = USER_BPF_DATA_KERNEL;
-		context.kdata = data;
-
-		trace_call_bpf(&user->call, &context);
-	}
+	if (bpf_prog_array_valid(&user->call))
+		user_event_bpf(user, i);
 
 	perf_head = this_cpu_ptr(user->call.perf_events);
 
 	if (perf_head && !hlist_empty(perf_head)) {
 		struct trace_entry *perf_entry;
 		struct pt_regs *regs;
-		size_t size = sizeof(*perf_entry) + datalen;
+		size_t size = sizeof(*perf_entry) + i->count;
 		int context;
 
 		perf_entry = perf_trace_buf_alloc(ALIGN(size, 8),
@@ -551,7 +595,8 @@ static void user_event_perf(struct user_event *user, void *data, u32 datalen,
 
 		perf_fetch_caller_regs(regs);
 
-		memcpy(perf_entry + 1, data, datalen);
+		if (unlikely(!copy_nofault(perf_entry + 1, i->count, i)))
+			return;
 
 		perf_trace_buf_submit(perf_entry, size, context,
 				      user->call.event.type, 1, regs,
@@ -961,32 +1006,28 @@ static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
 	if (likely(atomic_read(&tp->key.enabled) > 0)) {
 		struct tracepoint_func *probe_func_ptr;
 		user_event_func_t probe_func;
+		struct iov_iter copy;
 		void *tpdata;
-		void *kdata;
-		u32 datalen;
-
-		kdata = kmalloc(i->count, GFP_KERNEL);
 
-		if (unlikely(!kdata))
-			return -ENOMEM;
-
-		datalen = copy_from_iter(kdata, i->count, i);
+		if (unlikely(iov_iter_fault_in_readable(i, i->count)))
+			return -EFAULT;
 
 		rcu_read_lock_sched();
+		pagefault_disable();
 
 		probe_func_ptr = rcu_dereference_sched(tp->funcs);
 
 		if (probe_func_ptr) {
 			do {
+				copy = *i;
 				probe_func = probe_func_ptr->func;
 				tpdata = probe_func_ptr->data;
-				probe_func(user, kdata, datalen, tpdata);
+				probe_func(user, &copy, tpdata);
 			} while ((++probe_func_ptr)->func);
 		}
 
+		pagefault_enable();
 		rcu_read_unlock_sched();
-
-		kfree(kdata);
 	}
 
 	return ret;
-- 
2.17.1


  parent reply	other threads:[~2021-11-04 17:04 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-04 17:04 [PATCH v4 00/10] user_events: Enable user processes to create and write to trace events Beau Belgrave
2021-11-04 17:04 ` [PATCH v4 01/10] user_events: Add UABI header for user access to user_events Beau Belgrave
2021-11-04 17:04 ` [PATCH v4 02/10] user_events: Add minimal support for trace_event into ftrace Beau Belgrave
2021-11-04 21:34   ` kernel test robot
2021-11-08  2:32     ` Masami Hiramatsu
2021-11-08 16:59       ` Beau Belgrave
2021-11-07 14:31   ` Masami Hiramatsu
2021-11-08 17:13     ` Beau Belgrave
2021-11-08 18:16       ` Steven Rostedt
2021-11-08 20:25         ` Beau Belgrave
2021-11-08 21:00           ` Steven Rostedt
2021-11-08 22:09             ` Beau Belgrave
2021-11-08 22:30               ` Steven Rostedt
2021-11-08 22:59                 ` Beau Belgrave
2021-11-09  4:58               ` Masami Hiramatsu
2021-11-09  2:56           ` Masami Hiramatsu
2021-11-09 19:08             ` Beau Belgrave
2021-11-09 19:25               ` Steven Rostedt
2021-11-09 20:14                 ` Beau Belgrave
2021-11-09 20:45                   ` Steven Rostedt
2021-11-09 21:27                     ` Beau Belgrave
2021-11-09 21:39                       ` Steven Rostedt
2021-11-10 13:56               ` Masami Hiramatsu
2021-11-11 17:33                 ` Beau Belgrave
2021-11-12 13:40                   ` Masami Hiramatsu
2021-11-07 18:18   ` Steven Rostedt
2021-11-08 19:56     ` Beau Belgrave
2021-11-08 20:53       ` Steven Rostedt
2021-11-08 21:15         ` Beau Belgrave
2021-11-04 17:04 ` [PATCH v4 03/10] user_events: Add print_fmt generation support for basic types Beau Belgrave
2021-11-08 22:03   ` Steven Rostedt
2021-11-04 17:04 ` [PATCH v4 04/10] user_events: Handle matching arguments from dyn_events Beau Belgrave
2021-11-08 22:05   ` Steven Rostedt
2021-11-04 17:04 ` [PATCH v4 05/10] user_events: Add basic perf and eBPF support Beau Belgrave
2021-11-04 17:04 ` [PATCH v4 06/10] user_events: Add self-test for ftrace integration Beau Belgrave
2021-11-04 17:04 ` [PATCH v4 07/10] user_events: Add self-test for dynamic_events integration Beau Belgrave
2021-11-04 17:04 ` [PATCH v4 08/10] user_events: Add self-test for perf_event integration Beau Belgrave
2021-11-04 17:04 ` Beau Belgrave [this message]
2021-11-08 22:45   ` [PATCH v4 09/10] user_events: Optimize writing events by only copying data once Steven Rostedt
2021-11-08 23:00     ` Beau Belgrave
2021-11-08 23:04       ` Steven Rostedt
2021-11-08 23:17         ` Beau Belgrave
2021-11-08 23:20           ` Steven Rostedt
2021-11-04 17:04 ` [PATCH v4 10/10] user_events: Add documentation file Beau Belgrave
2021-11-04 19:05   ` Jonathan Corbet
2021-11-04 21:08     ` Beau Belgrave
2021-11-04 21:18       ` Jonathan Corbet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211104170433.2206-10-beaub@linux.microsoft.com \
    --to=beaub@linux.microsoft.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-trace-devel@vger.kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).