public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] tracing: Replace kmap with copy_from_user() in trace_marker writing
@ 2016-12-08 17:40 Steven Rostedt
  2016-12-08 18:44 ` [PATCH v2] " Steven Rostedt
  0 siblings, 1 reply; 7+ messages in thread
From: Steven Rostedt @ 2016-12-08 17:40 UTC (permalink / raw)
  To: LKML; +Cc: Ingo Molnar, Peter Zijlstra, Thomas Gleixner, Henrik Austad

Instead of using get_user_pages_fast() and kmap_atomic() when writing
to the trace_marker file, just allocate enough space on the ring buffer
directly, and write into it via copy_from_user().

Writing into the trace_marker file use to allocate a temporary buffer
to perform the copy_from_user(), as we didn't want to write into the
ring buffer if the copy failed. But as a trace_marker write is suppose
to be extremely fast, and allocating memory causes other tracepoints to
trigger, Peter Zijlstra suggested using get_user_pages_fast() and
kmap_atomic() to keep the user space pages in memory and reading it
directly. But Henrik Austad had issues with this because that caused
other tracepoints to trigger as well.

Instead, just allocate the space in the ring buffer and use
copy_from_user() directly. If it faults, return -EFAULT and write
"<faulted>" into the ring buffer.

Cc: Henrik Austad <henrik@austad.us>
Cc: Peter Zijlstra <peterz@infradead.org>
Updates: d696b58ca2c3ca "tracing: Do not allocate buffer for trace_marker"
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 trace.c |  135 +++++++++++++++++-----------------------------------------------
 1 file changed, 37 insertions(+), 98 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 60416bf7c591..e0f8d814cec6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5738,61 +5738,6 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static inline int lock_user_pages(const char __user *ubuf, size_t cnt,
-				  struct page **pages, void **map_page,
-				  int *offset)
-{
-	unsigned long addr = (unsigned long)ubuf;
-	int nr_pages = 1;
-	int ret;
-	int i;
-
-	/*
-	 * Userspace is injecting traces into the kernel trace buffer.
-	 * We want to be as non intrusive as possible.
-	 * To do so, we do not want to allocate any special buffers
-	 * or take any locks, but instead write the userspace data
-	 * straight into the ring buffer.
-	 *
-	 * First we need to pin the userspace buffer into memory,
-	 * which, most likely it is, because it just referenced it.
-	 * But there's no guarantee that it is. By using get_user_pages_fast()
-	 * and kmap_atomic/kunmap_atomic() we can get access to the
-	 * pages directly. We then write the data directly into the
-	 * ring buffer.
-	 */
-
-	/* check if we cross pages */
-	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
-		nr_pages = 2;
-
-	*offset = addr & (PAGE_SIZE - 1);
-	addr &= PAGE_MASK;
-
-	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
-	if (ret < nr_pages) {
-		while (--ret >= 0)
-			put_page(pages[ret]);
-		return -EFAULT;
-	}
-
-	for (i = 0; i < nr_pages; i++)
-		map_page[i] = kmap_atomic(pages[i]);
-
-	return nr_pages;
-}
-
-static inline void unlock_user_pages(struct page **pages,
-				     void **map_page, int nr_pages)
-{
-	int i;
-
-	for (i = nr_pages - 1; i >= 0; i--) {
-		kunmap_atomic(map_page[i]);
-		put_page(pages[i]);
-	}
-}
-
 static ssize_t
 tracing_mark_write(struct file *filp, const char __user *ubuf,
 					size_t cnt, loff_t *fpos)
@@ -5803,13 +5748,15 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
 	struct print_entry *entry;
 	unsigned long irq_flags;
 	struct page *pages[2];
-	void *map_page[2];
-	int nr_pages = 1;
+	const char faulted[] = "<faulted>";
 	ssize_t written;
 	int offset;
 	int size;
 	int len;
 
+/* Used in tracing_mark_raw_write() as well */
+#define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
+
 	if (tracing_disabled)
 		return -EINVAL;
 
@@ -5821,30 +5768,31 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
 
 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
 
-	nr_pages = lock_user_pages(ubuf, cnt, pages, map_page, &offset);
-	if (nr_pages < 0)
-		return nr_pages;
-
 	local_save_flags(irq_flags);
-	size = sizeof(*entry) + cnt + 2; /* possible \n added */
+	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
+
+	/* If less than "<faulted>", then make sure we can still add that */
+	if (cnt < FAULTED_SIZE)
+		size += FAULTED_SIZE - cnt;
+
 	buffer = tr->trace_buffer.buffer;
 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
 					    irq_flags, preempt_count());
-	if (!event) {
+	if (unlikely(!event))
 		/* Ring buffer disabled, return as if not open for write */
-		written = -EBADF;
-		goto out_unlock;
-	}
+		return -EBADF;
 
 	entry = ring_buffer_event_data(event);
 	entry->ip = _THIS_IP_;
 
-	if (nr_pages == 2) {
-		len = PAGE_SIZE - offset;
-		memcpy(&entry->buf, map_page[0] + offset, len);
-		memcpy(&entry->buf[len], map_page[1], cnt - len);
+	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
+	if (len) {
+		memcpy(&entry->buf, faulted, FAULTED_SIZE);
+		cnt = FAULTED_SIZE;
+		written = -EFAULT;
 	} else
-		memcpy(&entry->buf, map_page[0] + offset, cnt);
+		written = cnt;
+	len = cnt;
 
 	if (entry->buf[cnt - 1] != '\n') {
 		entry->buf[cnt] = '\n';
@@ -5854,12 +5802,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
 
 	__buffer_unlock_commit(buffer, event);
 
-	written = cnt;
-
-	*fpos += written;
-
- out_unlock:
-	unlock_user_pages(pages, map_page, nr_pages);
+	if (written > 0)
+		*fpos += written;
 
 	return written;
 }
@@ -5875,15 +5819,16 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
 	struct ring_buffer_event *event;
 	struct ring_buffer *buffer;
 	struct raw_data_entry *entry;
+	const char faulted[] = "<faulted>";
 	unsigned long irq_flags;
 	struct page *pages[2];
-	void *map_page[2];
-	int nr_pages = 1;
 	ssize_t written;
 	int offset;
 	int size;
 	int len;
 
+#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
+
 	if (tracing_disabled)
 		return -EINVAL;
 
@@ -5899,38 +5844,32 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
 
 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
 
-	nr_pages = lock_user_pages(ubuf, cnt, pages, map_page, &offset);
-	if (nr_pages < 0)
-		return nr_pages;
-
 	local_save_flags(irq_flags);
 	size = sizeof(*entry) + cnt;
+	if (cnt < FAULT_SIZE_ID)
+		size += FAULT_SIZE_ID - cnt;
+
 	buffer = tr->trace_buffer.buffer;
 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
 					    irq_flags, preempt_count());
-	if (!event) {
+	if (!event)
 		/* Ring buffer disabled, return as if not open for write */
-		written = -EBADF;
-		goto out_unlock;
-	}
+		return -EBADF;
 
 	entry = ring_buffer_event_data(event);
 
-	if (nr_pages == 2) {
-		len = PAGE_SIZE - offset;
-		memcpy(&entry->id, map_page[0] + offset, len);
-		memcpy(((char *)&entry->id) + len, map_page[1], cnt - len);
+	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
+	if (len) {
+		entry->id = -1;
+		memcpy(&entry->buf, faulted, FAULTED_SIZE);
+		written = -EFAULT;
 	} else
-		memcpy(&entry->id, map_page[0] + offset, cnt);
+		written = cnt;
 
 	__buffer_unlock_commit(buffer, event);
 
-	written = cnt;
-
-	*fpos += written;
-
- out_unlock:
-	unlock_user_pages(pages, map_page, nr_pages);
+	if (written > 0)
+		*fpos += written;
 
 	return written;
 }

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-12-09 13:56 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-12-08 17:40 [PATCH] tracing: Replace kmap with copy_from_user() in trace_marker writing Steven Rostedt
2016-12-08 18:44 ` [PATCH v2] " Steven Rostedt
2016-12-09  6:34   ` [PATCH] tracing: (backport) Replace kmap with copy_from_user() in trace_marker Henrik Austad
2016-12-09  7:22     ` Greg KH
2016-12-09  8:05       ` Henrik Austad
2016-12-09  8:26         ` Greg KH
2016-12-09 13:56         ` Steven Rostedt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox