From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org
Cc: Ingo Molnar <mingo@elte.hu>,
Andrew Morton <akpm@linux-foundation.org>,
Peter Zijlstra <peterz@infradead.org>,
Frederic Weisbecker <fweisbec@gmail.com>,
Theodore Tso <tytso@mit.edu>,
Arjan van de Ven <arjan@infradead.org>,
Pekka Paalanen <pq@iki.fi>,
Arnaldo Carvalho de Melo <acme@redhat.com>,
"H. Peter Anvin" <hpa@zytor.com>,
Mathieu Desnoyers <compudj@krystal.dyndns.org>,
Martin Bligh <mbligh@google.com>,
"Frank Ch. Eigler" <fche@redhat.com>,
Tom Zanussi <tzanussi@gmail.com>,
Masami Hiramatsu <mhiramat@redhat.com>,
KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
Jason Baron <jbaron@redhat.com>,
Christoph Hellwig <hch@infradead.org>,
Jiaying Zhang <jiayingz@google.com>,
Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>,
mrubin@google.com, md@google.com,
Steven Rostedt <srostedt@redhat.com>
Subject: [PATCH 5/5] tracing: add binary buffer files for use with splice
Date: Tue, 03 Mar 2009 21:49:26 -0500 [thread overview]
Message-ID: <20090304025251.015368537@goodmis.org> (raw)
In-Reply-To: 20090304024921.153061228@goodmis.org
[-- Attachment #1: 0005-tracing-add-binary-buffer-files-for-use-with-splice.patch --]
[-- Type: text/plain, Size: 8579 bytes --]
From: Steven Rostedt <srostedt@redhat.com>
Impact: new feature
This patch creates a directory of files that correspond to the
per CPU ring buffers. These are binary files and are made to
be used with splice. This is the fastest way to extract data from
the ftrace ring buffers.
Thanks to Jiaying Zhang for pushing me to get this code fixed,
and to Eduard - Gabriel Munteanu for his splice code that helped
me debug my code.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
kernel/trace/trace.c | 274 ++++++++++++++++++++++++++++++++++++++++++++++++--
kernel/trace/trace.h | 1 +
2 files changed, 268 insertions(+), 7 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ea055aa..12539f7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -11,31 +11,30 @@
* Copyright (C) 2004-2006 Ingo Molnar
* Copyright (C) 2004 William Lee Irwin III
*/
+#include <linux/ring_buffer.h>
#include <linux/utsrelease.h>
+#include <linux/stacktrace.h>
+#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
+#include <linux/irqflags.h>
#include <linux/debugfs.h>
#include <linux/pagemap.h>
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
+#include <linux/kprobes.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
+#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/gfp.h>
#include <linux/fs.h>
-#include <linux/kprobes.h>
-#include <linux/writeback.h>
-#include <linux/splice.h>
-
-#include <linux/stacktrace.h>
-#include <linux/ring_buffer.h>
-#include <linux/irqflags.h>
#include "trace.h"
#include "trace_output.h"
@@ -3005,6 +3004,246 @@ static struct file_operations tracing_mark_fops = {
.write = tracing_mark_write,
};
+/*
+ * Per-open state of one binary buffer file.  Allocated in
+ * tracing_buffers_open() and hung off file->private_data.
+ */
+struct ftrace_buffer_info {
+	/* Trace array whose ring buffer is being read. */
+	struct trace_array	*tr;
+	/* Page from ring_buffer_alloc_read_page() that read() is served from. */
+	void			*spare;
+	/* CPU whose per-CPU buffer this file exposes. */
+	int			cpu;
+	/*
+	 * Number of bytes of @spare already copied to user space.  Any
+	 * value >= PAGE_SIZE means the page must be refilled first.
+	 */
+	unsigned int		read;
+};
+
+/*
+ * Open handler for a per-CPU binary buffer file.
+ *
+ * Allocates the per-open ftrace_buffer_info together with one spare
+ * ring buffer page and stores it in filp->private_data.
+ *
+ * Returns 0 on success, -ENODEV when tracing is disabled and -ENOMEM
+ * when either allocation fails.
+ */
+static int tracing_buffers_open(struct inode *inode, struct file *filp)
+{
+	struct ftrace_buffer_info *binfo;
+
+	if (tracing_disabled)
+		return -ENODEV;
+
+	binfo = kzalloc(sizeof(*binfo), GFP_KERNEL);
+	if (!binfo)
+		return -ENOMEM;
+
+	binfo->tr = &global_trace;
+	/* i_private carries the CPU number as a pointer-sized integer. */
+	binfo->cpu = (int)(long)inode->i_private;
+
+	binfo->spare = ring_buffer_alloc_read_page(binfo->tr->buffer);
+	if (!binfo->spare) {
+		kfree(binfo);
+		return -ENOMEM;
+	}
+
+	/* Force reading ring buffer for first read */
+	binfo->read = (unsigned int)-1;
+
+	filp->private_data = binfo;
+
+	return 0;
+}
+
+/*
+ * read() handler for a per-CPU binary buffer file.
+ *
+ * Data is served out of info->spare.  When the spare page has been
+ * fully consumed (info->read >= PAGE_SIZE) a new page is pulled from
+ * the ring buffer, the unused tail is zeroed so no stale kernel data
+ * reaches user space, and copying restarts at offset 0.
+ *
+ * Returns the number of bytes copied, 0 when the ring buffer could not
+ * supply a page, or -EFAULT when the copy to user space fails.
+ */
+static ssize_t
+tracing_buffers_read(struct file *filp, char __user *ubuf,
+		     size_t count, loff_t *ppos)
+{
+	struct ftrace_buffer_info *info = filp->private_data;
+	unsigned int pos;
+	ssize_t ret;
+	size_t size;
+
+	/* Nothing to copy; do not touch the ring buffer. */
+	if (!count)
+		return 0;
+
+	/* Do we have previous read data to read? */
+	if (info->read < PAGE_SIZE)
+		goto read;
+
+	ret = ring_buffer_read_page(info->tr->buffer,
+				    &info->spare,
+				    count,
+				    info->cpu, 0);
+	/*
+	 * On failure info->read is deliberately left >= PAGE_SIZE so
+	 * that the next call retries the ring buffer instead of
+	 * copying out whatever happens to be in the spare page.
+	 */
+	if (ret < 0)
+		return 0;
+
+	pos = ring_buffer_page_len(info->spare);
+
+	if (pos < PAGE_SIZE)
+		memset(info->spare + pos, 0, PAGE_SIZE - pos);
+
+	/* The spare page is now valid; start handing it out from offset 0. */
+	info->read = 0;
+
+read:
+	size = PAGE_SIZE - info->read;
+	if (size > count)
+		size = count;
+
+	ret = copy_to_user(ubuf, info->spare + info->read, size);
+	if (ret)
+		return -EFAULT;
+	*ppos += size;
+	info->read += size;
+
+	return size;
+}
+
+/*
+ * Release handler: hands the spare page back to the ring buffer and
+ * frees the per-open state allocated at open time.  Always returns 0.
+ */
+static int tracing_buffers_release(struct inode *inode, struct file *file)
+{
+	struct ftrace_buffer_info *binfo = file->private_data;
+	struct ring_buffer *rb = binfo->tr->buffer;
+
+	ring_buffer_free_read_page(rb, binfo->spare);
+	kfree(binfo);
+
+	return 0;
+}
+
+/*
+ * Reference-counted handle for one ring buffer page that has been
+ * handed to a pipe.  Stored in the pipe buffer's ->private field.
+ */
+struct buffer_ref {
+	/* Ring buffer that @page has to be returned to. */
+	struct ring_buffer	*buffer;
+	/* The page obtained from ring_buffer_alloc_read_page(). */
+	void			*page;
+	/* Reference count; page and handle are freed when it drops to 0. */
+	int			ref;
+};
+
+/*
+ * ->release() callback of the pipe buffer: drops one reference on the
+ * buffer_ref stored in buf->private.  When the last reference goes
+ * away the page is returned to the ring buffer and the handle freed.
+ */
+static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
+				    struct pipe_buffer *buf)
+{
+	struct buffer_ref *bref = (struct buffer_ref *)buf->private;
+
+	bref->ref--;
+	if (bref->ref != 0)
+		return;
+
+	/* Last user is gone: give the page back and clear the stale handle. */
+	ring_buffer_free_read_page(bref->buffer, bref->page);
+	kfree(bref);
+	buf->private = 0;
+}
+
+/*
+ * ->steal() callback of the pipe buffer.  Unconditionally returns 1.
+ * NOTE(review): a non-zero return presumably tells the pipe layer that
+ * the page cannot be stolen -- confirm against the ->steal() contract.
+ */
+static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe,
+				 struct pipe_buffer *buf)
+{
+	return 1;
+}
+
+/*
+ * ->get() callback of the pipe buffer: takes an additional reference
+ * on the buffer_ref stored in buf->private.
+ */
+static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
+				struct pipe_buffer *buf)
+{
+	struct buffer_ref *bref = (struct buffer_ref *)buf->private;
+
+	bref->ref += 1;
+}
+
+/*
+ * Pipe buffer operations for ring buffer pages spliced into a pipe.
+ * Mapping and confirmation use the generic helpers; release, steal
+ * and get go through the buffer_ref reference counting above.
+ */
+static struct pipe_buf_operations buffer_pipe_buf_ops = {
+	.can_merge	= 0,
+	.map		= generic_pipe_buf_map,
+	.unmap		= generic_pipe_buf_unmap,
+	.confirm	= generic_pipe_buf_confirm,
+	.release	= buffer_pipe_buf_release,
+	.steal		= buffer_pipe_buf_steal,
+	.get		= buffer_pipe_buf_get,
+};
+
+/*
+ * Callback from splice_to_pipe() for entries of the splice descriptor
+ * that could not be placed into the pipe.  Drops one reference on the
+ * buffer_ref of entry @i and frees the page and the handle once the
+ * count reaches zero.
+ */
+static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
+{
+	struct partial_page *part = &spd->partial[i];
+	struct buffer_ref *bref = (struct buffer_ref *)part->private;
+
+	bref->ref--;
+	if (bref->ref != 0)
+		return;
+
+	ring_buffer_free_read_page(bref->buffer, bref->page);
+	kfree(bref);
+	part->private = 0;
+}
+
+/*
+ * splice_read() handler for a per-CPU binary buffer file.
+ *
+ * Pulls up to PIPE_BUFFERS pages out of the ring buffer of info->cpu
+ * and hands them to @pipe.  Every page travels with a buffer_ref that
+ * starts with a single reference owned by the pipe buffer; the page
+ * goes back to the ring buffer when the last reference is dropped.
+ *
+ * Each pipe buffer always carries a whole page, so @len is accounted
+ * in PAGE_SIZE units: requests smaller than one page are rejected and
+ * larger ones are rounded down to a page multiple.  This keeps the
+ * number of bytes spliced from exceeding what the caller asked for.
+ *
+ * Returns the value of splice_to_pipe() when at least one page was
+ * read, -ESPIPE for a non-zero *ppos, -EINVAL for @len < PAGE_SIZE,
+ * and, when no page could be read, -EAGAIN if SPLICE_F_NONBLOCK is set
+ * or 0 otherwise.
+ */
+static ssize_t
+tracing_buffers_splice_read(struct file *file, loff_t *ppos,
+			    struct pipe_inode_info *pipe, size_t len,
+			    unsigned int flags)
+{
+	struct ftrace_buffer_info *info = file->private_data;
+	struct partial_page partial[PIPE_BUFFERS];
+	struct page *pages[PIPE_BUFFERS];
+	struct splice_pipe_desc spd = {
+		.pages		= pages,
+		.partial	= partial,
+		.flags		= flags,
+		.ops		= &buffer_pipe_buf_ops,
+		.spd_release	= buffer_spd_release,
+	};
+	struct buffer_ref *ref;
+	int i;
+
+	/*
+	 * We can't seek on a buffer input
+	 */
+	if (unlikely(*ppos))
+		return -ESPIPE;
+
+	/* Only whole pages are spliced; see the function comment. */
+	if (len < PAGE_SIZE)
+		return -EINVAL;
+	len -= len % PAGE_SIZE;
+
+	for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) {
+		int size;
+		int r;
+
+		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
+		if (!ref)
+			break;
+
+		/*
+		 * The pipe buffer owns the initial reference.  Without
+		 * it the first release would drop the count below zero
+		 * and never free the page.
+		 */
+		ref->ref = 1;
+		ref->buffer = info->tr->buffer;
+		ref->page = ring_buffer_alloc_read_page(ref->buffer);
+		if (!ref->page) {
+			kfree(ref);
+			break;
+		}
+
+		r = ring_buffer_read_page(ref->buffer, &ref->page,
+					  len, info->cpu, 0);
+		if (r < 0) {
+			ring_buffer_free_read_page(ref->buffer,
+						   ref->page);
+			kfree(ref);
+			break;
+		}
+
+		/*
+		 * zero out any left over data, this is going to
+		 * user land.
+		 */
+		size = ring_buffer_page_len(ref->page);
+		if (size < PAGE_SIZE)
+			memset(ref->page + size, 0, PAGE_SIZE - size);
+
+		spd.pages[i] = virt_to_page(ref->page);
+		spd.partial[i].len = PAGE_SIZE;
+		spd.partial[i].offset = 0;
+		spd.partial[i].private = (unsigned long)ref;
+	}
+
+	/* i counts only the pages that were filled successfully. */
+	spd.nr_pages = i;
+
+	/* did we read anything? */
+	if (!spd.nr_pages) {
+		/* TODO: block */
+		if (flags & SPLICE_F_NONBLOCK)
+			return -EAGAIN;
+		return 0;
+	}
+
+	return splice_to_pipe(pipe, &spd);
+}
+
+/*
+ * File operations of the per-CPU binary buffer files.  The files can
+ * be consumed through read() or splice(); seeking is refused.
+ */
+static const struct file_operations tracing_buffers_fops = {
+	.open		= tracing_buffers_open,
+	.read		= tracing_buffers_read,
+	.release	= tracing_buffers_release,
+	.splice_read	= tracing_buffers_splice_read,
+	.llseek		= no_llseek,
+};
+
#ifdef CONFIG_DYNAMIC_FTRACE
int __weak ftrace_arch_read_dyn_info(char *buf, int size)
@@ -3399,6 +3638,7 @@ static __init void create_trace_options_dir(void)
static __init int tracer_init_debugfs(void)
{
struct dentry *d_tracer;
+ struct dentry *buffers;
struct dentry *entry;
int cpu;
@@ -3471,6 +3711,26 @@ static __init int tracer_init_debugfs(void)
pr_warning("Could not create debugfs "
"'trace_marker' entry\n");
+ buffers = debugfs_create_dir("binary_buffers", d_tracer);
+
+ if (!buffers)
+ pr_warning("Could not create buffers directory\n");
+ else {
+ int cpu;
+ char buf[64];
+
+ for_each_tracing_cpu(cpu) {
+ sprintf(buf, "%d", cpu);
+
+ entry = debugfs_create_file(buf, 0444, buffers,
+ (void *)(long)cpu,
+ &tracing_buffers_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs buffers "
+ "'%s' entry\n", buf);
+ }
+ }
+
#ifdef CONFIG_DYNAMIC_FTRACE
entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
&ftrace_update_tot_cnt,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e606633..561bb5c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -217,6 +217,7 @@ enum trace_flag_type {
*/
struct trace_array_cpu {
atomic_t disabled;
+ void *buffer_page; /* ring buffer spare */
/* these fields get copied into max-trace: */
unsigned long trace_idx;
--
1.5.6.5
--
next prev parent reply other threads:[~2009-03-04 2:54 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-03-04 2:49 [PATCH 0/5] [RFC] binary reading of ftrace ring buffers Steven Rostedt
2009-03-04 2:49 ` [PATCH 1/5] ring-buffer: reset write field for ring_buffer_read_page Steven Rostedt
2009-03-04 2:49 ` [PATCH 2/5] ring-buffer: fix ring_buffer_read_page Steven Rostedt
2009-03-04 2:49 ` [PATCH 3/5] ring-buffer: replace sizeof of event header with offsetof Steven Rostedt
2009-03-04 2:49 ` [PATCH 4/5] ring-buffer: make ring_buffer_read_page read from start on partial page Steven Rostedt
2009-03-04 2:49 ` Steven Rostedt [this message]
2009-03-04 3:35 ` [PATCH 5/5] tracing: add binary buffer files for use with splice Andrew Morton
2009-03-04 3:43 ` Steven Rostedt
2009-03-04 4:38 ` H. Peter Anvin
2009-03-04 4:45 ` Steven Rostedt
2009-03-04 4:46 ` Theodore Tso
2009-03-04 4:49 ` Steven Rostedt
2009-03-04 5:07 ` [PATCH] fs: make simple_read_from_buffer conventional Steven Rostedt
2009-03-04 10:12 ` Ingo Molnar
2009-03-04 3:01 ` [PATCH 0/5] [RFC] binary reading of ftrace ring buffers Steven Rostedt
2009-03-04 3:23 ` Steven Rostedt
2009-03-04 10:26 ` Ingo Molnar
2009-03-04 14:51 ` Steven Rostedt
2009-03-04 22:47 ` Ingo Oeser
2009-03-04 15:39 ` Mathieu Desnoyers
2009-03-04 17:00 ` Mathieu Desnoyers
2009-03-04 17:19 ` Peter Zijlstra
2009-03-06 16:59 ` Steven Rostedt
2009-03-06 19:10 ` Mathieu Desnoyers
2009-03-06 23:28 ` Jiaying Zhang
2009-03-08 19:21 ` Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090304025251.015368537@goodmis.org \
--to=rostedt@goodmis.org \
--cc=acme@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=arjan@infradead.org \
--cc=compudj@krystal.dyndns.org \
--cc=eduard.munteanu@linux360.ro \
--cc=fche@redhat.com \
--cc=fweisbec@gmail.com \
--cc=hch@infradead.org \
--cc=hpa@zytor.com \
--cc=jbaron@redhat.com \
--cc=jiayingz@google.com \
--cc=kosaki.motohiro@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mbligh@google.com \
--cc=md@google.com \
--cc=mhiramat@redhat.com \
--cc=mingo@elte.hu \
--cc=mrubin@google.com \
--cc=peterz@infradead.org \
--cc=pq@iki.fi \
--cc=srostedt@redhat.com \
--cc=tytso@mit.edu \
--cc=tzanussi@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox