* [Patch 0/1] Enhancements to 'trace' infrastructure - v2
@ 2008-05-16 17:01 K.Prasad
2008-05-16 17:04 ` [RFC Patch 1/1] trace_printk and trace_dump interface " K.Prasad
0 siblings, 1 reply; 11+ messages in thread
From: K.Prasad @ 2008-05-16 17:01 UTC (permalink / raw)
To: kernel list; +Cc: dwilder, akpm, prasad@linux.vnet.ibm.com
Hi All,
Please review the following patch which introduces two new
interfaces to output data using 'trace' infrastructure. These interfaces
can be used to print directly onto a debugfs mount. Since it uses
'trace'
interface underneath the directory and file structures are as created by
'trace'.
The proposed trace_* interfaces are meant to overcome the need to
setup/tear-down 'trace' infrastructure by the user. In addition to this
the patches help in:
a) Printing out data into a debugfs mounted file without bothering about
creating/re-using debugfs files, sizes of intermediate buffers, etc.
(This is achieved through the functions init_trace_interface() and
trace_exists()).
b) Provide interfaces to do i)string output or ii)binary dump of data
c) Tear-down of the trace infrastructure through a single function call
using its parent directory name, and unmindful of the internal data
structures such as 'struct trace_info'.(using trace_cleanup_all)
d) The trace_printk_data may be further enhanced to provide features
suh as the ability to invoke pre- and post- printing callback functions
For e.g. A callback function to obtain a given lock before printing out
a given data, etc.
A quick look at samples/trace/fork_trace.c would help understand what is
required to get data out using the 'trace' infrastructure (vs the
proposed interfaces which have been exemplified in
samples/trace/fork_new_trace.c).
The patches are based against 2.6.25-mm1 and have been tested on an
i386 machine.
Thanks,
K.Prasad
^ permalink raw reply [flat|nested] 11+ messages in thread
* [RFC Patch 1/1] trace_printk and trace_dump interface - v2
2008-05-16 17:01 [Patch 0/1] Enhancements to 'trace' infrastructure - v2 K.Prasad
@ 2008-05-16 17:04 ` K.Prasad
2008-05-17 2:22 ` K.Prasad
2008-05-19 20:02 ` [RFC Patch 1/1] trace_printk and trace_dump interface - v2 David Wilder
0 siblings, 2 replies; 11+ messages in thread
From: K.Prasad @ 2008-05-16 17:04 UTC (permalink / raw)
To: kernel list; +Cc: dwilder, akpm, prasad@linux.vnet.ibm.com
This patch introduces two new interfaces called trace_printk and
trace_dump which can be used to print to the debugfs mount directly.
It uses the 'trace' infrastructure underneath and is a patch over it.
A sample file is also created to demonstrate its ease of use.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
Documentation/trace.txt | 22 ++++
include/linux/trace.h | 57 +++++++++++
lib/trace.c | 204
+++++++++++++++++++++++++++++++++++++++--
samples/trace/Makefile | 2
samples/trace/fork_new_trace.c | 99 +++++++++++++++++++
5 files changed, 378 insertions(+), 6 deletions(-)
Index: linux-blktrace-many/include/linux/trace.h
===================================================================
--- linux-blktrace-many.orig/include/linux/trace.h
+++ linux-blktrace-many/include/linux/trace.h
@@ -39,10 +39,22 @@ enum trace_state {
TRACE_STOPPED,
};
+enum trace_dir_state {
+ TRACE_PARENT_DIR_ABSENT,
+ TRACE_PARENT_DIR_EXISTS,
+ TRACE_DIR_EXISTS
+};
+
#define TRACE_ROOT_NAME_SIZE 64 /* Max root dir identifier */
#define TRACE_NAME_SIZE 64 /* Max trace identifier */
/*
+ * Buffers for use by trace_printk
+ */
+#define DEFAULT_TRACE_BUF_SIZE 4096
+#define DEFAULT_TRACE_SUB_BUF_NR 40
+
+/*
* Global root user information
*/
struct trace_root {
@@ -71,6 +83,28 @@ struct trace_info {
unsigned int flags;
unsigned int buf_size;
unsigned int buf_nr;
+ spinlock_t trace_lock;
+};
+
+struct trace_printk_data {
+ char *parent_dir;
+ char *dir;
+ int exists;
+ int buf_size;
+ int sub_buf_size;
+ unsigned long flags;
+ struct trace_info *ti;
+};
+
+/*
+ * Information about every trace directory
+ */
+struct trace_dir {
+ struct list_head trace_dir_list;
+ char trace_dir_name[TRACE_NAME_SIZE];
+ struct dentry *trace_root;
+ struct dentry *trace_dir;
+ struct trace_info *ti;
};
#ifdef CONFIG_TRACE
@@ -83,6 +117,12 @@ struct trace_info *trace_setup(const cha
int trace_start(struct trace_info *trace);
int trace_stop(struct trace_info *trace);
void trace_cleanup(struct trace_info *trace);
+int trace_exists(const char *parent_dir, const char *dir,
+ struct trace_info **ti);
+void trace_cleanup_all(const char *parent_dir);
+int trace_printk(struct trace_printk_data *dpk, char *format, ...);
+int trace_dump(struct trace_printk_data *dpk, const void *output,
+ const int output_len);
#else
static inline struct trace_info *trace_setup(const char *root,
const char *name, u32 buf_size,
@@ -94,6 +134,23 @@ static inline int trace_start(struct tra
static inline int trace_stop(struct trace_info *trace) { return -EINVAL; }
static inline int trace_running(struct trace_info *trace) { return 0; }
static inline void trace_cleanup(struct trace_info *trace) {}
+static inline int trace_exists(const char *parent_dir, const char *dir,
+ struct trace_info **ti)
+{
+ return -EINVAL;
+}
+static inline void trace_cleanup_all(const char *parent_dir) {}
+static inline int trace_printk(struct trace_printk_data *dpk, char
*format,
+ ...)
+{
+ return -EINVAL;
+}
+int trace_dump(struct trace_printk_data *dpk, const void *output,
+ const int output_len)
+{
+ return -EINVAL;
+}
+
#endif
#endif
Index: linux-blktrace-many/lib/trace.c
===================================================================
--- linux-blktrace-many.orig/lib/trace.c
+++ linux-blktrace-many/lib/trace.c
@@ -29,6 +29,7 @@
#include <linux/trace.h>
static LIST_HEAD(trace_roots);
+static LIST_HEAD(trace_dirs);
static DEFINE_MUTEX(trace_mutex);
static int state_open(struct inode *inode, struct file *filp)
@@ -99,9 +100,19 @@ static void remove_root(struct trace_inf
static void remove_tree(struct trace_info *trace)
{
+ struct list_head *pos, *temp;
+ struct trace_dir *dr = NULL;
+
mutex_lock(&trace_mutex);
debugfs_remove(trace->dir);
+ list_for_each_safe(pos, temp, &trace_dirs) {
+ dr = list_entry(pos, struct trace_dir, trace_dir_list);
+ if (dr->ti == trace) {
+ list_del(pos);
+ kfree(dr);
+ }
+ }
if (trace->root) {
if (--trace->root->users == 0)
remove_root(trace);
@@ -142,11 +153,17 @@ static struct trace_root *lookup_root(co
static struct dentry *create_tree(struct trace_info *trace, const char
*root,
const char *name)
{
- struct dentry *dir = NULL;
+ struct trace_dir *temp;
if (root == NULL || name == NULL)
return ERR_PTR(-EINVAL);
+ temp = kzalloc(sizeof(struct trace_dir), GFP_KERNEL);
+ if ((temp == NULL) || (strlen(name) > TRACE_NAME_SIZE))
+ return ERR_PTR(-ENOMEM);
+
+ strlcpy(temp->trace_dir_name, name, sizeof(temp->trace_dir_name));
+
mutex_lock(&trace_mutex);
trace->root = lookup_root(root);
@@ -155,17 +172,49 @@ static struct dentry *create_tree(struct
goto err;
}
- dir = debugfs_create_dir(name, trace->root->root);
- if (IS_ERR(dir))
+ temp->trace_root = trace->root->root;
+ temp->trace_dir = debugfs_create_dir(name, trace->root->root);
+
+ if (IS_ERR(temp->trace_dir))
remove_root(trace);
- else
+ else {
trace->root->users++;
+ temp->ti = trace;
+ list_add_tail(&temp->trace_dir_list, &trace_dirs);
+ }
err:
mutex_unlock(&trace_mutex);
- return dir;
+ return temp->trace_dir;
}
+int trace_exists(const char *parent_dir, const char *dir,
+ struct trace_info **ti)
+{
+ struct list_head *pos;
+ struct trace_root *r;
+ struct trace_dir *temp;
+
+ list_for_each(pos, &trace_roots) {
+ r = list_entry(pos, struct trace_root, list);
+ if (!strcmp(parent_dir, r->name))
+ goto search_dir;
+ }
+ return TRACE_PARENT_DIR_ABSENT;
+
+ search_dir:
+ list_for_each(pos, &trace_dirs) {
+ temp = list_entry(pos, struct trace_dir, trace_dir_list);
+
+ if (!strcmp(dir, temp->trace_dir_name)) {
+ *ti = temp->ti;
+ return TRACE_DIR_EXISTS;
+ }
+ }
+ return TRACE_PARENT_DIR_EXISTS;
+}
+EXPORT_SYMBOL_GPL(trace_exists);
+
static int dropped_open(struct inode *inode, struct file *filp)
{
filp->private_data = inode->i_private;
@@ -561,3 +610,148 @@ void trace_cleanup(struct trace_info *tr
kfree(trace);
}
EXPORT_SYMBOL_GPL(trace_cleanup);
+
+/**
+ * trace_cleanup_all - Removes all trace directories under a parent_dir
+ * @parent_dir: Name of the parent directory
+ */
+void trace_cleanup_all(const char *parent_dir)
+{
+ struct list_head *pos, *pos_temp;
+ struct trace_dir *temp;
+
+ list_for_each_safe(pos, pos_temp, &trace_dirs) {
+ temp = list_entry(pos, struct trace_dir, trace_dir_list);
+ if (!strncmp(parent_dir, temp->trace_root->d_iname, strlen(parent_dir)))
+ trace_cleanup(temp->ti);
+ }
+}
+EXPORT_SYMBOL_GPL(trace_cleanup_all);
+
+/*
+ * Send formatted trace data to trace channel.
+ */
+static int trace_printf(struct trace_info *trace, const char *format,
+ va_list ap)
+{
+ va_list aq;
+ char *record;
+ int len, ret = 0;
+
+ if (trace_running(trace)) {
+ va_copy(aq, ap);
+ len = vsnprintf(NULL, 0, format, aq);
+ va_end(aq);
+ record = relay_reserve(trace->rchan, ++len);
+ if (record)
+ ret = vsnprintf(record, len, format, ap);
+ }
+ return ret;
+}
+
+static inline int init_trace_interface(struct trace_printk_data *tpk)
+{
+ int ret = 0;
+ tpk->exists = trace_exists(tpk->parent_dir, tpk->dir, &tpk->ti);
+
+ switch(tpk->exists) {
+
+ case TRACE_PARENT_DIR_EXISTS:
+ case TRACE_PARENT_DIR_ABSENT:
+ if(!tpk->buf_size)
+ tpk->buf_size = DEFAULT_TRACE_BUF_SIZE;
+ if(!tpk->sub_buf_size)
+ tpk->sub_buf_size = DEFAULT_TRACE_SUB_BUF_NR;
+ tpk->ti = trace_setup(tpk->parent_dir, tpk->dir,
+ tpk->buf_size, tpk->sub_buf_size, tpk->flags);
+ printk(KERN_INFO "Trace interface %s setup\n",
+ tpk->ti->dir->d_iname);
+ if (IS_ERR(tpk->ti)) {
+ printk(KERN_ERR "Error initialising %s interface\n",
+ tpk->ti->dir->d_iname);
+ return -EPERM;
+ }
+ /* Fall through */
+ case TRACE_DIR_EXISTS:
+ if (tpk->ti->state == TRACE_SETUP)
+ ret = trace_start(tpk->ti);
+ else
+ ret = -EPERM;
+ }
+
+ return 0;
+}
+
+/**
+ * trace_printk - Output a string to debugfs mount 'directly' using
'trace'
+ * @tpk: Structure containing info such as parent_dir and directory
+ * @format: String containing format string specifiers
+ * @ap: List of arguments
+ */
+int trace_printk(struct trace_printk_data *tpk, char *format, ...)
+{
+ int ret = 0;
+ va_list(ap);
+ unsigned long flags = 0;
+
+ va_start(ap, format);
+
+ ret = init_trace_interface(tpk);
+ if (unlikely(ret))
+ return ret;
+
+ /* Now do the actual printing */
+ /* Take an RCU Lock over the trace_info state */
+ rcu_read_lock();
+ /* Take a spinlock for the global buffer used by relay */
+ if (tpk->flags & TRACE_GLOBAL_CHANNEL)
+ spin_lock_irqsave(&tpk->ti->trace_lock, flags);
+ ret = trace_printf(tpk->ti, format, ap);
+ if (tpk->flags & TRACE_GLOBAL_CHANNEL)
+ spin_unlock_irqrestore(&tpk->ti->trace_lock, flags);
+ rcu_read_unlock();
+
+ va_end(ap);
+ return ret;
+}
+EXPORT_SYMBOL(trace_printk);
+
+/**
+ * trace_dump - Output binary into debugfs mount 'directly' using 'trace'
+ * @tpk: Structure containing info such as parent_dir and directory
+ * @output: Data that needs to be output
+ * @output_len: Length of the output data
+ */
+int trace_dump(struct trace_printk_data *tpk, const void *output,
+ const int output_len)
+{
+ char *record;
+ unsigned long flags = 0;
+ int ret = 0;
+
+ ret = init_trace_interface(tpk);
+ if (unlikely(ret))
+ return ret;
+
+ /* Now do the actual printing */
+ rcu_read_lock();
+ /* Take a spinlock for the global buffer used by relay */
+ if (tpk->flags & TRACE_GLOBAL_CHANNEL)
+ spin_lock_irqsave(&tpk->ti->trace_lock, flags);
+ record = relay_reserve(tpk->ti->rchan, output_len);
+
+ if (record && trace_running(tpk->ti))
+ memcpy(record, output, output_len);
+ else {
+ if(record)
+ ret = -EPERM;
+ else
+ ret = -ENOMEM;
+ }
+ if (tpk->flags & TRACE_GLOBAL_CHANNEL)
+ spin_unlock_irqrestore(&tpk->ti->trace_lock, flags);
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(trace_dump);
Index: linux-blktrace-many/samples/trace/fork_new_trace.c
===================================================================
--- /dev/null
+++ linux-blktrace-many/samples/trace/fork_new_trace.c
@@ -0,0 +1,99 @@
+/*
+ * An example of using trace in a kprobes module
+ *
+ * Copyright (C) 2008 IBM Inc.
+ *
+ * K.Prasad <prasad@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
USA
+ *
+ * -------
+ * This module creates a trace channel and places a kprobe
+ * on the function do_fork(). The value of current->pid is written to
+ * the trace channel each time the kprobe is hit..
+ *
+ * How to run the example:
+ * $ mount -t debugfs /debug
+ * $ insmod fork_new_trace.ko
+ *
+ * To view the data produced by the module:
+ * $ cat /debug/trace_example/do_fork/trace0
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+#include <linux/trace.h>
+
+#define SAMPLE_PARENT_DIR "trace_new_example"
+#define PROBE_POINT "do_fork"
+
+static struct kprobe kp;
+static struct trace_printk_data *tpk;
+
+static int handler_pre(struct kprobe *p, struct pt_regs *regs)
+{
+ trace_printk(tpk, "%d\n", current->pid);
+ return 0;
+}
+
+int init_module(void)
+{
+ int ret = 0;
+ int len_parent_dir, len_dir;
+
+ /* setup the kprobe */
+ kp.pre_handler = handler_pre;
+ kp.post_handler = NULL;
+ kp.fault_handler = NULL;
+ kp.symbol_name = PROBE_POINT;
+ ret = register_kprobe(&kp);
+ if (ret) {
+ printk(KERN_ERR "fork_trace: register_kprobe failed\n");
+ return ret;
+ }
+
+ len_parent_dir = strlen(SAMPLE_PARENT_DIR) + 1;
+ /* Initialising len_dir to the larger of the two dir names */
+ len_dir = strlen("kprobe_struct") + 1;
+
+ tpk = kzalloc(sizeof(*tpk), GFP_KERNEL);
+ if(!tpk)
+ ret = 1;
+
+ tpk->parent_dir = SAMPLE_PARENT_DIR;
+
+ /* Let's do a binary dump of struct kprobe using trace_dump */
+ tpk->dir = "kprobes_struct";
+ tpk->flags = TRACE_GLOBAL_CHANNEL;
+ trace_dump(tpk, &kp, sizeof(kp));
+
+ /* Now change the directory to collect fork pid data */
+ tpk->dir = PROBE_POINT;
+
+ if(ret)
+ printk(KERN_ERR "Unable to find required free memory. "
+ "Trace new sample module loading aborted");
+ return ret;
+}
+
+void cleanup_module(void)
+{
+ unregister_kprobe(&kp);
+
+ /* Just a single cleanup call passing the parent dir string */
+ trace_cleanup_all(SAMPLE_PARENT_DIR);
+}
+MODULE_LICENSE("GPL");
Index: linux-blktrace-many/samples/trace/Makefile
===================================================================
--- linux-blktrace-many.orig/samples/trace/Makefile
+++ linux-blktrace-many/samples/trace/Makefile
@@ -1,4 +1,4 @@
# builds the trace example kernel modules;
# then to use (as root): insmod <fork_trace.ko>
-obj-$(CONFIG_SAMPLE_TRACE) := fork_trace.o
+obj-$(CONFIG_SAMPLE_TRACE) := fork_trace.o fork_new_trace.o
Index: linux-blktrace-many/Documentation/trace.txt
===================================================================
--- linux-blktrace-many.orig/Documentation/trace.txt
+++ linux-blktrace-many/Documentation/trace.txt
@@ -150,6 +150,28 @@ The steps a kernel data provider takes t
5) Destroy the trace channel and underlying relay channel -
trace_cleanup().
+Alternatively the user may choose to make use of two new interfaces --
+trace_printk() and trace_dump() -- to setup trace interface and
+trace_cleanup_all() to tear-down the same.
+
+Steps to use:
+1) Create and populate an instance of trace_printk_data structure. The
fields
+ parent_dir and dir are mandatory. The fields buf_size, sub_buf_size
and flags
+ are optional and will take default values if not populated. The field
+ 'exists' and ti are for the trace infrastructure to use. The pointer
to the
+ 'struct trace_info' i.e. ti may be used to perform fine granular
operations
+ such as determine the state of the 'trace', stop individual traces,
etc.
+2) Default values for buf_size and sub_buf_size are 4096, 40 respectively.
+3) Use trace_dump() to output binary data which may be acted upon by a
+ high-level program (say dumping a structure). trace_printk() can be
used
+ for string output. Pass a pointer to the instance of trace_printk_data
+ structure to these functions along with other parameters. The output
from
+ these functions can be found at
+ <debugfs_mount>/<parent_dir>/<dir>/trace<0..n>.
+4) trace_cleanup_all() for a given parent directory will cleanup and
remove all
+ trace directories created under the specified directory.
+5) Sample code for the same can be found in samples/trace/fork_new_trace.c
+
Kernel Configuration
--------------------
To use trace, configure your kernel with CONFIG_TRACE=y. Trace depends on
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [RFC Patch 1/1] trace_printk and trace_dump interface - v2
2008-05-16 17:04 ` [RFC Patch 1/1] trace_printk and trace_dump interface " K.Prasad
@ 2008-05-17 2:22 ` K.Prasad
2008-05-19 23:21 ` Andrew Morton
2008-05-19 20:02 ` [RFC Patch 1/1] trace_printk and trace_dump interface - v2 David Wilder
1 sibling, 1 reply; 11+ messages in thread
From: K.Prasad @ 2008-05-17 2:22 UTC (permalink / raw)
To: kernel list; +Cc: dwilder, akpm, prasad@linux.vnet.ibm.com
Resending this patch due to style issues found in previous patch.
This patch introduces two new interfaces called trace_printk and
trace_dump which can be used to print to the debugfs mount directly.
It uses the 'trace' infrastructure underneath and is a patch over it.
A sample file is also created to demonstrate its ease of use.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
Documentation/trace.txt | 22 ++++
include/linux/trace.h | 57 +++++++++++
lib/trace.c | 205
++++++++++++++++++++++++++++++++++++++++-
samples/trace/Makefile | 2
samples/trace/fork_new_trace.c | 99 +++++++++++++++++++
5 files changed, 379 insertions(+), 6 deletions(-)
Index: linux-blktrace-many/include/linux/trace.h
===================================================================
--- linux-blktrace-many.orig/include/linux/trace.h
+++ linux-blktrace-many/include/linux/trace.h
@@ -39,10 +39,22 @@ enum trace_state {
TRACE_STOPPED,
};
+enum trace_dir_state {
+ TRACE_PARENT_DIR_ABSENT,
+ TRACE_PARENT_DIR_EXISTS,
+ TRACE_DIR_EXISTS
+};
+
#define TRACE_ROOT_NAME_SIZE 64 /* Max root dir identifier */
#define TRACE_NAME_SIZE 64 /* Max trace identifier */
/*
+ * Buffers for use by trace_printk
+ */
+#define DEFAULT_TRACE_BUF_SIZE 4096
+#define DEFAULT_TRACE_SUB_BUF_NR 40
+
+/*
* Global root user information
*/
struct trace_root {
@@ -71,6 +83,28 @@ struct trace_info {
unsigned int flags;
unsigned int buf_size;
unsigned int buf_nr;
+ spinlock_t trace_lock;
+};
+
+struct trace_printk_data {
+ char *parent_dir;
+ char *dir;
+ int exists;
+ int buf_size;
+ int sub_buf_size;
+ unsigned long flags;
+ struct trace_info *ti;
+};
+
+/*
+ * Information about every trace directory
+ */
+struct trace_dir {
+ struct list_head trace_dir_list;
+ char trace_dir_name[TRACE_NAME_SIZE];
+ struct dentry *trace_root;
+ struct dentry *trace_dir;
+ struct trace_info *ti;
};
#ifdef CONFIG_TRACE
@@ -83,6 +117,12 @@ struct trace_info *trace_setup(const cha
int trace_start(struct trace_info *trace);
int trace_stop(struct trace_info *trace);
void trace_cleanup(struct trace_info *trace);
+int trace_exists(const char *parent_dir, const char *dir,
+ struct trace_info **ti);
+void trace_cleanup_all(const char *parent_dir);
+int trace_printk(struct trace_printk_data *dpk, char *format, ...);
+int trace_dump(struct trace_printk_data *dpk, const void *output,
+ const int output_len);
#else
static inline struct trace_info *trace_setup(const char *root,
const char *name, u32 buf_size,
@@ -94,6 +134,23 @@ static inline int trace_start(struct tra
static inline int trace_stop(struct trace_info *trace) { return -EINVAL; }
static inline int trace_running(struct trace_info *trace) { return 0; }
static inline void trace_cleanup(struct trace_info *trace) {}
+static inline int trace_exists(const char *parent_dir, const char *dir,
+ struct trace_info **ti)
+{
+ return -EINVAL;
+}
+static inline void trace_cleanup_all(const char *parent_dir) {}
+static inline int trace_printk(struct trace_printk_data *dpk, char
*format,
+ ...)
+{
+ return -EINVAL;
+}
+int trace_dump(struct trace_printk_data *dpk, const void *output,
+ const int output_len)
+{
+ return -EINVAL;
+}
+
#endif
#endif
Index: linux-blktrace-many/lib/trace.c
===================================================================
--- linux-blktrace-many.orig/lib/trace.c
+++ linux-blktrace-many/lib/trace.c
@@ -29,6 +29,7 @@
#include <linux/trace.h>
static LIST_HEAD(trace_roots);
+static LIST_HEAD(trace_dirs);
static DEFINE_MUTEX(trace_mutex);
static int state_open(struct inode *inode, struct file *filp)
@@ -99,9 +100,19 @@ static void remove_root(struct trace_inf
static void remove_tree(struct trace_info *trace)
{
+ struct list_head *pos, *temp;
+ struct trace_dir *dr = NULL;
+
mutex_lock(&trace_mutex);
debugfs_remove(trace->dir);
+ list_for_each_safe(pos, temp, &trace_dirs) {
+ dr = list_entry(pos, struct trace_dir, trace_dir_list);
+ if (dr->ti == trace) {
+ list_del(pos);
+ kfree(dr);
+ }
+ }
if (trace->root) {
if (--trace->root->users == 0)
remove_root(trace);
@@ -142,11 +153,17 @@ static struct trace_root *lookup_root(co
static struct dentry *create_tree(struct trace_info *trace, const char
*root,
const char *name)
{
- struct dentry *dir = NULL;
+ struct trace_dir *temp;
if (root == NULL || name == NULL)
return ERR_PTR(-EINVAL);
+ temp = kzalloc(sizeof(struct trace_dir), GFP_KERNEL);
+ if ((temp == NULL) || (strlen(name) > TRACE_NAME_SIZE))
+ return ERR_PTR(-ENOMEM);
+
+ strlcpy(temp->trace_dir_name, name, sizeof(temp->trace_dir_name));
+
mutex_lock(&trace_mutex);
trace->root = lookup_root(root);
@@ -155,17 +172,49 @@ static struct dentry *create_tree(struct
goto err;
}
- dir = debugfs_create_dir(name, trace->root->root);
- if (IS_ERR(dir))
+ temp->trace_root = trace->root->root;
+ temp->trace_dir = debugfs_create_dir(name, trace->root->root);
+
+ if (IS_ERR(temp->trace_dir))
remove_root(trace);
- else
+ else {
trace->root->users++;
+ temp->ti = trace;
+ list_add_tail(&temp->trace_dir_list, &trace_dirs);
+ }
err:
mutex_unlock(&trace_mutex);
- return dir;
+ return temp->trace_dir;
}
+int trace_exists(const char *parent_dir, const char *dir,
+ struct trace_info **ti)
+{
+ struct list_head *pos;
+ struct trace_root *r;
+ struct trace_dir *temp;
+
+ list_for_each(pos, &trace_roots) {
+ r = list_entry(pos, struct trace_root, list);
+ if (!strcmp(parent_dir, r->name))
+ goto search_dir;
+ }
+ return TRACE_PARENT_DIR_ABSENT;
+
+ search_dir:
+ list_for_each(pos, &trace_dirs) {
+ temp = list_entry(pos, struct trace_dir, trace_dir_list);
+
+ if (!strcmp(dir, temp->trace_dir_name)) {
+ *ti = temp->ti;
+ return TRACE_DIR_EXISTS;
+ }
+ }
+ return TRACE_PARENT_DIR_EXISTS;
+}
+EXPORT_SYMBOL_GPL(trace_exists);
+
static int dropped_open(struct inode *inode, struct file *filp)
{
filp->private_data = inode->i_private;
@@ -561,3 +610,149 @@ void trace_cleanup(struct trace_info *tr
kfree(trace);
}
EXPORT_SYMBOL_GPL(trace_cleanup);
+
+/**
+ * trace_cleanup_all - Removes all trace directories under a parent_dir
+ * @parent_dir: Name of the parent directory
+ */
+void trace_cleanup_all(const char *parent_dir)
+{
+ struct list_head *pos, *pos_temp;
+ struct trace_dir *temp;
+
+ list_for_each_safe(pos, pos_temp, &trace_dirs) {
+ temp = list_entry(pos, struct trace_dir, trace_dir_list);
+ if (!strncmp(parent_dir, temp->trace_root->d_iname, \
+ strlen(parent_dir)))
+ trace_cleanup(temp->ti);
+ }
+}
+EXPORT_SYMBOL_GPL(trace_cleanup_all);
+
+/*
+ * Send formatted trace data to trace channel.
+ */
+static int trace_printf(struct trace_info *trace, const char *format,
+ va_list ap)
+{
+ va_list aq;
+ char *record;
+ int len, ret = 0;
+
+ if (trace_running(trace)) {
+ va_copy(aq, ap);
+ len = vsnprintf(NULL, 0, format, aq);
+ va_end(aq);
+ record = relay_reserve(trace->rchan, ++len);
+ if (record)
+ ret = vsnprintf(record, len, format, ap);
+ }
+ return ret;
+}
+
+static inline int init_trace_interface(struct trace_printk_data *tpk)
+{
+ int ret = 0;
+ tpk->exists = trace_exists(tpk->parent_dir, tpk->dir, &tpk->ti);
+
+ switch (tpk->exists) {
+
+ case TRACE_PARENT_DIR_EXISTS:
+ case TRACE_PARENT_DIR_ABSENT:
+ if (!tpk->buf_size)
+ tpk->buf_size = DEFAULT_TRACE_BUF_SIZE;
+ if (!tpk->sub_buf_size)
+ tpk->sub_buf_size = DEFAULT_TRACE_SUB_BUF_NR;
+ tpk->ti = trace_setup(tpk->parent_dir, tpk->dir,
+ tpk->buf_size, tpk->sub_buf_size, tpk->flags);
+ printk(KERN_INFO "Trace interface %s setup\n",
+ tpk->ti->dir->d_iname);
+ if (IS_ERR(tpk->ti)) {
+ printk(KERN_ERR "Error initialising %s interface\n",
+ tpk->ti->dir->d_iname);
+ return -EPERM;
+ }
+ /* Fall through */
+ case TRACE_DIR_EXISTS:
+ if (tpk->ti->state == TRACE_SETUP)
+ ret = trace_start(tpk->ti);
+ else
+ ret = -EPERM;
+ }
+
+ return 0;
+}
+
+/**
+ * trace_printk - Output a string to debugfs mount 'directly' using
'trace'
+ * @tpk: Structure containing info such as parent_dir and directory
+ * @format: String containing format string specifiers
+ * @ap: List of arguments
+ */
+int trace_printk(struct trace_printk_data *tpk, char *format, ...)
+{
+ int ret = 0;
+ va_list(ap);
+ unsigned long flags = 0;
+
+ va_start(ap, format);
+
+ ret = init_trace_interface(tpk);
+ if (unlikely(ret))
+ return ret;
+
+ /* Now do the actual printing */
+ /* Take an RCU Lock over the trace_info state */
+ rcu_read_lock();
+ /* Take a spinlock for the global buffer used by relay */
+ if (tpk->flags & TRACE_GLOBAL_CHANNEL)
+ spin_lock_irqsave(&tpk->ti->trace_lock, flags);
+ ret = trace_printf(tpk->ti, format, ap);
+ if (tpk->flags & TRACE_GLOBAL_CHANNEL)
+ spin_unlock_irqrestore(&tpk->ti->trace_lock, flags);
+ rcu_read_unlock();
+
+ va_end(ap);
+ return ret;
+}
+EXPORT_SYMBOL(trace_printk);
+
+/**
+ * trace_dump - Output binary into debugfs mount 'directly' using 'trace'
+ * @tpk: Structure containing info such as parent_dir and directory
+ * @output: Data that needs to be output
+ * @output_len: Length of the output data
+ */
+int trace_dump(struct trace_printk_data *tpk, const void *output,
+ const int output_len)
+{
+ char *record;
+ unsigned long flags = 0;
+ int ret = 0;
+
+ ret = init_trace_interface(tpk);
+ if (unlikely(ret))
+ return ret;
+
+ /* Now do the actual printing */
+ rcu_read_lock();
+ /* Take a spinlock for the global buffer used by relay */
+ if (tpk->flags & TRACE_GLOBAL_CHANNEL)
+ spin_lock_irqsave(&tpk->ti->trace_lock, flags);
+ record = relay_reserve(tpk->ti->rchan, output_len);
+
+ if (record && trace_running(tpk->ti))
+ memcpy(record, output, output_len);
+ else {
+ if (record)
+ ret = -EPERM;
+ else
+ ret = -ENOMEM;
+ }
+ if (tpk->flags & TRACE_GLOBAL_CHANNEL)
+ spin_unlock_irqrestore(&tpk->ti->trace_lock, flags);
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(trace_dump);
Index: linux-blktrace-many/samples/trace/fork_new_trace.c
===================================================================
--- /dev/null
+++ linux-blktrace-many/samples/trace/fork_new_trace.c
@@ -0,0 +1,99 @@
+/*
+ * An example of using trace in a kprobes module
+ *
+ * Copyright (C) 2008 IBM Inc.
+ *
+ * K.Prasad <prasad@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
USA
+ *
+ * -------
+ * This module creates a trace channel and places a kprobe
+ * on the function do_fork(). The value of current->pid is written to
+ * the trace channel each time the kprobe is hit..
+ *
+ * How to run the example:
+ * $ mount -t debugfs /debug
+ * $ insmod fork_new_trace.ko
+ *
+ * To view the data produced by the module:
+ * $ cat /debug/trace_example/do_fork/trace0
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+#include <linux/trace.h>
+
+#define SAMPLE_PARENT_DIR "trace_new_example"
+#define PROBE_POINT "do_fork"
+
+static struct kprobe kp;
+static struct trace_printk_data *tpk;
+
+static int handler_pre(struct kprobe *p, struct pt_regs *regs)
+{
+ trace_printk(tpk, "%d\n", current->pid);
+ return 0;
+}
+
+int init_module(void)
+{
+ int ret = 0;
+ int len_parent_dir, len_dir;
+
+ /* setup the kprobe */
+ kp.pre_handler = handler_pre;
+ kp.post_handler = NULL;
+ kp.fault_handler = NULL;
+ kp.symbol_name = PROBE_POINT;
+ ret = register_kprobe(&kp);
+ if (ret) {
+ printk(KERN_ERR "fork_trace: register_kprobe failed\n");
+ return ret;
+ }
+
+ len_parent_dir = strlen(SAMPLE_PARENT_DIR) + 1;
+ /* Initialising len_dir to the larger of the two dir names */
+ len_dir = strlen("kprobe_struct") + 1;
+
+ tpk = kzalloc(sizeof(*tpk), GFP_KERNEL);
+ if (!tpk)
+ ret = 1;
+
+ tpk->parent_dir = SAMPLE_PARENT_DIR;
+
+ /* Let's do a binary dump of struct kprobe using trace_dump */
+ tpk->dir = "kprobes_struct";
+ tpk->flags = TRACE_GLOBAL_CHANNEL;
+ trace_dump(tpk, &kp, sizeof(kp));
+
+ /* Now change the directory to collect fork pid data */
+ tpk->dir = PROBE_POINT;
+
+ if (ret)
+ printk(KERN_ERR "Unable to find required free memory. "
+ "Trace new sample module loading aborted");
+ return ret;
+}
+
+void cleanup_module(void)
+{
+ unregister_kprobe(&kp);
+
+ /* Just a single cleanup call passing the parent dir string */
+ trace_cleanup_all(SAMPLE_PARENT_DIR);
+}
+MODULE_LICENSE("GPL");
Index: linux-blktrace-many/samples/trace/Makefile
===================================================================
--- linux-blktrace-many.orig/samples/trace/Makefile
+++ linux-blktrace-many/samples/trace/Makefile
@@ -1,4 +1,4 @@
# builds the trace example kernel modules;
# then to use (as root): insmod <fork_trace.ko>
-obj-$(CONFIG_SAMPLE_TRACE) := fork_trace.o
+obj-$(CONFIG_SAMPLE_TRACE) := fork_trace.o fork_new_trace.o
Index: linux-blktrace-many/Documentation/trace.txt
===================================================================
--- linux-blktrace-many.orig/Documentation/trace.txt
+++ linux-blktrace-many/Documentation/trace.txt
@@ -150,6 +150,28 @@ The steps a kernel data provider takes t
5) Destroy the trace channel and underlying relay channel -
trace_cleanup().
+Alternatively the user may choose to make use of two new interfaces --
+trace_printk() and trace_dump() -- to setup trace interface and
+trace_cleanup_all() to tear-down the same.
+
+Steps to use:
+1) Create and populate an instance of trace_printk_data structure. The
fields
+ parent_dir and dir are mandatory. The fields buf_size, sub_buf_size
and flags
+ are optional and will take default values if not populated. The field
+ 'exists' and ti are for the trace infrastructure to use. The pointer
to the
+ 'struct trace_info' i.e. ti may be used to perform fine granular
operations
+ such as determine the state of the 'trace', stop individual traces,
etc.
+2) Default values for buf_size and sub_buf_size are 4096, 40 respectively.
+3) Use trace_dump() to output binary data which may be acted upon by a
+ high-level program (say dumping a structure). trace_printk() can be
used
+ for string output. Pass a pointer to the instance of trace_printk_data
+ structure to these functions along with other parameters. The output
from
+ these functions can be found at
+ <debugfs_mount>/<parent_dir>/<dir>/trace<0..n>.
+4) trace_cleanup_all() for a given parent directory will cleanup and
remove all
+ trace directories created under the specified directory.
+5) Sample code for the same can be found in samples/trace/fork_new_trace.c
+
Kernel Configuration
--------------------
To use trace, configure your kernel with CONFIG_TRACE=y. Trace depends on
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [RFC Patch 1/1] trace_printk and trace_dump interface - v2
2008-05-16 17:04 ` [RFC Patch 1/1] trace_printk and trace_dump interface " K.Prasad
2008-05-17 2:22 ` K.Prasad
@ 2008-05-19 20:02 ` David Wilder
1 sibling, 0 replies; 11+ messages in thread
From: David Wilder @ 2008-05-19 20:02 UTC (permalink / raw)
To: K.Prasad; +Cc: kernel list, akpm
ACK
trace_printk() and trace_dump() will be very usefully. This simplified
interface to trace is a needed improvement. Among other features it
makes it easy to convert from printk to trace and provides separate
streams for debugging data. For many subsystems a conversion from
printk to trace only requires changing a #define.
Dave.
K.Prasad wrote:
> This patch introduces two new interfaces called trace_printk and
> trace_dump which can be used to print to the debugfs mount directly.
> It uses the 'trace' infrastructure underneath and is a patch over it.
> A sample file is also created to demonstrate its ease of use.
>
> Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> ---
> Documentation/trace.txt | 22 ++++
> include/linux/trace.h | 57 +++++++++++
> lib/trace.c | 204
> +++++++++++++++++++++++++++++++++++++++--
> samples/trace/Makefile | 2
> samples/trace/fork_new_trace.c | 99 +++++++++++++++++++
> 5 files changed, 378 insertions(+), 6 deletions(-)
>
> Index: linux-blktrace-many/include/linux/trace.h
> ===================================================================
> --- linux-blktrace-many.orig/include/linux/trace.h
> +++ linux-blktrace-many/include/linux/trace.h
> @@ -39,10 +39,22 @@ enum trace_state {
> TRACE_STOPPED,
> };
>
> +enum trace_dir_state {
> + TRACE_PARENT_DIR_ABSENT,
> + TRACE_PARENT_DIR_EXISTS,
> + TRACE_DIR_EXISTS
> +};
> +
> #define TRACE_ROOT_NAME_SIZE 64 /* Max root dir identifier */
> #define TRACE_NAME_SIZE 64 /* Max trace identifier */
>
> /*
> + * Buffers for use by trace_printk
> + */
> +#define DEFAULT_TRACE_BUF_SIZE 4096
> +#define DEFAULT_TRACE_SUB_BUF_NR 40
> +
> +/*
> * Global root user information
> */
> struct trace_root {
> @@ -71,6 +83,28 @@ struct trace_info {
> unsigned int flags;
> unsigned int buf_size;
> unsigned int buf_nr;
> + spinlock_t trace_lock;
> +};
> +
> +struct trace_printk_data {
> + char *parent_dir;
> + char *dir;
> + int exists;
> + int buf_size;
> + int sub_buf_size;
> + unsigned long flags;
> + struct trace_info *ti;
> +};
> +
> +/*
> + * Information about every trace directory
> + */
> +struct trace_dir {
> + struct list_head trace_dir_list;
> + char trace_dir_name[TRACE_NAME_SIZE];
> + struct dentry *trace_root;
> + struct dentry *trace_dir;
> + struct trace_info *ti;
> };
>
> #ifdef CONFIG_TRACE
> @@ -83,6 +117,12 @@ struct trace_info *trace_setup(const cha
> int trace_start(struct trace_info *trace);
> int trace_stop(struct trace_info *trace);
> void trace_cleanup(struct trace_info *trace);
> +int trace_exists(const char *parent_dir, const char *dir,
> + struct trace_info **ti);
> +void trace_cleanup_all(const char *parent_dir);
> +int trace_printk(struct trace_printk_data *dpk, char *format, ...);
> +int trace_dump(struct trace_printk_data *dpk, const void *output,
> + const int output_len);
> #else
> static inline struct trace_info *trace_setup(const char *root,
> const char *name, u32 buf_size,
> @@ -94,6 +134,23 @@ static inline int trace_start(struct tra
> static inline int trace_stop(struct trace_info *trace) { return -EINVAL; }
> static inline int trace_running(struct trace_info *trace) { return 0; }
> static inline void trace_cleanup(struct trace_info *trace) {}
> +static inline int trace_exists(const char *parent_dir, const char *dir,
> + struct trace_info **ti)
> +{
> + return -EINVAL;
> +}
> +static inline void trace_cleanup_all(const char *parent_dir) {}
> +static inline int trace_printk(struct trace_printk_data *dpk, char
> *format,
> + ...)
> +{
> + return -EINVAL;
> +}
> +int trace_dump(struct trace_printk_data *dpk, const void *output,
> + const int output_len)
> +{
> + return -EINVAL;
> +}
> +
> #endif
>
> #endif
> Index: linux-blktrace-many/lib/trace.c
> ===================================================================
> --- linux-blktrace-many.orig/lib/trace.c
> +++ linux-blktrace-many/lib/trace.c
> @@ -29,6 +29,7 @@
> #include <linux/trace.h>
>
> static LIST_HEAD(trace_roots);
> +static LIST_HEAD(trace_dirs);
> static DEFINE_MUTEX(trace_mutex);
>
> static int state_open(struct inode *inode, struct file *filp)
> @@ -99,9 +100,19 @@ static void remove_root(struct trace_inf
>
> static void remove_tree(struct trace_info *trace)
> {
> + struct list_head *pos, *temp;
> + struct trace_dir *dr = NULL;
> +
> mutex_lock(&trace_mutex);
> debugfs_remove(trace->dir);
>
> + list_for_each_safe(pos, temp, &trace_dirs) {
> + dr = list_entry(pos, struct trace_dir, trace_dir_list);
> + if (dr->ti == trace) {
> + list_del(pos);
> + kfree(dr);
> + }
> + }
> if (trace->root) {
> if (--trace->root->users == 0)
> remove_root(trace);
> @@ -142,11 +153,17 @@ static struct trace_root *lookup_root(co
> static struct dentry *create_tree(struct trace_info *trace, const char
> *root,
> const char *name)
> {
> - struct dentry *dir = NULL;
> + struct trace_dir *temp;
>
> if (root == NULL || name == NULL)
> return ERR_PTR(-EINVAL);
>
> + temp = kzalloc(sizeof(struct trace_dir), GFP_KERNEL);
> + if ((temp == NULL) || (strlen(name) > TRACE_NAME_SIZE))
> + return ERR_PTR(-ENOMEM);
> +
> + strlcpy(temp->trace_dir_name, name, sizeof(temp->trace_dir_name));
> +
> mutex_lock(&trace_mutex);
>
> trace->root = lookup_root(root);
> @@ -155,17 +172,49 @@ static struct dentry *create_tree(struct
> goto err;
> }
>
> - dir = debugfs_create_dir(name, trace->root->root);
> - if (IS_ERR(dir))
> + temp->trace_root = trace->root->root;
> + temp->trace_dir = debugfs_create_dir(name, trace->root->root);
> +
> + if (IS_ERR(temp->trace_dir))
> remove_root(trace);
> - else
> + else {
> trace->root->users++;
> + temp->ti = trace;
> + list_add_tail(&temp->trace_dir_list, &trace_dirs);
> + }
>
> err:
> mutex_unlock(&trace_mutex);
> - return dir;
> + return temp->trace_dir;
> }
>
> +int trace_exists(const char *parent_dir, const char *dir,
> + struct trace_info **ti)
> +{
> + struct list_head *pos;
> + struct trace_root *r;
> + struct trace_dir *temp;
> +
> + list_for_each(pos, &trace_roots) {
> + r = list_entry(pos, struct trace_root, list);
> + if (!strcmp(parent_dir, r->name))
> + goto search_dir;
> + }
> + return TRACE_PARENT_DIR_ABSENT;
> +
> + search_dir:
> + list_for_each(pos, &trace_dirs) {
> + temp = list_entry(pos, struct trace_dir, trace_dir_list);
> +
> + if (!strcmp(dir, temp->trace_dir_name)) {
> + *ti = temp->ti;
> + return TRACE_DIR_EXISTS;
> + }
> + }
> + return TRACE_PARENT_DIR_EXISTS;
> +}
> +EXPORT_SYMBOL_GPL(trace_exists);
> +
> static int dropped_open(struct inode *inode, struct file *filp)
> {
> filp->private_data = inode->i_private;
> @@ -561,3 +610,148 @@ void trace_cleanup(struct trace_info *tr
> kfree(trace);
> }
> EXPORT_SYMBOL_GPL(trace_cleanup);
> +
> +/**
> + * trace_cleanup_all - Removes all trace directories under a parent_dir
> + * @parent_dir: Name of the parent directory
> + */
> +void trace_cleanup_all(const char *parent_dir)
> +{
> + struct list_head *pos, *pos_temp;
> + struct trace_dir *temp;
> +
> + list_for_each_safe(pos, pos_temp, &trace_dirs) {
> + temp = list_entry(pos, struct trace_dir, trace_dir_list);
> + if (!strncmp(parent_dir, temp->trace_root->d_iname,
> strlen(parent_dir)))
> + trace_cleanup(temp->ti);
> + }
> +}
> +EXPORT_SYMBOL_GPL(trace_cleanup_all);
> +
> +/*
> + * Send formatted trace data to trace channel.
> + */
> +static int trace_printf(struct trace_info *trace, const char *format,
> + va_list ap)
> +{
> + va_list aq;
> + char *record;
> + int len, ret = 0;
> +
> + if (trace_running(trace)) {
> + va_copy(aq, ap);
> + len = vsnprintf(NULL, 0, format, aq);
> + va_end(aq);
> + record = relay_reserve(trace->rchan, ++len);
> + if (record)
> + ret = vsnprintf(record, len, format, ap);
> + }
> + return ret;
> +}
> +
> +static inline int init_trace_interface(struct trace_printk_data *tpk)
> +{
> + int ret = 0;
> + tpk->exists = trace_exists(tpk->parent_dir, tpk->dir, &tpk->ti);
> +
> + switch(tpk->exists) {
> +
> + case TRACE_PARENT_DIR_EXISTS:
> + case TRACE_PARENT_DIR_ABSENT:
> + if(!tpk->buf_size)
> + tpk->buf_size = DEFAULT_TRACE_BUF_SIZE;
> + if(!tpk->sub_buf_size)
> + tpk->sub_buf_size = DEFAULT_TRACE_SUB_BUF_NR;
> + tpk->ti = trace_setup(tpk->parent_dir, tpk->dir,
> + tpk->buf_size, tpk->sub_buf_size, tpk->flags);
> + printk(KERN_INFO "Trace interface %s setup\n",
> + tpk->ti->dir->d_iname);
> + if (IS_ERR(tpk->ti)) {
> + printk(KERN_ERR "Error initialising %s interface\n",
> + tpk->ti->dir->d_iname);
> + return -EPERM;
> + }
> + /* Fall through */
> + case TRACE_DIR_EXISTS:
> + if (tpk->ti->state == TRACE_SETUP)
> + ret = trace_start(tpk->ti);
> + else
> + ret = -EPERM;
> + }
> +
> + return 0;
> +}
> +
> +/**
> + * trace_printk - Output a string to debugfs mount 'directly' using
> 'trace'
> + * @tpk: Structure containing info such as parent_dir and directory
> + * @format: String containing format string specifiers
> + * @ap: List of arguments
> + */
> +int trace_printk(struct trace_printk_data *tpk, char *format, ...)
> +{
> + int ret = 0;
> + va_list(ap);
> + unsigned long flags = 0;
> +
> + va_start(ap, format);
> +
> + ret = init_trace_interface(tpk);
> + if (unlikely(ret))
> + return ret;
> +
> + /* Now do the actual printing */
> + /* Take an RCU Lock over the trace_info state */
> + rcu_read_lock();
> + /* Take a spinlock for the global buffer used by relay */
> + if (tpk->flags & TRACE_GLOBAL_CHANNEL)
> + spin_lock_irqsave(&tpk->ti->trace_lock, flags);
> + ret = trace_printf(tpk->ti, format, ap);
> + if (tpk->flags & TRACE_GLOBAL_CHANNEL)
> + spin_unlock_irqrestore(&tpk->ti->trace_lock, flags);
> + rcu_read_unlock();
> +
> + va_end(ap);
> + return ret;
> +}
> +EXPORT_SYMBOL(trace_printk);
> +
> +/**
> + * trace_dump - Output binary into debugfs mount 'directly' using 'trace'
> + * @tpk: Structure containing info such as parent_dir and directory
> + * @output: Data that needs to be output
> + * @output_len: Length of the output data
> + */
> +int trace_dump(struct trace_printk_data *tpk, const void *output,
> + const int output_len)
> +{
> + char *record;
> + unsigned long flags = 0;
> + int ret = 0;
> +
> + ret = init_trace_interface(tpk);
> + if (unlikely(ret))
> + return ret;
> +
> + /* Now do the actual printing */
> + rcu_read_lock();
> + /* Take a spinlock for the global buffer used by relay */
> + if (tpk->flags & TRACE_GLOBAL_CHANNEL)
> + spin_lock_irqsave(&tpk->ti->trace_lock, flags);
> + record = relay_reserve(tpk->ti->rchan, output_len);
> +
> + if (record && trace_running(tpk->ti))
> + memcpy(record, output, output_len);
> + else {
> + if(record)
> + ret = -EPERM;
> + else
> + ret = -ENOMEM;
> + }
> + if (tpk->flags & TRACE_GLOBAL_CHANNEL)
> + spin_unlock_irqrestore(&tpk->ti->trace_lock, flags);
> + rcu_read_unlock();
> +
> + return ret;
> +}
> +EXPORT_SYMBOL(trace_dump);
> Index: linux-blktrace-many/samples/trace/fork_new_trace.c
> ===================================================================
> --- /dev/null
> +++ linux-blktrace-many/samples/trace/fork_new_trace.c
> @@ -0,0 +1,99 @@
> +/*
> + * An example of using trace in a kprobes module
> + *
> + * Copyright (C) 2008 IBM Inc.
> + *
> + * K.Prasad <prasad@linux.vnet.ibm.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
> 02110-1301 USA
> + *
> + * -------
> + * This module creates a trace channel and places a kprobe
> + * on the function do_fork(). The value of current->pid is written to
> + * the trace channel each time the kprobe is hit..
> + *
> + * How to run the example:
> + * $ mount -t debugfs /debug
> + * $ insmod fork_new_trace.ko
> + *
> + * To view the data produced by the module:
> + * $ cat /debug/trace_example/do_fork/trace0
> + *
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/kprobes.h>
> +#include <linux/trace.h>
> +
> +#define SAMPLE_PARENT_DIR "trace_new_example"
> +#define PROBE_POINT "do_fork"
> +
> +static struct kprobe kp;
> +static struct trace_printk_data *tpk;
> +
> +static int handler_pre(struct kprobe *p, struct pt_regs *regs)
> +{
> + trace_printk(tpk, "%d\n", current->pid);
> + return 0;
> +}
> +
> +int init_module(void)
> +{
> + int ret = 0;
> + int len_parent_dir, len_dir;
> +
> + /* setup the kprobe */
> + kp.pre_handler = handler_pre;
> + kp.post_handler = NULL;
> + kp.fault_handler = NULL;
> + kp.symbol_name = PROBE_POINT;
> + ret = register_kprobe(&kp);
> + if (ret) {
> + printk(KERN_ERR "fork_trace: register_kprobe failed\n");
> + return ret;
> + }
> +
> + len_parent_dir = strlen(SAMPLE_PARENT_DIR) + 1;
> + /* Initialising len_dir to the larger of the two dir names */
> + len_dir = strlen("kprobe_struct") + 1;
> +
> + tpk = kzalloc(sizeof(*tpk), GFP_KERNEL);
> + if(!tpk)
> + ret = 1;
> +
> + tpk->parent_dir = SAMPLE_PARENT_DIR;
> +
> + /* Let's do a binary dump of struct kprobe using trace_dump */
> + tpk->dir = "kprobes_struct";
> + tpk->flags = TRACE_GLOBAL_CHANNEL;
> + trace_dump(tpk, &kp, sizeof(kp));
> +
> + /* Now change the directory to collect fork pid data */
> + tpk->dir = PROBE_POINT;
> +
> + if(ret)
> + printk(KERN_ERR "Unable to find required free memory. "
> + "Trace new sample module loading aborted");
> + return ret;
> +}
> +
> +void cleanup_module(void)
> +{
> + unregister_kprobe(&kp);
> +
> + /* Just a single cleanup call passing the parent dir string */
> + trace_cleanup_all(SAMPLE_PARENT_DIR);
> +}
> +MODULE_LICENSE("GPL");
> Index: linux-blktrace-many/samples/trace/Makefile
> ===================================================================
> --- linux-blktrace-many.orig/samples/trace/Makefile
> +++ linux-blktrace-many/samples/trace/Makefile
> @@ -1,4 +1,4 @@
> # builds the trace example kernel modules;
> # then to use (as root): insmod <fork_trace.ko>
>
> -obj-$(CONFIG_SAMPLE_TRACE) := fork_trace.o
> +obj-$(CONFIG_SAMPLE_TRACE) := fork_trace.o fork_new_trace.o
> Index: linux-blktrace-many/Documentation/trace.txt
> ===================================================================
> --- linux-blktrace-many.orig/Documentation/trace.txt
> +++ linux-blktrace-many/Documentation/trace.txt
> @@ -150,6 +150,28 @@ The steps a kernel data provider takes t
> 5) Destroy the trace channel and underlying relay channel -
> trace_cleanup().
>
> +Alternatively the user may choose to make use of two new interfaces --
> +trace_printk() and trace_dump() -- to setup trace interface and
> +trace_cleanup_all() to tear-down the same.
> +
> +Steps to use:
> +1) Create and populate an instance of trace_printk_data structure. The
> fields
> + parent_dir and dir are mandatory. The fields buf_size, sub_buf_size
> and flags
> + are optional and will take default values if not populated. The field
> + 'exists' and ti are for the trace infrastructure to use. The pointer
> to the
> + 'struct trace_info' i.e. ti may be used to perform fine granular
> operations
> + such as determine the state of the 'trace', stop individual traces,
> etc.
> +2) Default values for buf_size and sub_buf_size are 4096, 40 respectively.
> +3) Use trace_dump() to output binary data which may be acted upon by a
> + high-level program (say dumping a structure). trace_printk() can be
> used
> + for string output. Pass a pointer to the instance of trace_printk_data
> + structure to these functions along with other parameters. The output
> from
> + these functions can be found at
> + <debugfs_mount>/<parent_dir>/<dir>/trace<0..n>.
> +4) trace_cleanup_all() for a given parent directory will cleanup and
> remove all
> + trace directories created under the specified directory.
> +5) Sample code for the same can be found in samples/trace/fork_new_trace.c
> +
> Kernel Configuration
> --------------------
> To use trace, configure your kernel with CONFIG_TRACE=y. Trace depends on
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [RFC Patch 1/1] trace_printk and trace_dump interface - v2
2008-05-17 2:22 ` K.Prasad
@ 2008-05-19 23:21 ` Andrew Morton
2008-05-20 19:53 ` K.Prasad
0 siblings, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2008-05-19 23:21 UTC (permalink / raw)
To: K.Prasad; +Cc: linux-kernel, dwilder, prasad
On Sat, 17 May 2008 07:52:16 +0530
"K.Prasad" <prasad@linux.vnet.ibm.com> wrote:
> Resending this patch due to style issues found in previous patch.
>
> This patch introduces two new interfaces called trace_printk and
> trace_dump which can be used to print to the debugfs mount directly.
> It uses the 'trace' infrastructure underneath and is a patch over it.
> A sample file is also created to demonstrate its ease of use.
>
> Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> ---
> Documentation/trace.txt | 22 ++++
> include/linux/trace.h | 57 +++++++++++
> lib/trace.c | 205
> ++++++++++++++++++++++++++++++++++++++++-
> samples/trace/Makefile | 2
> samples/trace/fork_new_trace.c | 99 +++++++++++++++++++
The patch is wordwrapped and space-stuffed.
> ...
>
> static void remove_tree(struct trace_info *trace)
> {
> + struct list_head *pos, *temp;
> + struct trace_dir *dr = NULL;
> +
> mutex_lock(&trace_mutex);
> debugfs_remove(trace->dir);
>
> + list_for_each_safe(pos, temp, &trace_dirs) {
> + dr = list_entry(pos, struct trace_dir, trace_dir_list);
list_for_each_entry_safe()?
> + if (dr->ti == trace) {
> + list_del(pos);
> + kfree(dr);
> + }
> + }
> if (trace->root) {
> if (--trace->root->users == 0)
> remove_root(trace);
> @@ -142,11 +153,17 @@ static struct trace_root *lookup_root(co
> static struct dentry *create_tree(struct trace_info *trace, const char
> *root,
> const char *name)
> {
> - struct dentry *dir = NULL;
> + struct trace_dir *temp;
>
> if (root == NULL || name == NULL)
> return ERR_PTR(-EINVAL);
>
> + temp = kzalloc(sizeof(struct trace_dir), GFP_KERNEL);
> + if ((temp == NULL) || (strlen(name) > TRACE_NAME_SIZE))
> + return ERR_PTR(-ENOMEM);
This can leak `temp'.
I suspect it has an off-by-one.
You should check the incoming args before allocating any resources or
doing anything which has side-effects.
> + strlcpy(temp->trace_dir_name, name, sizeof(temp->trace_dir_name));
then use kstrdup().
Please don't call variables "temp" or "tmp". Surely something more
communcative can be thought up.
> mutex_lock(&trace_mutex);
>
> trace->root = lookup_root(root);
> @@ -155,17 +172,49 @@ static struct dentry *create_tree(struct
> goto err;
> }
>
> - dir = debugfs_create_dir(name, trace->root->root);
> - if (IS_ERR(dir))
> + temp->trace_root = trace->root->root;
> + temp->trace_dir = debugfs_create_dir(name, trace->root->root);
> +
> + if (IS_ERR(temp->trace_dir))
> remove_root(trace);
> - else
> + else {
> trace->root->users++;
> + temp->ti = trace;
> + list_add_tail(&temp->trace_dir_list, &trace_dirs);
> + }
>
> err:
> mutex_unlock(&trace_mutex);
> - return dir;
> + return temp->trace_dir;
> }
>
> +int trace_exists(const char *parent_dir, const char *dir,
> + struct trace_info **ti)
> +{
> + struct list_head *pos;
> + struct trace_root *r;
> + struct trace_dir *temp;
> +
> + list_for_each(pos, &trace_roots) {
> + r = list_entry(pos, struct trace_root, list);
list_for_each_entry()?
> + if (!strcmp(parent_dir, r->name))
> + goto search_dir;
> + }
> + return TRACE_PARENT_DIR_ABSENT;
> +
> + search_dir:
> + list_for_each(pos, &trace_dirs) {
> + temp = list_entry(pos, struct trace_dir, trace_dir_list);
> +
> + if (!strcmp(dir, temp->trace_dir_name)) {
> + *ti = temp->ti;
> + return TRACE_DIR_EXISTS;
> + }
> + }
> + return TRACE_PARENT_DIR_EXISTS;
> +}
> +EXPORT_SYMBOL_GPL(trace_exists);
I wonder if the whole "trace_*" namespace was a good choice. There are
other trace patches in-kernel, out-of-kernel and presumably in our
future.
> static int dropped_open(struct inode *inode, struct file *filp)
> {
> filp->private_data = inode->i_private;
> @@ -561,3 +610,149 @@ void trace_cleanup(struct trace_info *tr
> kfree(trace);
> }
> EXPORT_SYMBOL_GPL(trace_cleanup);
> +
> +/**
> + * trace_cleanup_all - Removes all trace directories under a parent_dir
> + * @parent_dir: Name of the parent directory
> + */
> +void trace_cleanup_all(const char *parent_dir)
> +{
> + struct list_head *pos, *pos_temp;
> + struct trace_dir *temp;
> +
> + list_for_each_safe(pos, pos_temp, &trace_dirs) {
> + temp = list_entry(pos, struct trace_dir, trace_dir_list);
list_for_each_entry_safe()?
> + if (!strncmp(parent_dir, temp->trace_root->d_iname, \
> + strlen(parent_dir)))
> + trace_cleanup(temp->ti);
> + }
> +}
> +EXPORT_SYMBOL_GPL(trace_cleanup_all);
>
> ...
>
> +static inline int init_trace_interface(struct trace_printk_data *tpk)
> +{
> + int ret = 0;
> + tpk->exists = trace_exists(tpk->parent_dir, tpk->dir, &tpk->ti);
> +
> + switch (tpk->exists) {
> +
> + case TRACE_PARENT_DIR_EXISTS:
> + case TRACE_PARENT_DIR_ABSENT:
> + if (!tpk->buf_size)
> + tpk->buf_size = DEFAULT_TRACE_BUF_SIZE;
> + if (!tpk->sub_buf_size)
> + tpk->sub_buf_size = DEFAULT_TRACE_SUB_BUF_NR;
> + tpk->ti = trace_setup(tpk->parent_dir, tpk->dir,
> + tpk->buf_size, tpk->sub_buf_size, tpk->flags);
> + printk(KERN_INFO "Trace interface %s setup\n",
> + tpk->ti->dir->d_iname);
> + if (IS_ERR(tpk->ti)) {
> + printk(KERN_ERR "Error initialising %s interface\n",
> + tpk->ti->dir->d_iname);
> + return -EPERM;
> + }
> + /* Fall through */
> + case TRACE_DIR_EXISTS:
> + if (tpk->ti->state == TRACE_SETUP)
> + ret = trace_start(tpk->ti);
> + else
> + ret = -EPERM;
> + }
> +
> + return 0;
> +}
Two callsites, far too large to be inlined.
Please just don't use inline at all, except in exceptional
circumstances. The compiler will work it out for normal usage.
> +/**
> + * trace_printk - Output a string to debugfs mount 'directly' using
> 'trace'
> + * @tpk: Structure containing info such as parent_dir and directory
> + * @format: String containing format string specifiers
> + * @ap: List of arguments
> + */
> +int trace_printk(struct trace_printk_data *tpk, char *format, ...)
> +{
> + int ret = 0;
> + va_list(ap);
> + unsigned long flags = 0;
So I look at this and wonder "why did that need initialising?"
> + va_start(ap, format);
> +
> + ret = init_trace_interface(tpk);
> + if (unlikely(ret))
> + return ret;
> +
> + /* Now do the actual printing */
> + /* Take an RCU Lock over the trace_info state */
> + rcu_read_lock();
> + /* Take a spinlock for the global buffer used by relay */
> + if (tpk->flags & TRACE_GLOBAL_CHANNEL)
> + spin_lock_irqsave(&tpk->ti->trace_lock, flags);
ah, because you got a compiler warning.
That's why we have "uninitialized_var()": so the reader can understand
what is happening. Plus uninitialized_var() generates no code, whereas
that assignment generates additional text.
uninitialized_var() is fairly sucky, but adding a mysterious,
seemingly-unneeded and code-generating "= 0" is suckier.
> + ret = trace_printf(tpk->ti, format, ap);
> + if (tpk->flags & TRACE_GLOBAL_CHANNEL)
> + spin_unlock_irqrestore(&tpk->ti->trace_lock, flags);
> + rcu_read_unlock();
> +
> + va_end(ap);
> + return ret;
> +}
> +EXPORT_SYMBOL(trace_printk);
> +
> +/**
> + * trace_dump - Output binary into debugfs mount 'directly' using 'trace'
> + * @tpk: Structure containing info such as parent_dir and directory
> + * @output: Data that needs to be output
> + * @output_len: Length of the output data
> + */
> +int trace_dump(struct trace_printk_data *tpk, const void *output,
> + const int output_len)
> +{
> + char *record;
> + unsigned long flags = 0;
there too.
> + int ret = 0;
> +
> + ret = init_trace_interface(tpk);
> + if (unlikely(ret))
> + return ret;
> +
> + /* Now do the actual printing */
> + rcu_read_lock();
> + /* Take a spinlock for the global buffer used by relay */
> + if (tpk->flags & TRACE_GLOBAL_CHANNEL)
> + spin_lock_irqsave(&tpk->ti->trace_lock, flags);
Making a callee's locking behaviour dependent upon an incoming argument
is considered poor style and earns nastygrams from Linus. Is there no
sane alternative?
> + record = relay_reserve(tpk->ti->rchan, output_len);
> +
> + if (record && trace_running(tpk->ti))
> + memcpy(record, output, output_len);
> + else {
> + if (record)
> + ret = -EPERM;
> + else
> + ret = -ENOMEM;
> + }
> + if (tpk->flags & TRACE_GLOBAL_CHANNEL)
> + spin_unlock_irqrestore(&tpk->ti->trace_lock, flags);
> + rcu_read_unlock();
> +
> + return ret;
> +}
> +EXPORT_SYMBOL(trace_dump);
>
> ...
>
> +int init_module(void)
> +{
> + int ret = 0;
> + int len_parent_dir, len_dir;
> +
> + /* setup the kprobe */
> + kp.pre_handler = handler_pre;
> + kp.post_handler = NULL;
> + kp.fault_handler = NULL;
> + kp.symbol_name = PROBE_POINT;
> + ret = register_kprobe(&kp);
> + if (ret) {
> + printk(KERN_ERR "fork_trace: register_kprobe failed\n");
> + return ret;
> + }
> +
> + len_parent_dir = strlen(SAMPLE_PARENT_DIR) + 1;
> + /* Initialising len_dir to the larger of the two dir names */
> + len_dir = strlen("kprobe_struct") + 1;
> +
> + tpk = kzalloc(sizeof(*tpk), GFP_KERNEL);
> + if (!tpk)
> + ret = 1;
> +
> + tpk->parent_dir = SAMPLE_PARENT_DIR;
> +
> + /* Let's do a binary dump of struct kprobe using trace_dump */
> + tpk->dir = "kprobes_struct";
> + tpk->flags = TRACE_GLOBAL_CHANNEL;
> + trace_dump(tpk, &kp, sizeof(kp));
> +
> + /* Now change the directory to collect fork pid data */
> + tpk->dir = PROBE_POINT;
> +
> + if (ret)
> + printk(KERN_ERR "Unable to find required free memory. "
> + "Trace new sample module loading aborted");
> + return ret;
> +}
> +
> +void cleanup_module(void)
> +{
> + unregister_kprobe(&kp);
> +
> + /* Just a single cleanup call passing the parent dir string */
> + trace_cleanup_all(SAMPLE_PARENT_DIR);
> +}
> +MODULE_LICENSE("GPL");
Do init_module() and cleanup_module() actually get called? We _used_
to have magic code which automatically calls those functions but I'd
have thought it got removed years ago?
Anyway, we should be using module_init() and module_exit() here, and
those functions should have static scope.
> Index: linux-blktrace-many/samples/trace/Makefile
> ===================================================================
> --- linux-blktrace-many.orig/samples/trace/Makefile
> +++ linux-blktrace-many/samples/trace/Makefile
> @@ -1,4 +1,4 @@
> # builds the trace example kernel modules;
> # then to use (as root): insmod <fork_trace.ko>
>
> -obj-$(CONFIG_SAMPLE_TRACE) := fork_trace.o
> +obj-$(CONFIG_SAMPLE_TRACE) := fork_trace.o fork_new_trace.o
> Index: linux-blktrace-many/Documentation/trace.txt
> ===================================================================
> --- linux-blktrace-many.orig/Documentation/trace.txt
> +++ linux-blktrace-many/Documentation/trace.txt
> @@ -150,6 +150,28 @@ The steps a kernel data provider takes t
> 5) Destroy the trace channel and underlying relay channel -
> trace_cleanup().
>
> +Alternatively the user may choose to make use of two new interfaces --
> +trace_printk() and trace_dump() -- to setup trace interface and
> +trace_cleanup_all() to tear-down the same.
> +
> +Steps to use:
> +1) Create and populate an instance of trace_printk_data structure. The
> fields
> + parent_dir and dir are mandatory. The fields buf_size, sub_buf_size
> and flags
> + are optional and will take default values if not populated. The field
> + 'exists' and ti are for the trace infrastructure to use. The pointer
> to the
> + 'struct trace_info' i.e. ti may be used to perform fine granular
> operations
> + such as determine the state of the 'trace', stop individual traces,
> etc.
> +2) Default values for buf_size and sub_buf_size are 4096, 40 respectively.
> +3) Use trace_dump() to output binary data which may be acted upon by a
> + high-level program (say dumping a structure). trace_printk() can be
> used
> + for string output. Pass a pointer to the instance of trace_printk_data
> + structure to these functions along with other parameters. The output
> from
> + these functions can be found at
> + <debugfs_mount>/<parent_dir>/<dir>/trace<0..n>.
> +4) trace_cleanup_all() for a given parent directory will cleanup and
> remove all
> + trace directories created under the specified directory.
> +5) Sample code for the same can be found in samples/trace/fork_new_trace.c
wordwrapping...
I often fix it, but I think we need v3 on this one.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [RFC Patch 1/1] trace_printk and trace_dump interface - v2
2008-05-19 23:21 ` Andrew Morton
@ 2008-05-20 19:53 ` K.Prasad
2008-05-20 20:12 ` Andrew Morton
0 siblings, 1 reply; 11+ messages in thread
From: K.Prasad @ 2008-05-20 19:53 UTC (permalink / raw)
To: Andrew Morton; +Cc: K.Prasad, linux-kernel, dwilder, hch@infradead.org
On Mon, May 19, 2008 at 04:21:49PM -0700, Andrew Morton wrote:
> On Sat, 17 May 2008 07:52:16 +0530
> "K.Prasad" <prasad@linux.vnet.ibm.com> wrote:
>> Resending this patch due to style issues found in previous patch.
> >> This patch introduces two new interfaces called trace_printk and
> > trace_dump which can be used to print to the debugfs mount directly.
> > It uses the 'trace' infrastructure underneath and is a patch over it.
> > A sample file is also created to demonstrate its ease of use.
> >> Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > ---
> > Documentation/trace.txt | 22 ++++
> > include/linux/trace.h | 57 +++++++++++
> > lib/trace.c | 205 > ++++++++++++++++++++++++++++++++++++++++-
> > samples/trace/Makefile | 2
> > samples/trace/fork_new_trace.c | 99 +++++++++++++++++++
> The patch is wordwrapped and space-stuffed.
Must be an act of my mail client. Will re-send with proper word-wrapping
and spacing during the next iteration.
>> ...
> >
> > static void remove_tree(struct trace_info *trace)
> > {
> > + struct list_head *pos, *temp;
> > + struct trace_dir *dr = NULL;
> > +
> > mutex_lock(&trace_mutex);
> > debugfs_remove(trace->dir);
> >> + list_for_each_safe(pos, temp, &trace_dirs) {
> > + dr = list_entry(pos, struct trace_dir, trace_dir_list);
> list_for_each_entry_safe()?
Ok. Will change.
>> + if (dr->ti == trace) {
> > + list_del(pos);
> > + kfree(dr);
> > + }
> > + }
> > if (trace->root) {
> > if (--trace->root->users == 0)
> > remove_root(trace);
> > @@ -142,11 +153,17 @@ static struct trace_root *lookup_root(co
> > static struct dentry *create_tree(struct trace_info *trace, const char > *root,
> > const char *name)
> > {
> > - struct dentry *dir = NULL;
> > + struct trace_dir *temp;
> >> if (root == NULL || name == NULL)
> > return ERR_PTR(-EINVAL);
> >> + temp = kzalloc(sizeof(struct trace_dir), GFP_KERNEL);
> > + if ((temp == NULL) || (strlen(name) > TRACE_NAME_SIZE))
> > + return ERR_PTR(-ENOMEM);
> This can leak `temp'.
Yes, I missed it. Will modify to look like this:
if (strlen(name) > TRACE_NAME_SIZE)
return ERR_PTR(-ENOMEM);
temp = kzalloc(sizeof(struct trace_dir), GFP_KERNEL);
if (temp == NULL)
return ERR_PTR(-ENOMEM);
> I suspect it has an off-by-one.
> You should check the incoming args before allocating any resources or
> doing anything which has side-effects.
>> + strlcpy(temp->trace_dir_name, name, sizeof(temp->trace_dir_name));
> then use kstrdup().
Ok.
> Please don't call variables "temp" or "tmp". Surely something more
> communcative can be thought up.
Ok. Will name this instance,
struct trace_dir *tdd;
>> mutex_lock(&trace_mutex);
> >> trace->root = lookup_root(root);
> > @@ -155,17 +172,49 @@ static struct dentry *create_tree(struct
> > goto err;
> > }
> >> - dir = debugfs_create_dir(name, trace->root->root);
> > - if (IS_ERR(dir))
> > + temp->trace_root = trace->root->root;
> > + temp->trace_dir = debugfs_create_dir(name, trace->root->root);
> > +
> > + if (IS_ERR(temp->trace_dir))
> > remove_root(trace);
> > - else
> > + else {
> > trace->root->users++;
> > + temp->ti = trace;
> > + list_add_tail(&temp->trace_dir_list, &trace_dirs);
> > + }
> >> err:
> > mutex_unlock(&trace_mutex);
> > - return dir;
> > + return temp->trace_dir;
> > }
> >> +int trace_exists(const char *parent_dir, const char *dir,
> > + struct trace_info **ti)
> > +{
> > + struct list_head *pos;
> > + struct trace_root *r;
> > + struct trace_dir *temp;
> > +
> > + list_for_each(pos, &trace_roots) {
> > + r = list_entry(pos, struct trace_root, list);
> list_for_each_entry()?
>> + if (!strcmp(parent_dir, r->name))
> > + goto search_dir;
> > + }
> > + return TRACE_PARENT_DIR_ABSENT;
> > +
> > + search_dir:
> > + list_for_each(pos, &trace_dirs) {
> > + temp = list_entry(pos, struct trace_dir, trace_dir_list);
> > +
> > + if (!strcmp(dir, temp->trace_dir_name)) {
> > + *ti = temp->ti;
> > + return TRACE_DIR_EXISTS;
> > + }
> > + }
> > + return TRACE_PARENT_DIR_EXISTS;
> > +}
> > +EXPORT_SYMBOL_GPL(trace_exists);
> I wonder if the whole "trace_*" namespace was a good choice. There are
> other trace patches in-kernel, out-of-kernel and presumably in our
> future.
The name 'trace' (previously GTSC), I gather that it was the chosen after
much deliberation (http://tinyurl.com/6odoh4), however I'm open to the
idea of changing the name (say dbg_printk/dbg_dump?).
Kindly let me know of your suggestions for this, and I will change them
during the next version.
>> static int dropped_open(struct inode *inode, struct file *filp)
> > {
> > filp->private_data = inode->i_private;
> > @@ -561,3 +610,149 @@ void trace_cleanup(struct trace_info *tr
> > kfree(trace);
> > }
> > EXPORT_SYMBOL_GPL(trace_cleanup);
> > +
> > +/**
> > + * trace_cleanup_all - Removes all trace directories under a parent_dir
> > + * @parent_dir: Name of the parent directory
> > + */
> > +void trace_cleanup_all(const char *parent_dir)
> > +{
> > + struct list_head *pos, *pos_temp;
> > + struct trace_dir *temp;
> > +
> > + list_for_each_safe(pos, pos_temp, &trace_dirs) {
> > + temp = list_entry(pos, struct trace_dir, trace_dir_list);
> list_for_each_entry_safe()?
>> + if (!strncmp(parent_dir, temp->trace_root->d_iname, \
> > + strlen(parent_dir)))
> > + trace_cleanup(temp->ti);
> > + }
> > +}
> > +EXPORT_SYMBOL_GPL(trace_cleanup_all);
> >
> > ...
> >
> > +static inline int init_trace_interface(struct trace_printk_data *tpk)
> > +{
> > + int ret = 0;
> > + tpk->exists = trace_exists(tpk->parent_dir, tpk->dir, &tpk->ti);
> > +
> > + switch (tpk->exists) {
> > +
> > + case TRACE_PARENT_DIR_EXISTS:
> > + case TRACE_PARENT_DIR_ABSENT:
> > + if (!tpk->buf_size)
> > + tpk->buf_size = DEFAULT_TRACE_BUF_SIZE;
> > + if (!tpk->sub_buf_size)
> > + tpk->sub_buf_size = DEFAULT_TRACE_SUB_BUF_NR;
> > + tpk->ti = trace_setup(tpk->parent_dir, tpk->dir,
> > + tpk->buf_size, tpk->sub_buf_size, tpk->flags);
> > + printk(KERN_INFO "Trace interface %s setup\n",
> > + tpk->ti->dir->d_iname);
> > + if (IS_ERR(tpk->ti)) {
> > + printk(KERN_ERR "Error initialising %s interface\n",
> > + tpk->ti->dir->d_iname);
> > + return -EPERM;
> > + }
> > + /* Fall through */
> > + case TRACE_DIR_EXISTS:
> > + if (tpk->ti->state == TRACE_SETUP)
> > + ret = trace_start(tpk->ti);
> > + else
> > + ret = -EPERM;
> > + }
> > +
> > + return 0;
> > +}
> Two callsites, far too large to be inlined.
> Please just don't use inline at all, except in exceptional
> circumstances. The compiler will work it out for normal usage.
That 'inline' was unintentional. I will remove it.
>> +/**
> > + * trace_printk - Output a string to debugfs mount 'directly' using > 'trace'
> > + * @tpk: Structure containing info such as parent_dir and directory
> > + * @format: String containing format string specifiers
> > + * @ap: List of arguments
> > + */
> > +int trace_printk(struct trace_printk_data *tpk, char *format, ...)
> > +{
> > + int ret = 0;
> > + va_list(ap);
> > + unsigned long flags = 0;
> So I look at this and wonder "why did that need initialising?"
>> + va_start(ap, format);
> > +
> > + ret = init_trace_interface(tpk);
> > + if (unlikely(ret))
> > + return ret;
> > +
> > + /* Now do the actual printing */
> > + /* Take an RCU Lock over the trace_info state */
> > + rcu_read_lock();
> > + /* Take a spinlock for the global buffer used by relay */
> > + if (tpk->flags & TRACE_GLOBAL_CHANNEL)
> > + spin_lock_irqsave(&tpk->ti->trace_lock, flags);
> ah, because you got a compiler warning.
> That's why we have "uninitialized_var()": so the reader can understand
> what is happening. Plus uninitialized_var() generates no code, whereas
> that assignment generates additional text.
> uninitialized_var() is fairly sucky, but adding a mysterious,
> seemingly-unneeded and code-generating "= 0" is suckier.
Ok. Will modify it:
uninitialized_var(flags);
>> + ret = trace_printf(tpk->ti, format, ap);
> > + if (tpk->flags & TRACE_GLOBAL_CHANNEL)
> > + spin_unlock_irqrestore(&tpk->ti->trace_lock, flags);
> > + rcu_read_unlock();
> > +
> > + va_end(ap);
> > + return ret;
> > +}
> > +EXPORT_SYMBOL(trace_printk);
> > +
> > +/**
> > + * trace_dump - Output binary into debugfs mount 'directly' using 'trace'
> > + * @tpk: Structure containing info such as parent_dir and directory
> > + * @output: Data that needs to be output
> > + * @output_len: Length of the output data
> > + */
> > +int trace_dump(struct trace_printk_data *tpk, const void *output,
> > + const int output_len)
> > +{
> > + char *record;
> > + unsigned long flags = 0;
> there too.
>> + int ret = 0;
> > +
> > + ret = init_trace_interface(tpk);
> > + if (unlikely(ret))
> > + return ret;
> > +
> > + /* Now do the actual printing */
> > + rcu_read_lock();
> > + /* Take a spinlock for the global buffer used by relay */
> > + if (tpk->flags & TRACE_GLOBAL_CHANNEL)
> > + spin_lock_irqsave(&tpk->ti->trace_lock, flags);
> Making a callee's locking behaviour dependent upon an incoming argument
> is considered poor style and earns nastygrams from Linus. Is there no
> sane alternative?
Will something like this look better?
static int trace_dump_per_cpu_channel(struct trace_printk_data *tpk,
const void *output, const int output_len)
{
char *record;
int ret = 0;
record = relay_reserve(tpk->ti->rchan, output_len);
if (record && trace_running(tpk->ti))
memcpy(record, output, output_len);
else {
if (record)
ret = -EPERM;
else
ret = -ENOMEM;
}
return ret;
}
static int trace_dump_global_channel(struct trace_printk_data *tpk,
const void *output, const int output_len)
{
int ret;
uninitialized_var(flags);
spin_lock_irqsave(&tpk->ti->trace_lock, flags);
ret = trace_dump_per_cpu_channel(tpk, output, output_len);
spin_unlock_irqrestore(&tpk->ti->trace_lock, flags);
return ret;
}
int trace_dump(struct trace_printk_data *tpk, const void *output,
const int output_len)
{
unsigned long flags = 0;
int ret = 0;
ret = init_trace_interface(tpk);
if (unlikely(ret))
return ret;
/* Now do the actual printing */
rcu_read_lock();
/* Take a spinlock for the global buffer used by relay */
if (tpk->flags & TRACE_GLOBAL_CHANNEL)
ret = trace_dump_global_channel(tpk, output, output_len);
else
ret = trace_dump_per_cpu_channel(tpk, output, output_len);
rcu_read_unlock();
return ret;
}
>> + record = relay_reserve(tpk->ti->rchan, output_len);
> > +
> > + if (record && trace_running(tpk->ti))
> > + memcpy(record, output, output_len);
> > + else {
> > + if (record)
> > + ret = -EPERM;
> > + else
> > + ret = -ENOMEM;
> > + }
> > + if (tpk->flags & TRACE_GLOBAL_CHANNEL)
> > + spin_unlock_irqrestore(&tpk->ti->trace_lock, flags);
> > + rcu_read_unlock();
> > +
> > + return ret;
> > +}
> > +EXPORT_SYMBOL(trace_dump);
> >
> > ...
> >
> > +int init_module(void)
> > +{
> > + int ret = 0;
> > + int len_parent_dir, len_dir;
> > +
> > + /* setup the kprobe */
> > + kp.pre_handler = handler_pre;
> > + kp.post_handler = NULL;
> > + kp.fault_handler = NULL;
> > + kp.symbol_name = PROBE_POINT;
> > + ret = register_kprobe(&kp);
> > + if (ret) {
> > + printk(KERN_ERR "fork_trace: register_kprobe failed\n");
> > + return ret;
> > + }
> > +
> > + len_parent_dir = strlen(SAMPLE_PARENT_DIR) + 1;
> > + /* Initialising len_dir to the larger of the two dir names */
> > + len_dir = strlen("kprobe_struct") + 1;
> > +
> > + tpk = kzalloc(sizeof(*tpk), GFP_KERNEL);
> > + if (!tpk)
> > + ret = 1;
> > +
> > + tpk->parent_dir = SAMPLE_PARENT_DIR;
> > +
> > + /* Let's do a binary dump of struct kprobe using trace_dump */
> > + tpk->dir = "kprobes_struct";
> > + tpk->flags = TRACE_GLOBAL_CHANNEL;
> > + trace_dump(tpk, &kp, sizeof(kp));
> > +
> > + /* Now change the directory to collect fork pid data */
> > + tpk->dir = PROBE_POINT;
> > +
> > + if (ret)
> > + printk(KERN_ERR "Unable to find required free memory. "
> > + "Trace new sample module loading aborted");
> > + return ret;
> > +}
> > +
> > +void cleanup_module(void)
> > +{
> > + unregister_kprobe(&kp);
> > +
> > + /* Just a single cleanup call passing the parent dir string */
> > + trace_cleanup_all(SAMPLE_PARENT_DIR);
> > +}
> > +MODULE_LICENSE("GPL");
> Do init_module() and cleanup_module() actually get called? We _used_
> to have magic code which automatically calls those functions but I'd
> have thought it got removed years ago?
> Anyway, we should be using module_init() and module_exit() here, and
> those functions should have static scope.
Ok. This is adapted from legacy code in samples/trace/fork_trace.c.
I will modify the code in both files.
>> Index: linux-blktrace-many/samples/trace/Makefile
> > ===================================================================
> > --- linux-blktrace-many.orig/samples/trace/Makefile
> > +++ linux-blktrace-many/samples/trace/Makefile
> > @@ -1,4 +1,4 @@
> > # builds the trace example kernel modules;
> > # then to use (as root): insmod <fork_trace.ko>
> >> -obj-$(CONFIG_SAMPLE_TRACE) := fork_trace.o
> > +obj-$(CONFIG_SAMPLE_TRACE) := fork_trace.o fork_new_trace.o
> > Index: linux-blktrace-many/Documentation/trace.txt
> > ===================================================================
> > --- linux-blktrace-many.orig/Documentation/trace.txt
> > +++ linux-blktrace-many/Documentation/trace.txt
> > @@ -150,6 +150,28 @@ The steps a kernel data provider takes t
> > 5) Destroy the trace channel and underlying relay channel -
> > trace_cleanup().
> >> +Alternatively the user may choose to make use of two new interfaces --
> > +trace_printk() and trace_dump() -- to setup trace interface and
> > +trace_cleanup_all() to tear-down the same.
> > +
> > +Steps to use:
> > +1) Create and populate an instance of trace_printk_data structure. The > fields
> > + parent_dir and dir are mandatory. The fields buf_size, sub_buf_size > and flags
> > + are optional and will take default values if not populated. The field
> > + 'exists' and ti are for the trace infrastructure to use. The pointer > to the
> > + 'struct trace_info' i.e. ti may be used to perform fine granular > operations
> > + such as determine the state of the 'trace', stop individual traces, > etc.
> > +2) Default values for buf_size and sub_buf_size are 4096, 40 respectively.
> > +3) Use trace_dump() to output binary data which may be acted upon by a
> > + high-level program (say dumping a structure). trace_printk() can be > used
> > + for string output. Pass a pointer to the instance of trace_printk_data
> > + structure to these functions along with other parameters. The output > from
> > + these functions can be found at
> > + <debugfs_mount>/<parent_dir>/<dir>/trace<0..n>.
> > +4) trace_cleanup_all() for a given parent directory will cleanup and > remove all
> > + trace directories created under the specified directory.
> > +5) Sample code for the same can be found in samples/trace/fork_new_trace.c
> wordwrapping...
> I often fix it, but I think we need v3 on this one.
Thanks for looking into the patches. I will send out the next version of
the patch after hearing from you on the above.
Thanks,
K.Prasad
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [RFC Patch 1/1] trace_printk and trace_dump interface - v2
2008-05-20 19:53 ` K.Prasad
@ 2008-05-20 20:12 ` Andrew Morton
2008-05-23 4:37 ` K.Prasad
0 siblings, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2008-05-20 20:12 UTC (permalink / raw)
To: prasad; +Cc: linux-kernel, dwilder, hch
On Wed, 21 May 2008 01:23:09 +0530
"K.Prasad" <prasad@linux.vnet.ibm.com> wrote:
>
> The name 'trace' (previously GTSC), I gather that it was the chosen after
> much deliberation (http://tinyurl.com/6odoh4), however I'm open to the
> idea of changing the name (say dbg_printk/dbg_dump?).
>
> Kindly let me know of your suggestions for this, and I will change them
> during the next version.
Well I was just putting it out there for consideration. Yes, I think
the whole idea of consuming the "trace_*" namespace in this patchset
was ill-advised.
Also, I don't know how to move forward with the whole feature - I
haven't seen a lot of interest from others and I haven't seen much
discussion of how this feature differs from all the other tracing
things which have been floating about.
And even if the proposed patches presently offer unique and useful
features, will one of the other tracing implementations (eg: ltt) later
grow to close that gap?
I'm also a bit dubious about the whole thing based on past experience
with kernel-developer-only in-kernel tools. People just don't use them
much. One example: fault injection.
> Will something like this look better?
If it addresses the comment I raised, sure. Please satisfy yourself
that it does.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [RFC Patch 1/1] trace_printk and trace_dump interface - v2
2008-05-20 20:12 ` Andrew Morton
@ 2008-05-23 4:37 ` K.Prasad
2008-05-28 18:16 ` K.Prasad
0 siblings, 1 reply; 11+ messages in thread
From: K.Prasad @ 2008-05-23 4:37 UTC (permalink / raw)
To: Andrew Morton, zanussi; +Cc: prasad, linux-kernel, dwilder, hch
On Tue, May 20, 2008 at 01:12:25PM -0700, Andrew Morton wrote:
> On Wed, 21 May 2008 01:23:09 +0530
> "K.Prasad" <prasad@linux.vnet.ibm.com> wrote:
>>> The name 'trace' (previously GTSC), I gather that it was the chosen
> after
> > much deliberation (http://tinyurl.com/6odoh4), however I'm open to the
> > idea of changing the name (say dbg_printk/dbg_dump?).
> >> Kindly let me know of your suggestions for this, and I will change
> them
> > during the next version.
> Well I was just putting it out there for consideration. Yes, I think
> the whole idea of consuming the "trace_*" namespace in this patchset
> was ill-advised.
Since "trace_*" uses relay infrastructure underneath, I am thinking, if it
would be acceptable to rename them to "relay_*", say relay_printk() and
relay_dump(). I would be glad to hear from the 'relay' folks about this
thought.
> Also, I don't know how to move forward with the whole feature - I
> haven't seen a lot of interest from others and I haven't seen much
> discussion of how this feature differs from all the other tracing
> things which have been floating about.
More than a tracing mechanism, this is a tool that aids in tracing, by
providing a powerful function that directs its output onto the debugfs
mount path which could be later harnessed by user-space applications
too. A potential in-kernel user could be the 'marker' handlers which
more often would be interested in logging data.
> And even if the proposed patches presently offer unique and useful
> features, will one of the other tracing implementations (eg: ltt) later
> grow to close that gap?
> I'm also a bit dubious about the whole thing based on past experience
> with kernel-developer-only in-kernel tools. People just don't use them
> much. One example: fault injection.
Among the other enhancements that we were contemplating for this
mechanism, to make it more powerful and unique, is the ability to
a)Define callback functions typically invoked everytime before accessing/
printing each variable (which may say, acquire a lock or prefix a
timestamp) by adding fields to the trace_printk_data structure.
b)Provide sequencing information for the output, along with ability to
prefix the output with essential data such as PID, Timestamp, CPUID,
etc.
Thanks for letting us know your thoughts on this.
--K.Prasad
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [RFC Patch 1/1] trace_printk and trace_dump interface - v2
2008-05-23 4:37 ` K.Prasad
@ 2008-05-28 18:16 ` K.Prasad
2008-05-28 18:37 ` [RFC Patch 0/1] Merging Documentation/trace.txt with Documentation/filesystems/relay.txt K.Prasad
2008-05-28 18:48 ` [RFC Patch 2/2] Renaming lib/trace.[ch] files to kernel/relay_debugfs.[ch] and enhancements K.Prasad
0 siblings, 2 replies; 11+ messages in thread
From: K.Prasad @ 2008-05-28 18:16 UTC (permalink / raw)
To: Andrew Morton, zanussi; +Cc: linux-kernel, dwilder, hch, prasad
On Fri, May 23, 2008 at 10:07:50AM +0530, K.Prasad wrote:
> On Tue, May 20, 2008 at 01:12:25PM -0700, Andrew Morton wrote:
>> On Wed, 21 May 2008 01:23:09 +0530
>> "K.Prasad" <prasad@linux.vnet.ibm.com> wrote:
>>>> The name 'trace' (previously GTSC), I gather that it was the chosen
>> after
>> > much deliberation (http://tinyurl.com/6odoh4), however I'm open to the
>> > idea of changing the name (say dbg_printk/dbg_dump?).
>> >> Kindly let me know of your suggestions for this, and I will change them
>> > during the next version.
>> Well I was just putting it out there for consideration. Yes, I think
>> the whole idea of consuming the "trace_*" namespace in this patchset
>> was ill-advised.
> Since "trace_*" uses relay infrastructure underneath, I am thinking, if it
> would be acceptable to rename them to "relay_*", say relay_printk() and
> relay_dump(). I would be glad to hear from the 'relay' folks about this
> thought.
>
Hi Andrew,
Given that "trace_*" consists of wrapper functions around "relay"
(relay + debugfs filesystem), I'm sending out the following patches
which rename lib/trace.[ch] to kernel/relay_debugfs.[ch]. The "trace_*"
functions are renamed to "relay_*" functions without any name-space
clashes with existing "relay" functions.
Now the new functions relay_printk() and relay_dump() will provide an
easy-to-use interface for "relay" and will also reduce the amount of
code require to setup/cleanup relay.
>> Also, I don't know how to move forward with the whole feature - I
>> haven't seen a lot of interest from others and I haven't seen much
>> discussion of how this feature differs from all the other tracing
>> things which have been floating about.
> More than a tracing mechanism, this is a tool that aids in tracing, by
> providing a powerful function that directs its output onto the debugfs
> mount path which could be later harnessed by user-space applications
> too. A potential in-kernel user could be the 'marker' handlers which
> more often would be interested in logging data.
>
>> And even if the proposed patches presently offer unique and useful
>> features, will one of the other tracing implementations (eg: ltt) later
>> grow to close that gap?
>> I'm also a bit dubious about the whole thing based on past experience
>> with kernel-developer-only in-kernel tools. People just don't use them
>> much. One example: fault injection.
>
> Among the other enhancements that we were contemplating for this
> mechanism, to make it more powerful and unique, is the ability to
> a)Define callback functions typically invoked everytime before accessing/
> printing each variable (which may say, acquire a lock or prefix a
> timestamp) by adding fields to the trace_printk_data structure.
> b)Provide sequencing information for the output, along with ability to
> prefix the output with essential data such as PID, Timestamp, CPUID,
> etc.
>
Along with the above mentioned points, the sole in-kernel user of
"relay" which is "blktrace" was also converted to use (the erstwhile)
"trace_*", which resulted in significant code reduction. I will now migrate
"blktrace" to use "relay_*".
Kindly let us know what you think about the patches.
Thanks,
K.Prasad
P.S.: The second patch which actually effects the name change, along
with addition of relay_printk()/relay_dump() interface is found to be
quite lengthy. Since the patches have been previously reviewed I'm
sending them as a single chunk of patch.
^ permalink raw reply [flat|nested] 11+ messages in thread
* [RFC Patch 0/1] Merging Documentation/trace.txt with Documentation/filesystems/relay.txt
2008-05-28 18:16 ` K.Prasad
@ 2008-05-28 18:37 ` K.Prasad
2008-05-28 18:48 ` [RFC Patch 2/2] Renaming lib/trace.[ch] files to kernel/relay_debugfs.[ch] and enhancements K.Prasad
1 sibling, 0 replies; 11+ messages in thread
From: K.Prasad @ 2008-05-28 18:37 UTC (permalink / raw)
To: akpm, zanussi; +Cc: linux-kernel, dwilder, hch, prasad
This patch merges the "trace" documentation with that of "relay" as a
part of renaming/merging "trace" with "relay. It also renames the
functions wherever required.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
Documentation/filesystems/relay.txt | 215 ++++++++++++++++++++++++++++++++++++
Documentation/trace.txt | 210 -----------------------------------
2 files changed, 215 insertions(+), 210 deletions(-)
Index: linux-relay_NEW-2.6.25-mm1/Documentation/filesystems/relay.txt
===================================================================
--- linux-relay_NEW-2.6.25-mm1.orig/Documentation/filesystems/relay.txt
+++ linux-relay_NEW-2.6.25-mm1/Documentation/filesystems/relay.txt
@@ -470,6 +470,213 @@ unmapped. The client can use this notif
within the kernel application, such as enabling/disabling logging to
the channel.
+Enhanced Relay interface using debugfs -- Relay debugfs
+========================================================
+Relay debugfs Setup and Control
+================================
+In the kernel, the 'relay debugfs' interface provides a simple mechanism for
+starting and managing data channels (relays) to user space. The
+'relay debugfs' interface builds on the relay interface. For a complete
+description of the relay interface, please see:
+Documentation/filesystems/relay.txt.
+
+The 'relay debugfs' interface provides a single layer in a complete tracing
+application. 'relay debugfs' provides a kernel API that can be used for the
+setup and control of tracing channels. User of 'relay debugfs' must provide a
+data layer responsible for formatting and writing data into the 'relay debugfs'
+channels.
+
+A layered approach to tracing
+=============================
+A complete kernel tracing application consists of a data provider and
+a data consumer. Both provider and consumer contain three layers; each
+layer works in tandem with the corresponding layer in the opposite side.
+The layers are represented in the following diagram.
+
+Provider Data layer
+ Formats raw data and provides data-related service.
+ For example, adding timestamps used by consumer to sort data.
+
+Provider Control layer
+ Provided by the 'relay debugfs' interface, this layer creates 'relay
+ debugfs' channels and informs the data layer and consumer of the current
+ state of the 'relay debugfs' channels.
+
+Provider Buffering layer
+ Provided by relay. This layer buffers data in the
+ kernel for consumption by the consumer's buffer
+ layer.
+
+Provider (in-kernel facility)
+-----------------------------------------------------------------------------
+Consumer (user application)
+
+
+Consumer Buffer layer
+ Reads/consumes data from the provider's data buffers.
+
+Consumer Control layer
+ Communicates to the provider's control layer to control the state
+ of the 'relay debugfs' channels.
+
+Consumer Data layer
+ Sorts and formats data as provided by the provider's data layer.
+
+The provider is coded as a kernel facility. The consumer is coded as
+a user application.
+
+
+'relay debugfs' - Features
+===========================
+'relay debugfs' exploits services and features provided by relay. These
+features are:
+- The creation and destruction of relay channels.
+- Buffer management. Overwrite or non-overwrite modes can be selected
+ as well as global or per-CPU buffering.
+
+Overwrite mode can be called "flight recorder mode". Flight recorder
+mode is selected by setting the TRACE_FLIGHT_CHANNEL flag when
+creating 'relay debugfs' channels. In flight mode when a tracing buffer is
+full, the oldest records in the buffer will be discarded to make room
+as new records arrive. In the default non-overwrite mode, new records
+may be written only if the buffer has room. In either case, to
+prevent data loss, a user space reader must keep the buffers
+drained. 'relay debugfs' provides a means to detect the number of records that
+have been dropped due to a buffer-full condition (non-overwrite mode
+only).
+
+When per-CPU buffers are used, relay creates one debugfs file for each
+running CPU. The user-space consumer of the data is responsible for
+reading the per-CPU buffers and collating the records presumably using
+a time stamp or sequence number included in the 'relay debugfs' records. The
+use of global buffers eliminates this extra work of sequencing
+records; however the provider's data layer must hold a lock when
+writing records. The lock prevents writers running on different CPUs
+from overwriting each other's data. However, buffering may be slower
+because writes to the buffer are serialized. Global buffering is
+selected by setting the TRACE_GLOBAL_CHANNEL flag when creating 'relay debugfs'
+channels.
+
+'relay debugfs' User Interface
+==============================
+When a 'relay debugfs' channel is created and started, the following
+directories and files are created in the root of the mounted debugfs.
+
+/debug (root of the debugfs)
+ /<relay_debugfs-root-dir>
+ /<relay_debugfs-name>
+ relay[0...N-1] Per-CPU 'relay debugfs' data, one
+ file per CPU.
+
+ state Start or stop tracing by
+ by writing the strings
+ "start" or "stop" to this
+ file. Read the file to get the
+ current state.
+
+ dropped The number of records dropped
+ due to a full-buffer condition,
+ for non-TRACE_FLIGHT_CHANNELs
+ only.
+
+ rewind Trigger a rewind by writing
+ to this file. i.e. start
+ next read at the beginning
+ again. Only available for
+ TRACE_FLIGHT_CHANNELS.
+
+
+ nr_sub Number of sub-buffers
+ in the channel.
+
+ sub_size Size of sub-buffers in
+ the channel.
+
+'relay debugfs' data is gathered from the 'relay debugfs'[0...N-1] files using
+one of the available interfaces provided by relay.
+
+When using the read(2) interface, as data is read it is marked as
+consumed by the relay subsystem. Therefore, subsequent reads will
+only return unconsumed data.
+
+'relay debugfs' Kernel API
+==========================
+An overview of the 'relay debugfs' Kernel API is now given. More details of the
+API can be found in linux/trace.h.
+
+The steps a kernel data provider takes to utilize the 'relay debugfs' interface
+are:
+1) Set up a 'relay debugfs' channel - relay_setup()
+2) Start the 'relay debugfs' channel - relay_start()
+3) Write one or more 'relay debugfs' records into the channel (using the relay
+ API).
+
+ Important: When writing a 'relay debugfs' record the provider must insure
+ that preemption is disabled and that 'relay debugfs' state is set to
+ "running". A typical function used to write records into a 'relay debugfs'
+ channel should follow the following semantics:
+
+ rcu_read_lock(); // disables preemption
+ if (relay_running(relay)){
+ relay_write(....); // use any available relay data
+ // function
+ }
+ rcu_read_unlock(); // enables preemption
+
+4) Stop and start tracing as desired - relay_start()/relay_stop()
+5) Destroy the 'relay debugfs' channel and underlying relay channel -
+ relay_cleanup().
+
+Kernel Configuration
+--------------------
+To use 'relay debugfs', configure your kernel with CONFIG_TRACE=y.
+'relay debugfs' depends on both CONFIG_RELAY and CONFIG_DEBUG_FS, these will be
+automatically configured when CONFIG_TRACE is selected (if not already
+configured).
+
+Using the User Interface
+------------------------
+Reading 'relay debugfs' data and controlling the 'relay debugfs' can be done
+using commands such as cat, echo and sort. However, If you are logging binary
+'relay debugfs' data a custom application may be required to read and process
+the 'relay debugfs' data. This section shows several examples of reading
+'relay debugfs' data and controling the 'relay debugfs'. All examples assume
+that the relay_debugfs directory is your current working directory.
+
+Viewing the current 'relay debugfs' state:
+$cat state
+
+Turning the 'relay debugfs' on and off:
+$echo start > state
+$echo stop > state
+
+Reading data when using global buffers (USE_GLOBAL_BUFFERS):
+$echo stop > state
+$cat relay0
+$echo start > state
+
+Reading data when using per-cpu buffers:
+When using per-cpu buffers the user should add a time stamp or sequence
+number to each 'relay debugfs' records. This is used by the consumer to sort
+the 'relay debugfs' records into chronological order. In the following example
+the user has placed a time stamp at the front of each the record. The format of
+a record is now shown.
+
+:<time stamp>:<field 1>:<field 2>:..........
+
+Collect the data from the per-cpu 'relay debugfs' buffers, sorting
+chronologically:
+$sort --field-separator=: --key=2.1 relay*
+
+Verify that no data was lost by examining the dropped file:
+$ cat dropped
+
+To collect a larger amount of data the 'relay debugfs' buffers can be read
+continuously using something like:
+
+while [ 1 ] ; do
+ sort --field-separator=: --key=2.1 relay*
+done
Resources
=========
@@ -493,3 +700,11 @@ Tom Zanussi <zanussi@us.ibm.com>
Also thanks to Hubertus Franke for a lot of useful suggestions and bug
reports.
+
+'relay debugfs' is adapted from blktrace authored by Jens Axboe
+(axboe@kernel.dk).
+
+Major contributions were made by:
+Tom Zanussi <zanussi@us.ibm.com>
+Martin Hunt <hunt@redhat.com>
+David Wilder <dwilder@us.ibm.com>
Index: linux-relay_NEW-2.6.25-mm1/Documentation/trace.txt
===================================================================
--- linux-relay_NEW-2.6.25-mm1.orig/Documentation/trace.txt
+++ /dev/null
@@ -1,210 +0,0 @@
-Trace Setup and Control
-=======================
-In the kernel, the trace interface provides a simple mechanism for
-starting and managing data channels (traces) to user space. The
-trace interface builds on the relay interface. For a complete
-description of the relay interface, please see:
-Documentation/filesystems/relay.txt.
-
-The trace interface provides a single layer in a complete tracing
-application. Trace provides a kernel API that can be used for the setup
-and control of tracing channels. User of trace must provide a data layer
-responsible for formatting and writing data into the trace channels.
-
-A layered approach to tracing
-=============================
-A complete kernel tracing application consists of a data provider and
-a data consumer. Both provider and consumer contain three layers; each
-layer works in tandem with the corresponding layer in the opposite side.
-The layers are represented in the following diagram.
-
-Provider Data layer
- Formats raw trace data and provides data-related service.
- For example, adding timestamps used by consumer to sort data.
-
-Provider Control layer
- Provided by the trace interface, this layer creates trace channels
- and informs the data layer and consumer of the current state
- of the trace channels.
-
-Provider Buffering layer
- Provided by relay. This layer buffers data in the
- kernel for consumption by the consumer's buffer
- layer.
-
-Provider (in-kernel facility)
------------------------------------------------------------------------------
-Consumer (user application)
-
-
-Consumer Buffer layer
- Reads/consumes data from the provider's data buffers.
-
-Consumer Control layer
- Communicates to the provider's control layer to control the state
- of the trace channels.
-
-Consumer Data layer
- Sorts and formats data as provided by the provider's data layer.
-
-The provider is coded as a kernel facility. The consumer is coded as
-a user application.
-
-
-Trace - Features
-================
-Trace exploits services and features provided by relay. These features
-are:
-- The creation and destruction of relay channels.
-- Buffer management. Overwrite or non-overwrite modes can be selected
- as well as global or per-CPU buffering.
-
-Overwrite mode can be called "flight recorder mode". Flight recorder
-mode is selected by setting the TRACE_FLIGHT_CHANNEL flag when
-creating trace channels. In flight mode when a tracing buffer is
-full, the oldest records in the buffer will be discarded to make room
-as new records arrive. In the default non-overwrite mode, new records
-may be written only if the buffer has room. In either case, to
-prevent data loss, a user space reader must keep the buffers
-drained. Trace provides a means to detect the number of records that
-have been dropped due to a buffer-full condition (non-overwrite mode
-only).
-
-When per-CPU buffers are used, relay creates one debugfs file for each
-running CPU. The user-space consumer of the data is responsible for
-reading the per-CPU buffers and collating the records presumably using
-a time stamp or sequence number included in the trace records. The
-use of global buffers eliminates this extra work of sequencing
-records; however the provider's data layer must hold a lock when
-writing records. The lock prevents writers running on different CPUs
-from overwriting each other's data. However, buffering may be slower
-because writes to the buffer are serialized. Global buffering is
-selected by setting the TRACE_GLOBAL_CHANNEL flag when creating trace
-channels.
-
-Trace User Interface
-===================
-When a trace channel is created and started, the following
-directories and files are created in the root of the mounted debugfs.
-
-/debug (root of the debugfs)
- /<trace-root-dir>
- /<trace-name>
- trace[0...N-1] Per-CPU trace data, one
- file per CPU.
-
- state Start or stop tracing by
- by writing the strings
- "start" or "stop" to this
- file. Read the file to get the
- current state.
-
- dropped The number of records dropped
- due to a full-buffer condition,
- for non-TRACE_FLIGHT_CHANNELs
- only.
-
- rewind Trigger a rewind by writing
- to this file. i.e. start
- next read at the beginning
- again. Only available for
- TRACE_FLIGHT_CHANNELS.
-
-
- nr_sub Number of sub-buffers
- in the channel.
-
- sub_size Size of sub-buffers in
- the channel.
-
-Trace data is gathered from the trace[0...N-1] files using one of the
-available interfaces provided by relay.
-
-When using the read(2) interface, as data is read it is marked as
-consumed by the relay subsystem. Therefore, subsequent reads will
-only return unconsumed data.
-
-Trace Kernel API
-===============
-An overview of the trace Kernel API is now given. More details of the
-API can be found in linux/trace.h.
-
-The steps a kernel data provider takes to utilize the trace interface are:
-1) Set up a trace channel - trace_setup()
-2) Start the trace channel - trace_start()
-3) Write one or more trace records into the channel (using the relay API).
-
- Important: When writing a trace record the provider must insure that
- preemption is disabled and that trace state is set to "running". A
- typical function used to write records into a trace channel should
- follow the following semantics:
-
- rcu_read_lock(); // disables preemption
- if (trace_running(trace)){
- relay_write(....); // use any available relay data
- // function
- }
- rcu_read_unlock(); // enables preemption
-
-4) Stop and start tracing as desired - trace_start()/trace_stop()
-5) Destroy the trace channel and underlying relay channel -
- trace_cleanup().
-
-Kernel Configuration
---------------------
-To use trace, configure your kernel with CONFIG_TRACE=y. Trace depends on
-both CONFIG_RELAY and CONFIG_DEBUG_FS, these will be automatically configured
-when CONFIG_TRACE is selected (if not already configured).
-
-Using the User Interface
-------------------------
-Reading trace data and controlling the trace can be done using commands such
-as cat, echo and sort. However, If you are logging binary trace data a
-custom application may be required to read and process the trace data.
-This section shows several examples of reading trace data and controling
-the trace. All examples assume that the "trace" directory is your current
-working directory.
-
-Viewing the current trace state:
-$cat state
-
-Turning the trace on and off:
-$echo start > state
-$echo stop > state
-
-Reading data when using global buffers (USE_GLOBAL_BUFFERS):
-$echo stop > state
-$cat trace0
-$echo start > state
-
-Reading data when using per-cpu buffers:
-When using per-cpu buffers the tracer should add a time stamp or sequence
-number to each trace records. This is used by the consumer to sort the trace
-records into chronological order. In the following example the tracer has
-placed a time stamp at the front of each the record. The format of a record
-is now shown.
-
-:<time stamp>:<field 1>:<field 2>:..........
-
-Collect the data from the per-cpu trace buffers, sorting chronologically:
-$sort --field-separator=: --key=2.1 trace*
-
-Verify that no data was lost by examining the dropped file:
-$ cat dropped
-
-To collect a larger amount of data the trace buffers can be read
-continuously using something like:
-
-while [ 1 ] ; do
- sort --field-separator=: --key=2.1 trace*
-done
-
-
-Credits
-=======
-Trace is adapted from blktrace authored by Jens Axboe (axboe@kernel.dk).
-
-Major contributions were made by:
-Tom Zanussi <zanussi@us.ibm.com>
-Martin Hunt <hunt@redhat.com>
-David Wilder <dwilder@us.ibm.com>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [RFC Patch 2/2] Renaming lib/trace.[ch] files to kernel/relay_debugfs.[ch] and enhancements
2008-05-28 18:16 ` K.Prasad
2008-05-28 18:37 ` [RFC Patch 0/1] Merging Documentation/trace.txt with Documentation/filesystems/relay.txt K.Prasad
@ 2008-05-28 18:48 ` K.Prasad
1 sibling, 0 replies; 11+ messages in thread
From: K.Prasad @ 2008-05-28 18:48 UTC (permalink / raw)
To: akpm, zanussi; +Cc: linux-kernel, dwilder, hch, prasad
This patch renames the lib/trace.[ch] files to
kernel/relay_debugfs.[ch]. Also present are the changes to rename the
"trace_*" functions to "relay_*".
Two new functions which provide an easy-to-use interface for relay
called relay_printk() and relay_dump() are also introduced (earlier
called as trace_printk() and trace_dump()).
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
Documentation/filesystems/relay.txt | 22 +
include/linux/relay_debugfs.h | 155 +++++++
include/linux/trace.h | 99 ----
init/Kconfig | 9
kernel/Makefile | 1
kernel/relay_debugfs.c | 760 ++++++++++++++++++++++++++++++++++++
lib/Kconfig | 9
lib/Makefile | 2
lib/trace.c | 563 --------------------------
samples/Kconfig | 8
samples/Makefile | 2
samples/relay/Makefile | 4
samples/relay/fork_trace.c | 132 ++++++
samples/trace/Makefile | 4
samples/trace/fork_new_trace.c | 99 ++++
samples/trace/fork_trace.c | 132 ------
16 files changed, 1187 insertions(+), 814 deletions(-)
Index: linux-2.6.25/include/linux/relay_debugfs.h
===================================================================
--- /dev/null
+++ linux-2.6.25/include/linux/relay_debugfs.h
@@ -0,0 +1,155 @@
+/*
+ * RELAY DEBUGFS defines and function prototypes
+ *
+ * Copyright (C) 2007 IBM Inc.
+ *
+ * Tom Zanussi <zanussi@us.ibm.com>
+ * Martin Hunt <hunt@redhat.com>
+ * David Wilder <dwilder@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+#ifndef _LINUX_RELAY_DEBUGFS_H
+#define _LINUX_RELAY_DEBUGFS_H
+
+#include <linux/relay.h>
+
+/*
+ * RELAY DEBUGFS channel flags
+ */
+#define RELAY_GLOBAL_CHANNEL 0x01
+#define RELAY_FLIGHT_CHANNEL 0x02
+#define RELAY_DISABLE_STATE 0x04
+
+enum relay_state {
+ RELAY_SETUP,
+ RELAY_RUNNING,
+ RELAY_STOPPED,
+};
+
+enum relay_dir_state {
+ RELAY_PARENT_DIR_ABSENT,
+ RELAY_PARENT_DIR_EXISTS,
+ RELAY_DIR_EXISTS
+};
+
+#define RELAY_ROOT_NAME_SIZE 64 /* Max root dir identifier */
+#define RELAY_NAME_SIZE 64 /* Max relay identifier */
+
+/*
+ * Buffers for use by relay_printk
+ */
+#define DEFAULT_RELAY_BUF_SIZE 4096
+#define DEFAULT_RELAY_SUB_BUF_NR 40
+/*
+ * Global root user information
+ */
+struct relay_root {
+ struct list_head list;
+ char name[RELAY_ROOT_NAME_SIZE];
+ struct dentry *root;
+ unsigned int users;
+};
+
+/*
+ * Client information
+ */
+struct relay_info {
+ struct mutex state_mutex; /* Used to protect state changes */
+ enum relay_state state;
+ struct dentry *state_file;
+ struct rchan *rchan;
+ struct dentry *dir;
+ struct dentry *dropped_file;
+ struct dentry *reset_consumed_file;
+ struct dentry *nr_sub_file;
+ struct dentry *sub_size_file;
+ atomic_t dropped;
+ struct relay_root *root;
+ void *private_data;
+ unsigned int flags;
+ unsigned int buf_size;
+ unsigned int buf_nr;
+ spinlock_t relay_lock;
+};
+
+struct relay_printk_data {
+ char *parent_dir;
+ char *dir;
+ int exists;
+ int buf_size;
+ int sub_buf_size;
+ unsigned long flags;
+ struct relay_info *ti;
+};
+
+/*
+ * Information about every 'relay debugfs' directory
+ */
+struct relay_dir {
+ struct list_head relay_dir_list;
+ char relay_dir_name[RELAY_NAME_SIZE];
+ struct dentry *relay_root;
+ struct dentry *relay_dir;
+ struct relay_info *ti;
+};
+
+#ifdef CONFIG_RELAY_DEBUGFS
+static inline int relay_running(struct relay_info *relay)
+{
+ return relay->state == RELAY_RUNNING;
+}
+struct relay_info *relay_setup(const char *root, const char *name,
+ u32 buf_size, u32 buf_nr, u32 flags);
+int relay_start(struct relay_info *relay);
+int relay_stop(struct relay_info *relay);
+void relay_cleanup(struct relay_info *relay);
+int relay_exists(const char *parent_dir, const char *dir,
+ struct relay_info **ti);
+void relay_cleanup_all(const char *parent_dir);
+int relay_printk(struct relay_printk_data *dpk, char *format, ...);
+int relay_dump(struct relay_printk_data *dpk, const void *output,
+ const int output_len);
+#else
+static inline struct relay_info *relay_setup(const char *root,
+ const char *name, u32 buf_size,
+ u32 buf_nr, u32 flags)
+{
+ return NULL;
+}
+static inline int relay_start(struct relay_info *relay) { return -EINVAL; }
+static inline int relay_stop(struct relay_info *relay) { return -EINVAL; }
+static inline int relay_running(struct relay_info *relay) { return 0; }
+static inline void relay_cleanup(struct relay_info *relay) {}
+static inline int relay_exists(const char *parent_dir, const char *dir,
+ struct relay_info **ti)
+{
+ return -EINVAL;
+}
+static inline void relay_cleanup_all(const char *parent_dir) {}
+static inline int relay_printk(struct relay_printk_data *dpk, char *format,
+ ...)
+{
+ return -EINVAL;
+}
+int relay_dump(struct relay_printk_data *dpk, const void *output,
+ const int output_len)
+{
+ return -EINVAL;
+}
+
+#endif
+
+#endif /* ifdef CONFIG_RELAY_DEBUGFS */
Index: linux-2.6.25/include/linux/trace.h
===================================================================
--- linux-2.6.25.orig/include/linux/trace.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * TRACE defines and function prototypes
- *
- * Copyright (C) 2007 IBM Inc.
- *
- * Tom Zanussi <zanussi@us.ibm.com>
- * Martin Hunt <hunt@redhat.com>
- * David Wilder <dwilder@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- */
-#ifndef _LINUX_TRACE_H
-#define _LINUX_TRACE_H
-
-#include <linux/relay.h>
-
-/*
- * TRACE channel flags
- */
-#define TRACE_GLOBAL_CHANNEL 0x01
-#define TRACE_FLIGHT_CHANNEL 0x02
-#define TRACE_DISABLE_STATE 0x04
-
-enum trace_state {
- TRACE_SETUP,
- TRACE_RUNNING,
- TRACE_STOPPED,
-};
-
-#define TRACE_ROOT_NAME_SIZE 64 /* Max root dir identifier */
-#define TRACE_NAME_SIZE 64 /* Max trace identifier */
-
-/*
- * Global root user information
- */
-struct trace_root {
- struct list_head list;
- char name[TRACE_ROOT_NAME_SIZE];
- struct dentry *root;
- unsigned int users;
-};
-
-/*
- * Client information
- */
-struct trace_info {
- struct mutex state_mutex; /* Used to protect state changes */
- enum trace_state state;
- struct dentry *state_file;
- struct rchan *rchan;
- struct dentry *dir;
- struct dentry *dropped_file;
- struct dentry *reset_consumed_file;
- struct dentry *nr_sub_file;
- struct dentry *sub_size_file;
- atomic_t dropped;
- struct trace_root *root;
- void *private_data;
- unsigned int flags;
- unsigned int buf_size;
- unsigned int buf_nr;
-};
-
-#ifdef CONFIG_TRACE
-static inline int trace_running(struct trace_info *trace)
-{
- return trace->state == TRACE_RUNNING;
-}
-struct trace_info *trace_setup(const char *root, const char *name,
- u32 buf_size, u32 buf_nr, u32 flags);
-int trace_start(struct trace_info *trace);
-int trace_stop(struct trace_info *trace);
-void trace_cleanup(struct trace_info *trace);
-#else
-static inline struct trace_info *trace_setup(const char *root,
- const char *name, u32 buf_size,
- u32 buf_nr, u32 flags)
-{
- return NULL;
-}
-static inline int trace_start(struct trace_info *trace) { return -EINVAL; }
-static inline int trace_stop(struct trace_info *trace) { return -EINVAL; }
-static inline int trace_running(struct trace_info *trace) { return 0; }
-static inline void trace_cleanup(struct trace_info *trace) {}
-#endif
-
-#endif
Index: linux-2.6.25/kernel/relay_debugfs.c
===================================================================
--- /dev/null
+++ linux-2.6.25/kernel/relay_debugfs.c
@@ -0,0 +1,760 @@
+/*
+ * Based on blktrace code, Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
+ * Moved to utt.c by Tom Zanussi <zanussi@us.ibm.com>, 2006
+ * Additional contributions by:
+ * Martin Hunt <hunt@redhat.com>, 2007
+ * David Wilder <dwilder@us.ibm.com>, 2007
+ * Renamed to trace <dwilder.ibm.com>, 2007
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/debugfs.h>
+#include <linux/relay_debugfs.h>
+
+static LIST_HEAD(relay_roots);
+static LIST_HEAD(relay_dirs);
+static DEFINE_MUTEX(relay_mutex);
+
+static int state_open(struct inode *inode, struct file *filp)
+{
+ filp->private_data = inode->i_private;
+ return 0;
+}
+
+static ssize_t state_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct relay_info *relay = filp->private_data;
+ char *buf = "relay not started\n";
+
+ if (relay->state == RELAY_STOPPED)
+ buf = "stopped\n";
+ else if (relay->state == RELAY_RUNNING)
+ buf = "running\n";
+ return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+}
+
+static ssize_t state_write(struct file *filp, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct relay_info *relay = filp->private_data;
+ char buf[16];
+ int ret;
+
+ if (relay->flags & RELAY_DISABLE_STATE)
+ return -EINVAL;
+
+ if (count > sizeof(buf) - 1)
+ return -EINVAL;
+
+ if (copy_from_user(buf, buffer, count))
+ return -EFAULT;
+
+ buf[count-1] = '\0';
+
+ if (strcmp(buf, "start") == 0) {
+ ret = relay_start(relay);
+ if (ret)
+ return ret;
+ } else if (strcmp(buf, "stop") == 0)
+ relay_stop(relay);
+ else
+ return -EINVAL;
+
+ return count;
+}
+
+static struct file_operations state_fops = {
+ .owner = THIS_MODULE,
+ .open = state_open,
+ .read = state_read,
+ .write = state_write,
+};
+
+static void remove_root(struct relay_info *relay)
+{
+ if (relay->root->root && simple_empty(relay->root->root)) {
+ debugfs_remove(relay->root->root);
+ list_del(&relay->root->list);
+ kfree(relay->root);
+ relay->root = NULL;
+ }
+}
+
+static void remove_tree(struct relay_info *relay)
+{
+ struct list_head *pos, *temp;
+ struct relay_dir *dr = NULL;
+
+ mutex_lock(&relay_mutex);
+ debugfs_remove(relay->dir);
+
+ list_for_each_safe(pos, temp, &relay_dirs) {
+ dr = list_entry(pos, struct relay_dir, relay_dir_list);
+ if (dr->ti == relay) {
+ list_del(pos);
+ kfree(dr);
+ }
+ }
+
+ if (relay->root) {
+ if (--relay->root->users == 0)
+ remove_root(relay);
+ }
+
+ mutex_unlock(&relay_mutex);
+}
+
+/*
+ * Creates the relay_root if it's not found.
+ */
+static struct relay_root *lookup_root(const char *root)
+{
+ struct list_head *pos;
+ struct relay_root *r;
+
+ list_for_each(pos, &relay_roots) {
+ r = list_entry(pos, struct relay_root, list);
+ if (!strcmp(r->name, root))
+ return r;
+ }
+
+ r = kzalloc(sizeof(struct relay_root), GFP_KERNEL);
+ if (!r)
+ return ERR_PTR(-ENOMEM);
+
+ strlcpy(r->name, root, sizeof(r->name));
+
+ r->root = debugfs_create_dir(root, NULL);
+ if (IS_ERR(r->root))
+ r->root = NULL;
+ else
+ list_add(&r->list, &relay_roots);
+
+ return r;
+}
+
+static struct dentry *create_tree(struct relay_info *relay, const char *root,
+ const char *name)
+{
+ struct relay_dir *temp;
+
+ if (root == NULL || name == NULL)
+ return ERR_PTR(-EINVAL);
+
+ temp = kzalloc(sizeof(struct relay_dir), GFP_KERNEL);
+ if ((temp == NULL) || (strlen(name) > RELAY_NAME_SIZE))
+ return ERR_PTR(-ENOMEM);
+
+ strlcpy(temp->relay_dir_name, name, sizeof(temp->relay_dir_name));
+
+ mutex_lock(&relay_mutex);
+
+ relay->root = lookup_root(root);
+ if (IS_ERR(relay->root)) {
+ relay->root = NULL;
+ goto err;
+ }
+
+ temp->relay_root = relay->root->root;
+ temp->relay_dir = debugfs_create_dir(name, relay->root->root);
+
+ if (IS_ERR(temp->relay_dir))
+ remove_root(relay);
+ else {
+ relay->root->users++;
+ temp->ti = relay;
+ list_add_tail(&temp->relay_dir_list, &relay_dirs);
+ }
+
+err:
+ mutex_unlock(&relay_mutex);
+ return temp->relay_dir;
+}
+
+int relay_exists(const char *parent_dir, const char *dir,
+ struct relay_info **ti)
+{
+ struct list_head *pos;
+ struct relay_root *r;
+ struct relay_dir *temp;
+
+ list_for_each(pos, &relay_roots) {
+ r = list_entry(pos, struct relay_root, list);
+ if (!strcmp(parent_dir, r->name))
+ goto search_dir;
+ }
+ return RELAY_PARENT_DIR_ABSENT;
+
+ search_dir:
+ list_for_each(pos, &relay_dirs) {
+ temp = list_entry(pos, struct relay_dir, relay_dir_list);
+
+ if (!strcmp(dir, temp->relay_dir_name)) {
+ *ti = temp->ti;
+ return RELAY_DIR_EXISTS;
+ }
+ }
+ return RELAY_PARENT_DIR_EXISTS;
+}
+EXPORT_SYMBOL_GPL(relay_exists);
+
+static int dropped_open(struct inode *inode, struct file *filp)
+{
+ filp->private_data = inode->i_private;
+
+ return 0;
+}
+
+static ssize_t dropped_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct relay_info *relay = filp->private_data;
+ char buf[16];
+
+ snprintf(buf, sizeof(buf), "%u\n", atomic_read(&relay->dropped));
+
+ return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+}
+
+static struct file_operations dropped_fops = {
+ .owner = THIS_MODULE,
+ .open = dropped_open,
+ .read = dropped_read,
+};
+
+static int reset_consumed_open(struct inode *inode, struct file *filp)
+{
+ filp->private_data = inode->i_private;
+
+ return 0;
+}
+
+static ssize_t reset_consumed_write(struct file *filp,
+ const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ int ret = count;
+ struct relay_info *relay = filp->private_data;
+
+ mutex_lock(&relay->state_mutex);
+ switch (relay->state) {
+ case RELAY_RUNNING:
+ relay->state = RELAY_STOPPED;
+ synchronize_rcu();
+ relay_flush(relay->rchan);
+ relay_reset_consumed(relay->rchan);
+ relay->state = RELAY_RUNNING;
+ break;
+ case RELAY_STOPPED:
+ relay_reset_consumed(relay->rchan);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ mutex_unlock(&relay->state_mutex);
+ return ret;
+}
+
+static struct file_operations reset_consumed_fops = {
+ .owner = THIS_MODULE,
+ .open = reset_consumed_open,
+ .write = reset_consumed_write
+};
+
+static int sub_size_open(struct inode *inode, struct file *filp)
+{
+ filp->private_data = inode->i_private;
+
+ return 0;
+}
+
+static ssize_t sub_size_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct relay_info *relay = filp->private_data;
+ char buf[32];
+
+ snprintf(buf, sizeof(buf), "%zu\n", relay->rchan->subbuf_size);
+
+ return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+}
+
+static struct file_operations sub_size_fops = {
+ .owner = THIS_MODULE,
+ .open = sub_size_open,
+ .read = sub_size_read,
+};
+
+static int nr_sub_open(struct inode *inode, struct file *filp)
+{
+ filp->private_data = inode->i_private;
+ return 0;
+}
+
+static ssize_t nr_sub_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct relay_info *relay = filp->private_data;
+ char buf[32];
+
+ snprintf(buf, sizeof(buf), "%zu\n", relay->rchan->n_subbufs);
+
+ return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+}
+
+static struct file_operations nr_sub_fops = {
+ .owner = THIS_MODULE,
+ .open = nr_sub_open,
+ .read = nr_sub_read,
+};
+
+/*
+ * Keep track of how many times we encountered a full subbuffer, to aid
+ * the user space app in telling how many lost events there were.
+ */
+static int subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
+ void *prev_subbuf, size_t prev_padding)
+{
+ struct relay_info *relay = buf->chan->private_data;
+
+ if (relay->flags & RELAY_FLIGHT_CHANNEL)
+ return 1;
+
+ if (!relay_buf_full(buf))
+ return 1;
+
+ atomic_inc(&relay->dropped);
+
+ return 0;
+}
+
+static int remove_buf_file_callback(struct dentry *dentry)
+{
+ debugfs_remove(dentry);
+
+ return 0;
+}
+
+static struct dentry *create_buf_file_callback(const char *filename,
+ struct dentry *parent, int mode,
+ struct rchan_buf *buf,
+ int *is_global)
+{
+ return debugfs_create_file(filename, mode, parent, buf,
+ &relay_file_operations);
+}
+
+static struct dentry *create_global_buf_file_callback(const char *filename,
+ struct dentry *parent,
+ int mode,
+ struct rchan_buf *buf,
+ int *is_global)
+{
+ *is_global = 1;
+
+ return debugfs_create_file(filename, mode, parent, buf,
+ &relay_file_operations);
+}
+
+static struct rchan_callbacks relay_callbacks = {
+ .subbuf_start = subbuf_start_callback,
+ .create_buf_file = create_buf_file_callback,
+ .remove_buf_file = remove_buf_file_callback,
+};
+static struct rchan_callbacks relay_callbacks_global = {
+ .subbuf_start = subbuf_start_callback,
+ .create_buf_file = create_global_buf_file_callback,
+ .remove_buf_file = remove_buf_file_callback,
+};
+
+static void remove_controls(struct relay_info *relay)
+{
+ debugfs_remove(relay->state_file);
+ debugfs_remove(relay->dropped_file);
+ debugfs_remove(relay->reset_consumed_file);
+ debugfs_remove(relay->nr_sub_file);
+ debugfs_remove(relay->sub_size_file);
+ remove_tree(relay);
+}
+
+/*
+ * Setup controls for tracing.
+ */
+static struct relay_info *setup_controls(const char *root,
+ const char *name, u32 flags)
+{
+ struct relay_info *relay;
+ long ret;
+
+ relay = kzalloc(sizeof(*relay), GFP_KERNEL);
+ if (!relay) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ relay->dir = create_tree(relay, root, name);
+ if (IS_ERR(relay->dir)) {
+ ret = PTR_ERR(relay->dir);
+ relay->dir = NULL;
+ goto err;
+ }
+
+ relay->state_file = debugfs_create_file("state", 0444, relay->dir,
+ relay, &state_fops);
+ if (IS_ERR(relay->state_file)) {
+ ret = PTR_ERR(relay->state_file);
+ relay->state_file = NULL;
+ goto err;
+ }
+
+ if (!(flags & RELAY_FLIGHT_CHANNEL)) {
+ relay->dropped_file = debugfs_create_file("dropped", 0444,
+ relay->dir, relay,
+ &dropped_fops);
+ if (IS_ERR(relay->dropped_file)) {
+ ret = PTR_ERR(relay->dropped_file);
+ relay->dropped_file = NULL;
+ goto err;
+ }
+ }
+
+ if (flags & RELAY_FLIGHT_CHANNEL) {
+ relay->reset_consumed_file = debugfs_create_file("rewind", 0444,
+ relay->dir, relay,
+ &reset_consumed_fops);
+ if (IS_ERR(relay->reset_consumed_file)) {
+ ret = PTR_ERR(relay->reset_consumed_file);
+ relay->reset_consumed_file = NULL;
+ goto err;
+ }
+ }
+
+ relay->nr_sub_file = debugfs_create_file("nr_sub", 0444,
+ relay->dir, relay,
+ &nr_sub_fops);
+ if (IS_ERR(relay->nr_sub_file)) {
+ ret = PTR_ERR(relay->nr_sub_file);
+ relay->nr_sub_file = NULL;
+ goto err;
+ }
+
+ relay->sub_size_file = debugfs_create_file("sub_size", 0444,
+ relay->dir, relay,
+ &sub_size_fops);
+ if (IS_ERR(relay->sub_size_file)) {
+ ret = PTR_ERR(relay->sub_size_file);
+ relay->sub_size_file = NULL;
+ goto err;
+ }
+
+ return relay;
+err:
+ if (relay) {
+ remove_controls(relay);
+ kfree(relay);
+ }
+
+ return ERR_PTR(ret);
+}
+
+static int relay_setup_channel(struct relay_info *relay, u32 buf_size,
+ u32 buf_nr, u32 flags)
+{
+ if (!buf_size || !buf_nr)
+ return -EINVAL;
+
+ if (flags & RELAY_GLOBAL_CHANNEL)
+ relay->rchan = relay_open("trace", relay->dir, buf_size,
+ buf_nr, &relay_callbacks_global,
+ relay);
+ else
+ relay->rchan = relay_open("trace", relay->dir, buf_size,
+ buf_nr, &relay_callbacks, relay);
+
+ if (!relay->rchan)
+ return -ENOMEM;
+
+ relay->flags = flags;
+ relay->state = RELAY_SETUP;
+
+ return 0;
+}
+
+/**
+ * relay_setup - create a new relay relay handle
+ * @root: The root directory name to place relay directories.
+ * @name: Relay debugfs directory name, created in @root
+ * @buf_size: size of the relay sub-buffers
+ * @buf_nr: number of relay sub-buffers
+ * @flags: Option selection (see relay channel flags definitions)
+ *
+ * returns a relay_info handle or NULL, if setup failed.
+ *
+ * The @root is created (if needed) in the root of the debugfs.
+ * The default values when flags=0 are: use per-CPU buffering,
+ * use non-overwrite mode. See Documentation/filesystems/relay.txt for
+ * details.
+ */
+struct relay_info *relay_setup(const char *root, const char *name,
+ u32 buf_size, u32 buf_nr, u32 flags)
+{
+ struct relay_info *relay;
+
+ relay = setup_controls(root, name, flags);
+ if (IS_ERR(relay))
+ return relay;
+
+ relay->buf_size = buf_size;
+ relay->buf_nr = buf_nr;
+ relay->flags = flags;
+ mutex_init(&relay->state_mutex);
+ relay->state = RELAY_SETUP;
+
+ return relay;
+}
+EXPORT_SYMBOL_GPL(relay_setup);
+
+/**
+ * relay_start - start tracing
+ * @relay: relay handle to start.
+ *
+ * returns 0 if successful.
+ */
+int relay_start(struct relay_info *relay)
+{
+ /*
+ * For starting a relay, we can transition from a setup or stopped
+ * relay.
+ */
+ if (relay->state == RELAY_RUNNING)
+ return -EINVAL;
+
+ mutex_lock(&relay->state_mutex);
+ if (relay->state == RELAY_SETUP) {
+ int ret;
+
+ ret = relay_setup_channel(relay, relay->buf_size,
+ relay->buf_nr, relay->flags);
+ if (ret) {
+ mutex_unlock(&relay->state_mutex);
+ return ret;
+ }
+ }
+
+ relay->state = RELAY_RUNNING;
+ mutex_unlock(&relay->state_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(relay_start);
+
+/**
+ * relay_stop - stop tracing
+ * @relay: relay handle to stop.
+ *
+ */
+int relay_stop(struct relay_info *relay)
+{
+ int ret = -EINVAL;
+
+ /*
+ * For stopping a relay, the state must be running
+ */
+ mutex_lock(&relay->state_mutex);
+ if (relay->state == RELAY_RUNNING) {
+ relay->state = RELAY_STOPPED;
+ /*
+ * wait for all cpus to see the change in
+ * state before continuing
+ */
+ synchronize_sched();
+ relay_flush(relay->rchan);
+ ret = 0;
+ }
+ mutex_unlock(&relay->state_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(relay_stop);
+
+static void relay_cleanup_channel(struct relay_info *relay)
+{
+ relay_stop(relay);
+ relay_close(relay->rchan);
+ relay->rchan = NULL;
+}
+
+/**
+ * relay_cleanup - destroys the relay channel, control files and dir
+ * @relay: relay handle to cleanup
+ */
+void relay_cleanup(struct relay_info *relay)
+{
+ relay_cleanup_channel(relay);
+ remove_controls(relay);
+ kfree(relay);
+}
+EXPORT_SYMBOL_GPL(relay_cleanup);
+
+/**
+ * relay_cleanup_all - Removes all relay debugfs directories in parent_dir
+ * @parent_dir: Name of the parent directory
+ */
+void relay_cleanup_all(const char *parent_dir)
+{
+ struct list_head *pos, *pos_temp;
+ struct relay_dir *temp;
+
+ list_for_each_safe(pos, pos_temp, &relay_dirs) {
+ temp = list_entry(pos, struct relay_dir, relay_dir_list);
+ if (!strncmp(parent_dir, temp->relay_root->d_iname, \
+ strlen(parent_dir)))
+ relay_cleanup(temp->ti);
+ }
+}
+EXPORT_SYMBOL_GPL(relay_cleanup_all);
+
+/*
+ * Send formatted data to relay debugfs channel.
+ */
+static int relay_printf(struct relay_info *trace, const char *format,
+ va_list ap)
+{
+ va_list aq;
+ char *record;
+ int len, ret = 0;
+
+ if (relay_running(trace)) {
+ va_copy(aq, ap);
+ len = vsnprintf(NULL, 0, format, aq);
+ va_end(aq);
+ record = relay_reserve(trace->rchan, ++len);
+ if (record)
+ ret = vsnprintf(record, len, format, ap);
+ }
+ return ret;
+}
+
+static inline int init_relay_interface(struct relay_printk_data *tpk)
+{
+ int ret = 0;
+ tpk->exists = relay_exists(tpk->parent_dir, tpk->dir, &tpk->ti);
+
+ switch (tpk->exists) {
+
+ case RELAY_PARENT_DIR_EXISTS:
+ case RELAY_PARENT_DIR_ABSENT:
+ if (!tpk->buf_size)
+ tpk->buf_size = DEFAULT_RELAY_BUF_SIZE;
+ if (!tpk->sub_buf_size)
+ tpk->sub_buf_size = DEFAULT_RELAY_SUB_BUF_NR;
+ tpk->ti = relay_setup(tpk->parent_dir, tpk->dir,
+ tpk->buf_size, tpk->sub_buf_size, tpk->flags);
+ printk(KERN_INFO "Trace interface %s setup\n",
+ tpk->ti->dir->d_iname);
+ if (IS_ERR(tpk->ti)) {
+ printk(KERN_ERR "Error initialising %s interface\n",
+ tpk->ti->dir->d_iname);
+ return -EPERM;
+ }
+ /* Fall through */
+ case RELAY_DIR_EXISTS:
+ if (tpk->ti->state == RELAY_SETUP)
+ ret = relay_start(tpk->ti);
+ else
+ ret = -EPERM;
+ }
+
+ return 0;
+}
+
+/**
+ * relay_printk - Output a string to debugfs mount 'directly' using 'trace'
+ * @tpk: Structure containing info such as parent_dir and directory
+ * @format: String containing format string specifiers
+ * @ap: List of arguments
+ */
+int relay_printk(struct relay_printk_data *tpk, char *format, ...)
+{
+ int ret = 0;
+ va_list(ap);
+ unsigned long flags = 0;
+
+ va_start(ap, format);
+
+ ret = init_relay_interface(tpk);
+ if (unlikely(ret))
+ return ret;
+
+ /* Now do the actual printing */
+ /* Take an RCU Lock over the relay_info state */
+ rcu_read_lock();
+ /* Take a spinlock for the global buffer used by relay */
+ if (tpk->flags & RELAY_GLOBAL_CHANNEL)
+ spin_lock_irqsave(&tpk->ti->relay_lock, flags);
+ ret = relay_printf(tpk->ti, format, ap);
+ if (tpk->flags & RELAY_GLOBAL_CHANNEL)
+ spin_unlock_irqrestore(&tpk->ti->relay_lock, flags);
+ rcu_read_unlock();
+
+ va_end(ap);
+ return ret;
+}
+EXPORT_SYMBOL(relay_printk);
+
+/**
+ * relay_dump - Output binary into debugfs mount 'directly' using 'trace'
+ * @tpk: Structure containing info such as parent_dir and directory
+ * @output: Data that needs to be output
+ * @output_len: Length of the output data
+ */
+int relay_dump(struct relay_printk_data *tpk, const void *output,
+ const int output_len)
+{
+ char *record;
+ unsigned long flags = 0;
+ int ret = 0;
+
+ ret = init_relay_interface(tpk);
+ if (unlikely(ret))
+ return ret;
+
+ /* Now do the actual printing */
+ rcu_read_lock();
+ /* Take a spinlock for the global buffer used by relay */
+ if (tpk->flags & RELAY_GLOBAL_CHANNEL)
+ spin_lock_irqsave(&tpk->ti->relay_lock, flags);
+ record = relay_reserve(tpk->ti->rchan, output_len);
+
+ if (record && relay_running(tpk->ti))
+ memcpy(record, output, output_len);
+ else {
+ if (record)
+ ret = -EPERM;
+ else
+ ret = -ENOMEM;
+ }
+ if (tpk->flags & RELAY_GLOBAL_CHANNEL)
+ spin_unlock_irqrestore(&tpk->ti->relay_lock, flags);
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(relay_dump);
Index: linux-2.6.25/lib/trace.c
===================================================================
--- linux-2.6.25.orig/lib/trace.c
+++ /dev/null
@@ -1,563 +0,0 @@
-/*
- * Based on blktrace code, Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
- * Moved to utt.c by Tom Zanussi <zanussi@us.ibm.com>, 2006
- * Additional contributions by:
- * Martin Hunt <hunt@redhat.com>, 2007
- * David Wilder <dwilder@us.ibm.com>, 2007
- * Renamed to trace <dwilder.ibm.com>, 2007
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/mutex.h>
-#include <linux/debugfs.h>
-#include <linux/trace.h>
-
-static LIST_HEAD(trace_roots);
-static DEFINE_MUTEX(trace_mutex);
-
-static int state_open(struct inode *inode, struct file *filp)
-{
- filp->private_data = inode->i_private;
- return 0;
-}
-
-static ssize_t state_read(struct file *filp, char __user *buffer,
- size_t count, loff_t *ppos)
-{
- struct trace_info *trace = filp->private_data;
- char *buf = "trace not started\n";
-
- if (trace->state == TRACE_STOPPED)
- buf = "stopped\n";
- else if (trace->state == TRACE_RUNNING)
- buf = "running\n";
- return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
-}
-
-static ssize_t state_write(struct file *filp, const char __user *buffer,
- size_t count, loff_t *ppos)
-{
- struct trace_info *trace = filp->private_data;
- char buf[16];
- int ret;
-
- if (trace->flags & TRACE_DISABLE_STATE)
- return -EINVAL;
-
- if (count > sizeof(buf) - 1)
- return -EINVAL;
-
- if (copy_from_user(buf, buffer, count))
- return -EFAULT;
-
- buf[count-1] = '\0';
-
- if (strcmp(buf, "start") == 0) {
- ret = trace_start(trace);
- if (ret)
- return ret;
- } else if (strcmp(buf, "stop") == 0)
- trace_stop(trace);
- else
- return -EINVAL;
-
- return count;
-}
-
-static struct file_operations state_fops = {
- .owner = THIS_MODULE,
- .open = state_open,
- .read = state_read,
- .write = state_write,
-};
-
-static void remove_root(struct trace_info *trace)
-{
- if (trace->root->root && simple_empty(trace->root->root)) {
- debugfs_remove(trace->root->root);
- list_del(&trace->root->list);
- kfree(trace->root);
- trace->root = NULL;
- }
-}
-
-static void remove_tree(struct trace_info *trace)
-{
- mutex_lock(&trace_mutex);
- debugfs_remove(trace->dir);
-
- if (trace->root) {
- if (--trace->root->users == 0)
- remove_root(trace);
- }
-
- mutex_unlock(&trace_mutex);
-}
-
-/*
- * Creates the trace_root if it's not found.
- */
-static struct trace_root *lookup_root(const char *root)
-{
- struct list_head *pos;
- struct trace_root *r;
-
- list_for_each(pos, &trace_roots) {
- r = list_entry(pos, struct trace_root, list);
- if (!strcmp(r->name, root))
- return r;
- }
-
- r = kzalloc(sizeof(struct trace_root), GFP_KERNEL);
- if (!r)
- return ERR_PTR(-ENOMEM);
-
- strlcpy(r->name, root, sizeof(r->name));
-
- r->root = debugfs_create_dir(root, NULL);
- if (IS_ERR(r->root))
- r->root = NULL;
- else
- list_add(&r->list, &trace_roots);
-
- return r;
-}
-
-static struct dentry *create_tree(struct trace_info *trace, const char *root,
- const char *name)
-{
- struct dentry *dir = NULL;
-
- if (root == NULL || name == NULL)
- return ERR_PTR(-EINVAL);
-
- mutex_lock(&trace_mutex);
-
- trace->root = lookup_root(root);
- if (IS_ERR(trace->root)) {
- trace->root = NULL;
- goto err;
- }
-
- dir = debugfs_create_dir(name, trace->root->root);
- if (IS_ERR(dir))
- remove_root(trace);
- else
- trace->root->users++;
-
-err:
- mutex_unlock(&trace_mutex);
- return dir;
-}
-
-static int dropped_open(struct inode *inode, struct file *filp)
-{
- filp->private_data = inode->i_private;
-
- return 0;
-}
-
-static ssize_t dropped_read(struct file *filp, char __user *buffer,
- size_t count, loff_t *ppos)
-{
- struct trace_info *trace = filp->private_data;
- char buf[16];
-
- snprintf(buf, sizeof(buf), "%u\n", atomic_read(&trace->dropped));
-
- return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
-}
-
-static struct file_operations dropped_fops = {
- .owner = THIS_MODULE,
- .open = dropped_open,
- .read = dropped_read,
-};
-
-static int reset_consumed_open(struct inode *inode, struct file *filp)
-{
- filp->private_data = inode->i_private;
-
- return 0;
-}
-
-static ssize_t reset_consumed_write(struct file *filp,
- const char __user *buffer,
- size_t count, loff_t *ppos)
-{
- int ret = count;
- struct trace_info *trace = filp->private_data;
-
- mutex_lock(&trace->state_mutex);
- switch (trace->state) {
- case TRACE_RUNNING:
- trace->state = TRACE_STOPPED;
- synchronize_rcu();
- relay_flush(trace->rchan);
- relay_reset_consumed(trace->rchan);
- trace->state = TRACE_RUNNING;
- break;
- case TRACE_STOPPED:
- relay_reset_consumed(trace->rchan);
- break;
- default:
- ret = -EINVAL;
- }
- mutex_unlock(&trace->state_mutex);
- return ret;
-}
-
-static struct file_operations reset_consumed_fops = {
- .owner = THIS_MODULE,
- .open = reset_consumed_open,
- .write = reset_consumed_write
-};
-
-static int sub_size_open(struct inode *inode, struct file *filp)
-{
- filp->private_data = inode->i_private;
-
- return 0;
-}
-
-static ssize_t sub_size_read(struct file *filp, char __user *buffer,
- size_t count, loff_t *ppos)
-{
- struct trace_info *trace = filp->private_data;
- char buf[32];
-
- snprintf(buf, sizeof(buf), "%zu\n", trace->rchan->subbuf_size);
-
- return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
-}
-
-static struct file_operations sub_size_fops = {
- .owner = THIS_MODULE,
- .open = sub_size_open,
- .read = sub_size_read,
-};
-
-static int nr_sub_open(struct inode *inode, struct file *filp)
-{
- filp->private_data = inode->i_private;
- return 0;
-}
-
-static ssize_t nr_sub_read(struct file *filp, char __user *buffer,
- size_t count, loff_t *ppos)
-{
- struct trace_info *trace = filp->private_data;
- char buf[32];
-
- snprintf(buf, sizeof(buf), "%zu\n", trace->rchan->n_subbufs);
-
- return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
-}
-
-static struct file_operations nr_sub_fops = {
- .owner = THIS_MODULE,
- .open = nr_sub_open,
- .read = nr_sub_read,
-};
-
-/*
- * Keep track of how many times we encountered a full subbuffer, to aid
- * the user space app in telling how many lost events there were.
- */
-static int subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
- void *prev_subbuf, size_t prev_padding)
-{
- struct trace_info *trace = buf->chan->private_data;
-
- if (trace->flags & TRACE_FLIGHT_CHANNEL)
- return 1;
-
- if (!relay_buf_full(buf))
- return 1;
-
- atomic_inc(&trace->dropped);
-
- return 0;
-}
-
-static int remove_buf_file_callback(struct dentry *dentry)
-{
- debugfs_remove(dentry);
-
- return 0;
-}
-
-static struct dentry *create_buf_file_callback(const char *filename,
- struct dentry *parent, int mode,
- struct rchan_buf *buf,
- int *is_global)
-{
- return debugfs_create_file(filename, mode, parent, buf,
- &relay_file_operations);
-}
-
-static struct dentry *create_global_buf_file_callback(const char *filename,
- struct dentry *parent,
- int mode,
- struct rchan_buf *buf,
- int *is_global)
-{
- *is_global = 1;
-
- return debugfs_create_file(filename, mode, parent, buf,
- &relay_file_operations);
-}
-
-static struct rchan_callbacks relay_callbacks = {
- .subbuf_start = subbuf_start_callback,
- .create_buf_file = create_buf_file_callback,
- .remove_buf_file = remove_buf_file_callback,
-};
-static struct rchan_callbacks relay_callbacks_global = {
- .subbuf_start = subbuf_start_callback,
- .create_buf_file = create_global_buf_file_callback,
- .remove_buf_file = remove_buf_file_callback,
-};
-
-static void remove_controls(struct trace_info *trace)
-{
- debugfs_remove(trace->state_file);
- debugfs_remove(trace->dropped_file);
- debugfs_remove(trace->reset_consumed_file);
- debugfs_remove(trace->nr_sub_file);
- debugfs_remove(trace->sub_size_file);
- remove_tree(trace);
-}
-
-/*
- * Setup controls for tracing.
- */
-static struct trace_info *setup_controls(const char *root,
- const char *name, u32 flags)
-{
- struct trace_info *trace;
- long ret;
-
- trace = kzalloc(sizeof(*trace), GFP_KERNEL);
- if (!trace) {
- ret = -ENOMEM;
- goto err;
- }
-
- trace->dir = create_tree(trace, root, name);
- if (IS_ERR(trace->dir)) {
- ret = PTR_ERR(trace->dir);
- trace->dir = NULL;
- goto err;
- }
-
- trace->state_file = debugfs_create_file("state", 0444, trace->dir,
- trace, &state_fops);
- if (IS_ERR(trace->state_file)) {
- ret = PTR_ERR(trace->state_file);
- trace->state_file = NULL;
- goto err;
- }
-
- if (!(flags & TRACE_FLIGHT_CHANNEL)) {
- trace->dropped_file = debugfs_create_file("dropped", 0444,
- trace->dir, trace,
- &dropped_fops);
- if (IS_ERR(trace->dropped_file)) {
- ret = PTR_ERR(trace->dropped_file);
- trace->dropped_file = NULL;
- goto err;
- }
- }
-
- if (flags & TRACE_FLIGHT_CHANNEL) {
- trace->reset_consumed_file = debugfs_create_file("rewind", 0444,
- trace->dir, trace,
- &reset_consumed_fops);
- if (IS_ERR(trace->reset_consumed_file)) {
- ret = PTR_ERR(trace->reset_consumed_file);
- trace->reset_consumed_file = NULL;
- goto err;
- }
- }
-
- trace->nr_sub_file = debugfs_create_file("nr_sub", 0444,
- trace->dir, trace,
- &nr_sub_fops);
- if (IS_ERR(trace->nr_sub_file)) {
- ret = PTR_ERR(trace->nr_sub_file);
- trace->nr_sub_file = NULL;
- goto err;
- }
-
- trace->sub_size_file = debugfs_create_file("sub_size", 0444,
- trace->dir, trace,
- &sub_size_fops);
- if (IS_ERR(trace->sub_size_file)) {
- ret = PTR_ERR(trace->sub_size_file);
- trace->sub_size_file = NULL;
- goto err;
- }
-
- return trace;
-err:
- if (trace) {
- remove_controls(trace);
- kfree(trace);
- }
-
- return ERR_PTR(ret);
-}
-
-static int trace_setup_channel(struct trace_info *trace, u32 buf_size,
- u32 buf_nr, u32 flags)
-{
- if (!buf_size || !buf_nr)
- return -EINVAL;
-
- if (flags & TRACE_GLOBAL_CHANNEL)
- trace->rchan = relay_open("trace", trace->dir, buf_size,
- buf_nr, &relay_callbacks_global,
- trace);
- else
- trace->rchan = relay_open("trace", trace->dir, buf_size,
- buf_nr, &relay_callbacks, trace);
-
- if (!trace->rchan)
- return -ENOMEM;
-
- trace->flags = flags;
- trace->state = TRACE_SETUP;
-
- return 0;
-}
-
-/**
- * trace_setup - create a new trace trace handle
- * @root: The root directory name to place trace directories.
- * @name: Trace directory name, created in @root
- * @buf_size: size of the relay sub-buffers
- * @buf_nr: number of relay sub-buffers
- * @flags: Option selection (see trace channel flags definitions)
- *
- * returns a trace_info handle or NULL, if setup failed.
- *
- * The @root is created (if needed) in the root of the debugfs.
- * The default values when flags=0 are: use per-CPU buffering,
- * use non-overwrite mode. See Documentation/trace.txt for details.
- */
-struct trace_info *trace_setup(const char *root, const char *name,
- u32 buf_size, u32 buf_nr, u32 flags)
-{
- struct trace_info *trace;
-
- trace = setup_controls(root, name, flags);
- if (IS_ERR(trace))
- return trace;
-
- trace->buf_size = buf_size;
- trace->buf_nr = buf_nr;
- trace->flags = flags;
- mutex_init(&trace->state_mutex);
- trace->state = TRACE_SETUP;
-
- return trace;
-}
-EXPORT_SYMBOL_GPL(trace_setup);
-
-/**
- * trace_start - start tracing
- * @trace: trace handle to start.
- *
- * returns 0 if successful.
- */
-int trace_start(struct trace_info *trace)
-{
- /*
- * For starting a trace, we can transition from a setup or stopped
- * trace.
- */
- if (trace->state == TRACE_RUNNING)
- return -EINVAL;
-
- mutex_lock(&trace->state_mutex);
- if (trace->state == TRACE_SETUP) {
- int ret;
-
- ret = trace_setup_channel(trace, trace->buf_size,
- trace->buf_nr, trace->flags);
- if (ret) {
- mutex_unlock(&trace->state_mutex);
- return ret;
- }
- }
-
- trace->state = TRACE_RUNNING;
- mutex_unlock(&trace->state_mutex);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(trace_start);
-
-/**
- * trace_stop - stop tracing
- * @trace: trace handle to stop.
- *
- */
-int trace_stop(struct trace_info *trace)
-{
- int ret = -EINVAL;
-
- /*
- * For stopping a trace, the state must be running
- */
- mutex_lock(&trace->state_mutex);
- if (trace->state == TRACE_RUNNING) {
- trace->state = TRACE_STOPPED;
- /*
- * wait for all cpus to see the change in
- * state before continuing
- */
- synchronize_sched();
- relay_flush(trace->rchan);
- ret = 0;
- }
- mutex_unlock(&trace->state_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(trace_stop);
-
-static void trace_cleanup_channel(struct trace_info *trace)
-{
- trace_stop(trace);
- relay_close(trace->rchan);
- trace->rchan = NULL;
-}
-
-/**
- * trace_cleanup - destroys the trace channel, control files and dir
- * @trace: trace handle to cleanup
- */
-void trace_cleanup(struct trace_info *trace)
-{
- trace_cleanup_channel(trace);
- remove_controls(trace);
- kfree(trace);
-}
-EXPORT_SYMBOL_GPL(trace_cleanup);
Index: linux-2.6.25/init/Kconfig
===================================================================
--- linux-2.6.25.orig/init/Kconfig
+++ linux-2.6.25/init/Kconfig
@@ -435,6 +435,15 @@ config RELAY
If unsure, say N.
+config RELAY_DEBUGFS
+ bool "Relay debugfs setup and control"
+ depends on RELAY && DEBUG_FS
+ help
+ This option provides support for the setup, teardown and control
+ of relay channels from kernel code which are mounted on debugfs.
+ It also provides information and control to userspace via a set of
+ debugfs control files. If unsure, say N.
+
config NAMESPACES
bool "Namespaces support" if EMBEDDED
default !EMBEDDED
Index: linux-2.6.25/kernel/Makefile
===================================================================
--- linux-2.6.25.orig/kernel/Makefile
+++ linux-2.6.25/kernel/Makefile
@@ -73,6 +73,7 @@ ifeq ($(CONFIG_PREEMPT_RCU),y)
obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o
endif
obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_RELAY_DEBUGFS) += relay_debugfs.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
Index: linux-2.6.25/lib/Kconfig
===================================================================
--- linux-2.6.25.orig/lib/Kconfig
+++ linux-2.6.25/lib/Kconfig
@@ -144,13 +144,4 @@ config CHECK_SIGNATURE
config HAVE_LMB
boolean
-config TRACE
- bool "Trace setup and control"
- depends on RELAY && DEBUG_FS
- help
- This option provides support for the setup, teardown and control
- of tracing channels from kernel code. It also provides trace
- information and control to userspace via a set of debugfs control
- files. If unsure, say N.
-
endmenu
Index: linux-2.6.25/lib/Makefile
===================================================================
--- linux-2.6.25.orig/lib/Makefile
+++ linux-2.6.25/lib/Makefile
@@ -80,8 +80,6 @@ lib-$(CONFIG_GENERIC_BUG) += bug.o
obj-$(CONFIG_HAVE_LMB) += lmb.o
-obj-$(CONFIG_TRACE) += trace.o
-
obj-$(CONFIG_PROFILE_LIKELY) += likely_prof.o
hostprogs-y := gen_crc32table
Index: linux-2.6.25/samples/relay/Makefile
===================================================================
--- /dev/null
+++ linux-2.6.25/samples/relay/Makefile
@@ -0,0 +1,4 @@
+# builds the trace example kernel modules;
+# then to use (as root): insmod <fork_trace.ko>
+
+obj-$(CONFIG_SAMPLE_RELAY) := fork_trace.o fork_new_trace.o
Index: linux-2.6.25/samples/relay/fork_trace.c
===================================================================
--- /dev/null
+++ linux-2.6.25/samples/relay/fork_trace.c
@@ -0,0 +1,132 @@
+/*
+ * An example of using 'relay debugfs' in a kprobes module
+ *
+ * Copyright (C) 2007 IBM Inc.
+ *
+ * David Wilder <dwilder@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * -------
+ * This module creates a 'relay debugfs' channel and places a kprobe
+ * on the function do_fork(). The value of current->pid is written to
+ * the 'relay debugfs' channel each time the kprobe is hit..
+ *
+ * How to run the example:
+ * $ mount -t debugfs /debug
+ * $ insmod fork_trace.ko
+ *
+ * To view the data produced by the module:
+ * $ cat /debug/relay_debugfs_example/do_fork/trace0
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+#include <linux/relay_debugfs.h>
+
+#define USE_GLOBAL_BUFFERS 1
+#define USE_FLIGHT 1
+
+#define PROBE_POINT "do_fork"
+
+static struct kprobe kp;
+static struct relay_info *kprobes_relay;
+
+#ifdef USE_GLOBAL_BUFFERS
+static DEFINE_SPINLOCK(relay_debugfs_lock);
+#endif
+
+/*
+ * Send formatted trace data to 'relay debugfs' channel.
+ * @note Preemption must be disabled to use this.
+ */
+static void relay_debugfs_printf(struct relay_info *relay, const char *format, ...)
+{
+ va_list ap, aq;
+ char *record;
+ unsigned long flags;
+ int len;
+
+ if (!relay)
+ return;
+
+#ifdef USE_GLOBAL_BUFFERS
+ spin_lock_irqsave(&relay_debugfs_lock, flags);
+#endif
+ if (relay_running(relay)) {
+ va_start(ap, format);
+ va_copy(aq, ap);
+ len = vsnprintf(NULL, 0, format, aq);
+ va_end(aq);
+ record = relay_reserve(relay->rchan, ++len);
+ if (record)
+ vsnprintf(record, len, format, ap);
+ va_end(ap);
+ }
+#ifdef USE_GLOBAL_BUFFERS
+ spin_unlock_irqrestore(&relay_debugfs_lock, flags);
+#endif
+}
+
+static int handler_pre(struct kprobe *p, struct pt_regs *regs)
+{
+ rcu_read_lock();
+ relay_debugfs_printf(kprobes_relay, "%d\n", current->pid);
+ rcu_read_unlock();
+ return 0;
+}
+
+int init_module(void)
+{
+ int ret;
+ u32 flags = 0;
+
+#ifdef USE_GLOBAL_BUFFERS
+ flags |= RELAY_GLOBAL_CHANNEL;
+#endif
+
+#ifdef USE_FLIGHT
+ flags |= RELAY_FLIGHT_CHANNEL;
+#endif
+
+ /* setup the relay */
+ kprobes_relay = relay_setup("relay_example", PROBE_POINT,
+ 1024, 8, flags);
+ if (IS_ERR(kprobes_relay))
+ return PTR_ERR(kprobes_relay);
+
+ relay_start(kprobes_relay);
+
+ /* setup the kprobe */
+ kp.pre_handler = handler_pre;
+ kp.post_handler = NULL;
+ kp.fault_handler = NULL;
+ kp.symbol_name = PROBE_POINT;
+ ret = register_kprobe(&kp);
+ if (ret) {
+ printk(KERN_ERR "fork_trace: register_kprobe failed\n");
+ return ret;
+ }
+ return 0;
+}
+
+void cleanup_module(void)
+{
+ unregister_kprobe(&kp);
+ relay_stop(kprobes_relay);
+ relay_cleanup(kprobes_relay);
+}
+MODULE_LICENSE("GPL");
Index: linux-2.6.25/samples/trace/Makefile
===================================================================
--- linux-2.6.25.orig/samples/trace/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-# builds the trace example kernel modules;
-# then to use (as root): insmod <fork_trace.ko>
-
-obj-$(CONFIG_SAMPLE_TRACE) := fork_trace.o
Index: linux-2.6.25/samples/trace/fork_trace.c
===================================================================
--- linux-2.6.25.orig/samples/trace/fork_trace.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * An example of using trace in a kprobes module
- *
- * Copyright (C) 2007 IBM Inc.
- *
- * David Wilder <dwilder@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * -------
- * This module creates a trace channel and places a kprobe
- * on the function do_fork(). The value of current->pid is written to
- * the trace channel each time the kprobe is hit..
- *
- * How to run the example:
- * $ mount -t debugfs /debug
- * $ insmod fork_trace.ko
- *
- * To view the data produced by the module:
- * $ cat /debug/trace_example/do_fork/trace0
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/kprobes.h>
-#include <linux/trace.h>
-
-#define USE_GLOBAL_BUFFERS 1
-#define USE_FLIGHT 1
-
-#define PROBE_POINT "do_fork"
-
-static struct kprobe kp;
-static struct trace_info *kprobes_trace;
-
-#ifdef USE_GLOBAL_BUFFERS
-static DEFINE_SPINLOCK(trace_lock);
-#endif
-
-/*
- * Send formatted trace data to trace channel.
- * @note Preemption must be disabled to use this.
- */
-static void trace_printf(struct trace_info *trace, const char *format, ...)
-{
- va_list ap, aq;
- char *record;
- unsigned long flags;
- int len;
-
- if (!trace)
- return;
-
-#ifdef USE_GLOBAL_BUFFERS
- spin_lock_irqsave(&trace_lock, flags);
-#endif
- if (trace_running(trace)) {
- va_start(ap, format);
- va_copy(aq, ap);
- len = vsnprintf(NULL, 0, format, aq);
- va_end(aq);
- record = relay_reserve(trace->rchan, ++len);
- if (record)
- vsnprintf(record, len, format, ap);
- va_end(ap);
- }
-#ifdef USE_GLOBAL_BUFFERS
- spin_unlock_irqrestore(&trace_lock, flags);
-#endif
-}
-
-static int handler_pre(struct kprobe *p, struct pt_regs *regs)
-{
- rcu_read_lock();
- trace_printf(kprobes_trace, "%d\n", current->pid);
- rcu_read_unlock();
- return 0;
-}
-
-int init_module(void)
-{
- int ret;
- u32 flags = 0;
-
-#ifdef USE_GLOBAL_BUFFERS
- flags |= TRACE_GLOBAL_CHANNEL;
-#endif
-
-#ifdef USE_FLIGHT
- flags |= TRACE_FLIGHT_CHANNEL;
-#endif
-
- /* setup the trace */
- kprobes_trace = trace_setup("trace_example", PROBE_POINT,
- 1024, 8, flags);
- if (IS_ERR(kprobes_trace))
- return PTR_ERR(kprobes_trace);
-
- trace_start(kprobes_trace);
-
- /* setup the kprobe */
- kp.pre_handler = handler_pre;
- kp.post_handler = NULL;
- kp.fault_handler = NULL;
- kp.symbol_name = PROBE_POINT;
- ret = register_kprobe(&kp);
- if (ret) {
- printk(KERN_ERR "fork_trace: register_kprobe failed\n");
- return ret;
- }
- return 0;
-}
-
-void cleanup_module(void)
-{
- unregister_kprobe(&kp);
- trace_stop(kprobes_trace);
- trace_cleanup(kprobes_trace);
-}
-MODULE_LICENSE("GPL");
Index: linux-2.6.25/samples/Kconfig
===================================================================
--- linux-2.6.25.orig/samples/Kconfig
+++ linux-2.6.25/samples/Kconfig
@@ -33,11 +33,11 @@ config SAMPLE_KRETPROBES
default m
depends on SAMPLE_KPROBES && KRETPROBES
-config SAMPLE_TRACE
- tristate "Build trace example -- loadable modules only"
- depends on TRACE && KPROBES && m
+config SAMPLE_RELAY
+ tristate "Build relay debugfs example -- loadable modules only"
+ depends on RELAY && KPROBES && m
help
- This builds a trace example module.
+ This builds a relay debugfs example module.
endif # SAMPLES
Index: linux-2.6.25/samples/Makefile
===================================================================
--- linux-2.6.25.orig/samples/Makefile
+++ linux-2.6.25/samples/Makefile
@@ -1,3 +1,3 @@
# Makefile for Linux samples code
-obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ trace/
+obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ relay/
Index: linux-2.6.25/Documentation/filesystems/relay.txt
===================================================================
--- linux-2.6.25.orig/Documentation/filesystems/relay.txt
+++ linux-2.6.25/Documentation/filesystems/relay.txt
@@ -627,6 +627,28 @@ are:
5) Destroy the 'relay debugfs' channel and underlying relay channel -
relay_cleanup().
+Alternatively the user may choose to make use of two new interfaces --
+relay_printk() and relay_dump() -- to setup trace interface and
+relay_cleanup_all() to tear-down the same.
+
+Steps to use:
+1) Create and populate an instance of relay_printk_data structure. The fields
+ parent_dir and dir are mandatory. The fields buf_size, sub_buf_size and flags
+ are optional and will take default values if not populated. The field
+ 'exists' and ti are for the trace infrastructure to use. The pointer to the
+ 'struct relay_info' i.e. ti may be used to perform fine granular operations
+ such as determine the state of the 'trace', stop individual traces, etc.
+2) Default values for buf_size and sub_buf_size are 4096, 40 respectively.
+3) Use relay_dump() to output binary data which may be acted upon by a
+ high-level program (say dumping a structure). relay_printk() can be used
+ for string output. Pass a pointer to the instance of relay_printk_data
+ structure to these functions along with other parameters. The output from
+ these functions can be found at
+ <debugfs_mount>/<parent_dir>/<dir>/trace<0..n>.
+4) relay_cleanup_all() for a given parent directory will cleanup and remove all
+ trace directories created under the specified directory.
+5) Sample code for the same can be found in samples/trace/fork_new_trace.c
+
Kernel Configuration
--------------------
To use 'relay debugfs', configure your kernel with CONFIG_TRACE=y.
Index: linux-2.6.25/samples/trace/fork_new_trace.c
===================================================================
--- /dev/null
+++ linux-2.6.25/samples/trace/fork_new_trace.c
@@ -0,0 +1,99 @@
+/*
+ * An example of using trace in a kprobes module
+ *
+ * Copyright (C) 2008 IBM Inc.
+ *
+ * K.Prasad <prasad@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * -------
+ * This module creates a trace channel and places a kprobe
+ * on the function do_fork(). The value of current->pid is written to
+ * the trace channel each time the kprobe is hit..
+ *
+ * How to run the example:
+ * $ mount -t debugfs /debug
+ * $ insmod fork_new_trace.ko
+ *
+ * To view the data produced by the module:
+ * $ cat /debug/relay_example/do_fork/trace0
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+#include <linux/relay_debugfs.h>
+
+#define SAMPLE_PARENT_DIR "relay_new_example"
+#define PROBE_POINT "do_fork"
+
+static struct kprobe kp;
+static struct relay_printk_data *tpk;
+
+static int handler_pre(struct kprobe *p, struct pt_regs *regs)
+{
+ relay_printk(tpk, "%d\n", current->pid);
+ return 0;
+}
+
+int init_module(void)
+{
+ int ret = 0;
+ int len_parent_dir, len_dir;
+
+ /* setup the kprobe */
+ kp.pre_handler = handler_pre;
+ kp.post_handler = NULL;
+ kp.fault_handler = NULL;
+ kp.symbol_name = PROBE_POINT;
+ ret = register_kprobe(&kp);
+ if (ret) {
+ printk(KERN_ERR "fork_trace: register_kprobe failed\n");
+ return ret;
+ }
+
+ len_parent_dir = strlen(SAMPLE_PARENT_DIR) + 1;
+ /* Initialising len_dir to the larger of the two dir names */
+ len_dir = strlen("kprobe_struct") + 1;
+
+ tpk = kzalloc(sizeof(*tpk), GFP_KERNEL);
+ if (!tpk)
+ ret = 1;
+
+ tpk->parent_dir = SAMPLE_PARENT_DIR;
+
+ /* Let's do a binary dump of struct kprobe using relay_dump */
+ tpk->dir = "kprobes_struct";
+ tpk->flags = TRACE_GLOBAL_CHANNEL;
+ relay_dump(tpk, &kp, sizeof(kp));
+
+ /* Now change the directory to collect fork pid data */
+ tpk->dir = PROBE_POINT;
+
+ if (ret)
+ printk(KERN_ERR "Unable to find required free memory. "
+ "Trace new sample module loading aborted");
+ return ret;
+}
+
+void cleanup_module(void)
+{
+ unregister_kprobe(&kp);
+
+ /* Just a single cleanup call passing the parent dir string */
+ relay_cleanup_all(SAMPLE_PARENT_DIR);
+}
+MODULE_LICENSE("GPL");
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2008-05-28 18:49 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-05-16 17:01 [Patch 0/1] Enhancements to 'trace' infrastructure - v2 K.Prasad
2008-05-16 17:04 ` [RFC Patch 1/1] trace_printk and trace_dump interface " K.Prasad
2008-05-17 2:22 ` K.Prasad
2008-05-19 23:21 ` Andrew Morton
2008-05-20 19:53 ` K.Prasad
2008-05-20 20:12 ` Andrew Morton
2008-05-23 4:37 ` K.Prasad
2008-05-28 18:16 ` K.Prasad
2008-05-28 18:37 ` [RFC Patch 0/1] Merging Documentation/trace.txt with Documentation/filesystems/relay.txt K.Prasad
2008-05-28 18:48 ` [RFC Patch 2/2] Renaming lib/trace.[ch] files to kernel/relay_debugfs.[ch] and enhancements K.Prasad
2008-05-19 20:02 ` [RFC Patch 1/1] trace_printk and trace_dump interface - v2 David Wilder
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox